258 raise decodermod.CBORDecodeError( |
264 raise decodermod.CBORDecodeError( |
259 'failed to read bytestring chunk: got %d bytes; expected %d' % ( |
265 'failed to read bytestring chunk: got %d bytes; expected %d' % ( |
260 len(chunk), length)) |
266 len(chunk), length)) |
261 |
267 |
262 yield chunk |
268 yield chunk |
|
269 |
|
class CBORDecodeError(Exception):
    """Raised when input cannot be decoded as (supported) CBOR."""
|
272 |
|
# Indexing a bytes object yields a 1-character str on Python 2 but an int
# on Python 3. Pick the right accessor once, at import time.
if sys.version_info.major < 3:
    def _elementtointeger(b, i):
        """Return the integer value of the byte at index ``i`` of ``b``."""
        return ord(b[i])
else:
    def _elementtointeger(b, i):
        """Return the integer value of the byte at index ``i`` of ``b``."""
        return b[i]
|
279 |
|
# Pre-compiled big-endian unpackers for the 1, 2, 4 and 8 byte unsigned
# integer arguments that may follow a CBOR initial byte.
STRUCT_BIG_UBYTE = struct.Struct('>B')
STRUCT_BIG_USHORT = struct.Struct('>H')
STRUCT_BIG_ULONG = struct.Struct('>L')
STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
|
284 |
|
# Flags reported as the last element of the tuple returned by decodeitem().
SPECIAL_NONE = 0  # a fully decoded simple value
SPECIAL_START_INDEFINITE_BYTESTRING = 1  # indefinite length bytestring begins
SPECIAL_START_ARRAY = 2  # array header; the value is the element count
SPECIAL_START_MAP = 3  # map header; the value is the number of entries
SPECIAL_START_SET = 4  # finite set header; the value is the element count
SPECIAL_INDEFINITE_BREAK = 5  # "break" marker ending an indefinite item
|
291 |
|
def decodeitem(b, offset=0):
    """Decode a new CBOR value from a buffer at offset.

    This function attempts to decode up to one complete CBOR value
    from ``b`` starting at offset ``offset``.

    The beginning of a collection (such as an array, map, set, or
    indefinite length bytestring) counts as a single value. For these
    special cases, a state flag will indicate that a special value was seen.

    When called, the function either returns a decoded value or gives
    a hint as to how many more bytes are needed to do so. By calling
    the function repeatedly given a stream of bytes, the caller can
    build up the original values.

    Returns a tuple with the following elements:

    * Bool indicating whether a complete value was decoded.
    * A decoded value if first value is True otherwise None
    * Integer number of bytes. If positive, the number of bytes
      read. If negative, the number of bytes we need to read to
      decode this value or the next chunk in this value.
    * One of the ``SPECIAL_*`` constants indicating special treatment
      for this value. ``SPECIAL_NONE`` means this is a fully decoded
      simple value (such as an integer or bool).

    If ``offset`` is at or beyond the end of ``b``, no value can be
    decoded and the (negative) number of missing bytes is reported, just
    like for any other truncated item.

    Raises ``CBORDecodeError`` if the item uses a CBOR feature that is not
    supported (strings, unknown semantic tags or special values, ...).
    """

    # Nothing is available at ``offset``. Honor the documented contract by
    # asking for the missing byte(s) instead of failing with IndexError.
    if offset >= len(b):
        return False, None, len(b) - offset - 1, SPECIAL_NONE

    initial = _elementtointeger(b, offset)
    offset += 1

    # The high 3 bits select the major type; the low 5 bits carry either
    # an inline value or a description of what follows.
    majortype = initial >> 5
    subtype = initial & SUBTYPE_MASK

    if majortype == MAJOR_TYPE_UINT:
        complete, value, readcount = decodeuint(subtype, b, offset)

        if complete:
            # + 1 accounts for the initial byte.
            return True, value, readcount + 1, SPECIAL_NONE
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_NEGINT:
        # Negative integers are the same as UINT except inverted minus 1.
        complete, value, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, -value - 1, readcount + 1, SPECIAL_NONE
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_BYTESTRING:
        # Beginning of bytestrings are treated as uints in order to
        # decode their length, which may be indefinite.
        complete, size, readcount = decodeuint(subtype, b, offset,
                                               allowindefinite=True)

        # We don't know the size of the bytestring. It must be a definite
        # length since the indefinite subtype would be encoded in the initial
        # byte.
        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # We know the length of the bytestring.
        if size is not None:
            # And the data is available in the buffer.
            if offset + readcount + size <= len(b):
                value = b[offset + readcount:offset + readcount + size]
                return True, value, readcount + size + 1, SPECIAL_NONE

            # And we need more data in order to return the bytestring.
            else:
                wanted = len(b) - offset - readcount - size
                return False, None, wanted, SPECIAL_NONE

        # It is an indefinite length bytestring.
        else:
            return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING

    elif majortype == MAJOR_TYPE_STRING:
        raise CBORDecodeError('string major type not supported')

    elif majortype == MAJOR_TYPE_ARRAY:
        # Beginning of arrays are treated as uints in order to decode their
        # length. We don't allow indefinite length arrays.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, size, readcount + 1, SPECIAL_START_ARRAY
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_MAP:
        # Beginning of maps are treated as uints in order to decode their
        # number of elements. We don't allow indefinite length maps.
        complete, size, readcount = decodeuint(subtype, b, offset)

        if complete:
            return True, size, readcount + 1, SPECIAL_START_MAP
        else:
            return False, None, readcount, SPECIAL_NONE

    elif majortype == MAJOR_TYPE_SEMANTIC:
        # Semantic tag value is read the same as a uint.
        complete, tagvalue, readcount = decodeuint(subtype, b, offset)

        if not complete:
            return False, None, readcount, SPECIAL_NONE

        # This behavior here is a little wonky. The main type being "decorated"
        # by this semantic tag follows. A more robust parser would probably emit
        # a special flag indicating this as a semantic tag and let the caller
        # deal with the types that follow. But since we don't support many
        # semantic tags, it is easier to deal with the special cases here and
        # hide complexity from the caller. If we add support for more semantic
        # tags, we should probably move semantic tag handling into the caller.
        if tagvalue == SEMANTIC_TAG_FINITE_SET:
            # The tagged array header has not arrived yet.
            if offset + readcount >= len(b):
                return False, None, -1, SPECIAL_NONE

            complete, size, readcount2, special = decodeitem(b,
                                                             offset + readcount)

            if not complete:
                return False, None, readcount2, SPECIAL_NONE

            if special != SPECIAL_START_ARRAY:
                raise CBORDecodeError('expected array after finite set '
                                      'semantic tag')

            return True, size, readcount + readcount2 + 1, SPECIAL_START_SET

        else:
            raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)

    elif majortype == MAJOR_TYPE_SPECIAL:
        # Only specific values for the information field are allowed.
        if subtype == SUBTYPE_FALSE:
            return True, False, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_TRUE:
            return True, True, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_NULL:
            return True, None, 1, SPECIAL_NONE
        elif subtype == SUBTYPE_INDEFINITE:
            return True, None, 1, SPECIAL_INDEFINITE_BREAK
        # If value is 24, subtype is in next byte.
        else:
            raise CBORDecodeError('special type %d not allowed' % subtype)
    else:
        # A byte only has 8 possible major types, all handled above. Raise
        # explicitly (rather than ``assert False``) so that running with
        # assertions disabled cannot fall through and return None.
        raise CBORDecodeError('unknown major type: %d' % majortype)
|
441 |
|
def decodeuint(subtype, b, offset=0, allowindefinite=False):
    """Decode the unsigned integer argument of a CBOR item header.

    ``subtype`` holds the low 5 bits of the item's initial byte. ``b`` is
    the buffer being decoded and ``offset`` is the index of the byte that
    immediately follows that initial byte.

    When ``allowindefinite`` is true, the indefinite length marker
    (subtype 31) is accepted instead of being rejected.

    Returns a ``(complete, value, count)`` tuple:

    * ``complete`` tells whether decoding finished.
    * ``value`` is the decoded integer. It is None when decoding did not
      finish, or when the indefinite length marker was accepted.
    * ``count`` is the number of additional bytes consumed when positive
      or zero, and the number of additional bytes still required when
      negative.

    Raises ``CBORDecodeError`` for subtypes that cannot encode an integer
    here.
    """

    # Values below 24 live in the initial byte itself.
    if subtype < 24:
        return True, subtype, 0

    # The indefinite length marker.
    if subtype == 31:
        if not allowindefinite:
            raise CBORDecodeError('indefinite length uint not allowed here')

        return True, None, 0

    if subtype >= 28:
        raise CBORDecodeError('unsupported subtype on integer type: %d' %
                              subtype)

    # What remains selects the width of the integer that follows.
    if subtype == 24:
        unpacker = STRUCT_BIG_UBYTE
    elif subtype == 25:
        unpacker = STRUCT_BIG_USHORT
    elif subtype == 26:
        unpacker = STRUCT_BIG_ULONG
    elif subtype == 27:
        unpacker = STRUCT_BIG_ULONGLONG
    else:
        raise CBORDecodeError('bounds condition checking violation')

    available = len(b) - offset

    # Not enough data buffered yet: report the shortfall as a negative
    # count.
    if available < unpacker.size:
        return False, None, available - unpacker.size

    return True, unpacker.unpack_from(b, offset)[0], unpacker.size
|
490 |
|
class bytestringchunk(bytes):
    """A single chunk/segment of an indefinite length bytestring.

    Instances are ordinary ``bytes`` values that additionally carry the
    ``isfirst`` and ``islast`` attributes, which tell whether the chunk
    opens or closes the indefinite length bytestring it belongs to.
    """

    def __new__(cls, v, first=False, last=False):
        chunk = super(bytestringchunk, cls).__new__(cls, v)
        chunk.isfirst, chunk.islast = first, last

        return chunk
|
505 |
|
class sansiodecoder(object):
    """A CBOR decoder that leaves all I/O to its caller.

    Create an instance and hand it segments of CBOR-encoded bytes through
    ``decode()``. Each call reports whether fully decoded values are
    available, how many bytes were consumed, and a hint for how many bytes
    to supply next time in order to decode the next value.

    The decoder expects a stream of N discrete CBOR values rather than a
    single one: if the input holds several uints packed back to back, all
    of them are decoded, not just the first.

    Once ``decode()`` reports that values are available, retrieve them
    with ``getavailable()``.

    ``decode()`` may consume only part of its input. The caller must track
    what was consumed and pass the unconsumed data in again on the next
    call.

    Decoding is atomic at the *item* level (see ``decodeitem()``). An item
    that cannot be fully decoded is not recorded as partially consumed;
    the caller is instead told to supply the item's initial bytes again
    next time. This causes some redundant parsing, but the overhead should
    be minimal.

    Only the subset of CBOR required by Mercurial is supported. Missing
    are:

    * Indefinite length arrays
    * Indefinite length maps
    * Use of indefinite length bytestrings as keys or values within
      arrays, maps, or sets.
    * Nested arrays, maps, or sets within sets
    * Any semantic tag that isn't a mathematical finite set
    * Floating point numbers
    * Undefined special value

    CBOR types map to Python types like so:

    uint -> int
    negint -> int
    bytestring -> bytes
    map -> dict
    array -> list
    True -> bool
    False -> bool
    null -> None
    indefinite length bytestring chunk -> [bytestringchunk]

    The one mapping that isn't obvious is indefinite length bytestring to
    ``bytestringchunk``. It exists so that indefinite length bytestrings
    can be streamed out of the decoder, and so that they can be told apart
    from regular bytestrings.
    """

    _STATE_NONE = 0
    _STATE_WANT_MAP_KEY = 1
    _STATE_WANT_MAP_VALUE = 2
    _STATE_WANT_ARRAY_VALUE = 3
    _STATE_WANT_SET_VALUE = 4
    _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
    _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6

    def __init__(self):
        # TODO add support for limiting size of bytestrings
        # TODO add support for limiting number of keys / values in collections
        # TODO add support for limiting size of buffered partial values

        # Running total of bytes belonging to completely decoded items.
        self.decodedbytecount = 0

        self._state = self._STATE_NONE

        # Collections currently being filled, innermost last. Every entry
        # is a dict with the container under ``v`` and the number of
        # outstanding elements under ``remaining``.
        self._collectionstack = []

        # Key decoded for the map entry whose value is still pending.
        self._currentmapkey = None

        # Finished values waiting to be fetched via getavailable().
        self._decodedvalues = []

    @property
    def inprogress(self):
        """Whether a value has been started but not yet finished."""
        return self._state != self._STATE_NONE

    def decode(self, b, offset=0):
        """Try to decode bytes from an input buffer.

        ``b`` is a collection of bytes and ``offset`` is the position in
        that buffer at which reading starts.

        ``b`` must support ``len()``, indexing and slicing. Typically
        ``bytes`` instances are used.

        Returns a 3-tuple of:

        * Bool telling whether decoded values are ready to be retrieved.
        * Number of bytes, counted from ``offset``, that were fully
          consumed.
        * Number of bytes wanted on the next call in order to decode an
          item (0 when there is no specific request).

        Raises ``CBORDecodeError`` when the input is invalid or uses an
        unsupported construct.
        """
        if not b:
            return bool(self._decodedvalues), 0, 0

        startoffset = offset
        stack = self._collectionstack

        # For every "collection start" event: the factory creating the
        # container and the state in which its first element is awaited.
        openers = {
            SPECIAL_START_ARRAY: (list, self._STATE_WANT_ARRAY_VALUE),
            SPECIAL_START_MAP: (dict, self._STATE_WANT_MAP_KEY),
            SPECIAL_START_SET: (set, self._STATE_WANT_SET_VALUE),
        }

        # State to return to after a nested collection completes, keyed by
        # the type of the container that becomes the top of the stack.
        resumestates = {
            list: self._STATE_WANT_ARRAY_VALUE,
            dict: self._STATE_WANT_MAP_KEY,
            set: self._STATE_WANT_SET_VALUE,
        }

        # States in which the top-most collection may have just been
        # completed.
        collectingstates = (
            self._STATE_WANT_ARRAY_VALUE,
            self._STATE_WANT_MAP_KEY,
            self._STATE_WANT_SET_VALUE,
        )

        chunkstates = (
            self._STATE_WANT_BYTESTRING_CHUNK_FIRST,
            self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT,
        )

        # The loop body is deliberately kept inline instead of being split
        # into per-state methods: collections consist of many items and
        # Python function calls are comparatively expensive.
        while offset < len(b):
            # One item is either a whole value or an event such as the
            # start/end of a collection or indefinite length type.
            complete, value, readcount, special = decodeitem(b, offset)

            if readcount > 0:
                self.decodedbytecount += readcount

            if not complete:
                assert readcount < 0
                return (
                    bool(self._decodedvalues),
                    offset - startoffset,
                    -readcount,
                )

            offset += readcount
            state = self._state

            # Root level: a complete value or the start of a complex one.
            if state == self._STATE_NONE:
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(value)

                elif special in openers:
                    factory, nextstate = openers[special]
                    stack.append({
                        'remaining': value,
                        'v': factory(),
                    })
                    self._state = nextstate

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST

                else:
                    raise CBORDecodeError('unhandled special state: %d' %
                                          special)

            # The item is an element of the array being filled.
            elif state == self._STATE_WANT_ARRAY_VALUE:
                # Simple values are appended as-is.
                if special == SPECIAL_NONE:
                    parent = stack[-1]
                    parent['v'].append(value)
                    parent['remaining'] -= 1

                # A collection nested within the array: attach the new
                # container to its parent first, then start filling it.
                elif special in openers:
                    factory, nextstate = openers[special]
                    parent = stack[-1]
                    child = factory()

                    parent['v'].append(child)
                    parent['remaining'] -= 1

                    stack.append({
                        'remaining': value,
                        'v': child,
                    })
                    self._state = nextstate

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError('indefinite length bytestrings '
                                          'not allowed as array values')

                else:
                    raise CBORDecodeError('unhandled special item when '
                                          'expecting array value: %d' %
                                          special)

            # The item is the key of the next entry of the current map.
            elif state == self._STATE_WANT_MAP_KEY:
                if special == SPECIAL_NONE:
                    self._currentmapkey = value
                    self._state = self._STATE_WANT_MAP_VALUE

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError('indefinite length bytestrings '
                                          'not allowed as map keys')

                elif special in openers:
                    raise CBORDecodeError('collections not supported as map '
                                          'keys')

                # No other special item may act as a map key.
                else:
                    raise CBORDecodeError('unhandled special item when '
                                          'expecting map key: %d' % special)

            # The item is the value belonging to the pending map key.
            elif state == self._STATE_WANT_MAP_VALUE:
                # Simple values are stored directly.
                if special == SPECIAL_NONE:
                    parent = stack[-1]
                    parent['v'][self._currentmapkey] = value
                    parent['remaining'] -= 1

                    self._state = self._STATE_WANT_MAP_KEY

                # A new array, map or set serves as the map value.
                elif special in openers:
                    factory, nextstate = openers[special]
                    parent = stack[-1]
                    child = factory()

                    parent['v'][self._currentmapkey] = child
                    parent['remaining'] -= 1

                    stack.append({
                        'remaining': value,
                        'v': child,
                    })
                    self._state = nextstate

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError('indefinite length bytestrings not '
                                          'allowed as map values')

                else:
                    raise CBORDecodeError('unhandled special item when '
                                          'expecting map value: %d' % special)

                self._currentmapkey = None

            # The item is a member of the current set.
            elif state == self._STATE_WANT_SET_VALUE:
                if special == SPECIAL_NONE:
                    parent = stack[-1]
                    parent['v'].add(value)
                    parent['remaining'] -= 1

                elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
                    raise CBORDecodeError('indefinite length bytestrings not '
                                          'allowed as set values')

                elif special in openers:
                    raise CBORDecodeError('collections not allowed as set '
                                          'values')

                # Non-trivial types cannot be set members.
                else:
                    raise CBORDecodeError('unhandled special item when '
                                          'expecting set value: %d' % special)

            # The item is a chunk of (or the terminator for) an indefinite
            # length bytestring.
            elif state in chunkstates:
                isfirst = state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST

                # A full chunk arrived.
                if special == SPECIAL_NONE:
                    self._decodedvalues.append(bytestringchunk(value,
                                                               first=isfirst))

                    self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT

                # End of stream marker. When no chunk preceded it, the
                # bytestring is empty. An empty chunk is emitted rather
                # than b'' so consumers expecting an indefinite length
                # bytestring still get one.
                elif special == SPECIAL_INDEFINITE_BREAK:
                    self._decodedvalues.append(bytestringchunk(b'',
                                                               first=isfirst,
                                                               last=True))

                    # Indefinite length bytestrings can't occur inside
                    # collections, so this must be the root level.
                    assert not stack
                    self._state = self._STATE_NONE

                else:
                    raise CBORDecodeError('unexpected special value when '
                                          'expecting bytestring chunk: %d' %
                                          special)

            else:
                raise CBORDecodeError('unhandled decoder state: %d' %
                                      self._state)

            # The item just handled may have been the last one of a
            # collection. Close every collection at the top of the stack
            # that has nothing left to receive.
            while self._state in collectingstates:
                finished = stack[-1]

                # Still waiting for more elements.
                if finished['remaining']:
                    break

                # Complete: no future item will need this stack entry.
                stack.pop()

                # A nested collection is emitted as part of its parent, so
                # only the state needs to follow the new top of the stack.
                if stack:
                    self._state = resumestates[type(stack[-1]['v'])]

                # The root collection is emitted on its own.
                else:
                    self._decodedvalues.append(finished['v'])
                    self._state = self._STATE_NONE

        return (
            bool(self._decodedvalues),
            offset - startoffset,
            0,
        )

    def getavailable(self):
        """Return a list of the fully decoded values.

        Returned values are handed over: they will not be reported again
        by a later call.
        """

        ready = self._decodedvalues[:]
        self._decodedvalues = []
        return ready
|
952 |
|
def decodeall(b):
    """Decode every CBOR item contained in a buffer of bytes.

    Returns the list of decoded values, which is empty for empty input.

    Besides the usual decode errors, ``CBORDecodeError`` is raised when
    the buffer as a whole does not decode to complete CBOR values. That
    covers failure to decode any value, incomplete collection types,
    incomplete indefinite length items, and extra data at the end of the
    buffer.
    """
    if not b:
        return []

    decoder = sansiodecoder()

    # Only the consumed byte count matters here; whether values are
    # available is settled by the checks below.
    consumed = decoder.decode(b)[1]

    if consumed != len(b):
        raise CBORDecodeError('input data not fully consumed')

    if decoder.inprogress:
        raise CBORDecodeError('input data not complete')

    return decoder.getavailable()