mercurial/utils/cborutil.py
changeset 39411 aeb551a3bb8a
parent 37898 2ae6a3134362
child 39412 a40d3da89b7d
equal deleted inserted replaced
39410:fcc6bd11444b 39411:aeb551a3bb8a
     6 # GNU General Public License version 2 or any later version.
     6 # GNU General Public License version 2 or any later version.
     7 
     7 
     8 from __future__ import absolute_import
     8 from __future__ import absolute_import
     9 
     9 
    10 import struct
    10 import struct
       
    11 import sys
    11 
    12 
    12 from ..thirdparty.cbor.cbor2 import (
    13 from ..thirdparty.cbor.cbor2 import (
    13     decoder as decodermod,
    14     decoder as decodermod,
    14 )
    15 )
    15 
    16 
    33 MAJOR_TYPE_SEMANTIC = 6
    34 MAJOR_TYPE_SEMANTIC = 6
    34 MAJOR_TYPE_SPECIAL = 7
    35 MAJOR_TYPE_SPECIAL = 7
    35 
    36 
    36 SUBTYPE_MASK = 0b00011111
    37 SUBTYPE_MASK = 0b00011111
    37 
    38 
       
    39 SUBTYPE_FALSE = 20
       
    40 SUBTYPE_TRUE = 21
       
    41 SUBTYPE_NULL = 22
    38 SUBTYPE_HALF_FLOAT = 25
    42 SUBTYPE_HALF_FLOAT = 25
    39 SUBTYPE_SINGLE_FLOAT = 26
    43 SUBTYPE_SINGLE_FLOAT = 26
    40 SUBTYPE_DOUBLE_FLOAT = 27
    44 SUBTYPE_DOUBLE_FLOAT = 27
    41 SUBTYPE_INDEFINITE = 31
    45 SUBTYPE_INDEFINITE = 31
       
    46 
       
    47 SEMANTIC_TAG_FINITE_SET = 258
    42 
    48 
    43 # Indefinite types begin with their major type ORd with information value 31.
    49 # Indefinite types begin with their major type ORd with information value 31.
    44 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    50 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
    45     r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
    51     r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
    46 BEGIN_INDEFINITE_ARRAY = struct.pack(
    52 BEGIN_INDEFINITE_ARRAY = struct.pack(
   144     return type(v).__name__, v
   150     return type(v).__name__, v
   145 
   151 
   146 def streamencodeset(s):
   152 def streamencodeset(s):
   147     # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
   153     # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
   148     # semantic tag 258 for finite sets.
   154     # semantic tag 258 for finite sets.
   149     yield encodelength(MAJOR_TYPE_SEMANTIC, 258)
   155     yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
   150 
   156 
   151     for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
   157     for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
   152         yield chunk
   158         yield chunk
   153 
   159 
   154 def streamencodemap(d):
   160 def streamencodemap(d):
   258             raise decodermod.CBORDecodeError(
   264             raise decodermod.CBORDecodeError(
   259                 'failed to read bytestring chunk: got %d bytes; expected %d' % (
   265                 'failed to read bytestring chunk: got %d bytes; expected %d' % (
   260                     len(chunk), length))
   266                     len(chunk), length))
   261 
   267 
   262         yield chunk
   268         yield chunk
       
   269 
       
   270 class CBORDecodeError(Exception):
       
   271     """Represents an error decoding CBOR."""
       
   272 
       
   273 if sys.version_info.major >= 3:
       
   274     def _elementtointeger(b, i):
       
   275         return b[i]
       
   276 else:
       
   277     def _elementtointeger(b, i):
       
   278         return ord(b[i])
       
   279 
       
   280 STRUCT_BIG_UBYTE = struct.Struct(r'>B')
       
   281 STRUCT_BIG_USHORT = struct.Struct('>H')
       
   282 STRUCT_BIG_ULONG = struct.Struct('>L')
       
   283 STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
       
   284 
       
   285 SPECIAL_NONE = 0
       
   286 SPECIAL_START_INDEFINITE_BYTESTRING = 1
       
   287 SPECIAL_START_ARRAY = 2
       
   288 SPECIAL_START_MAP = 3
       
   289 SPECIAL_START_SET = 4
       
   290 SPECIAL_INDEFINITE_BREAK = 5
       
   291 
       
   292 def decodeitem(b, offset=0):
       
   293     """Decode a new CBOR value from a buffer at offset.
       
   294 
       
   295     This function attempts to decode up to one complete CBOR value
       
   296     from ``b`` starting at offset ``offset``.
       
   297 
       
   298     The beginning of a collection (such as an array, map, set, or
       
   299     indefinite length bytestring) counts as a single value. For these
       
   300     special cases, a state flag will indicate that a special value was seen.
       
   301 
       
   302     When called, the function either returns a decoded value or gives
       
   303     a hint as to how many more bytes are needed to do so. By calling
       
   304     the function repeatedly given a stream of bytes, the caller can
       
   305     build up the original values.
       
   306 
       
   307     Returns a tuple with the following elements:
       
   308 
       
   309     * Bool indicating whether a complete value was decoded.
       
   310     * A decoded value if first value is True otherwise None
       
   311     * Integer number of bytes. If positive, the number of bytes
       
   312       read. If negative, the number of bytes we need to read to
       
   313       decode this value or the next chunk in this value.
       
   314     * One of the ``SPECIAL_*`` constants indicating special treatment
       
   315       for this value. ``SPECIAL_NONE`` means this is a fully decoded
       
   316       simple value (such as an integer or bool).
       
   317     """
       
   318 
       
   319     initial = _elementtointeger(b, offset)
       
   320     offset += 1
       
   321 
       
   322     majortype = initial >> 5
       
   323     subtype = initial & SUBTYPE_MASK
       
   324 
       
   325     if majortype == MAJOR_TYPE_UINT:
       
   326         complete, value, readcount = decodeuint(subtype, b, offset)
       
   327 
       
   328         if complete:
       
   329             return True, value, readcount + 1, SPECIAL_NONE
       
   330         else:
       
   331             return False, None, readcount, SPECIAL_NONE
       
   332 
       
   333     elif majortype == MAJOR_TYPE_NEGINT:
       
   334         # Negative integers are the same as UINT except inverted minus 1.
       
   335         complete, value, readcount = decodeuint(subtype, b, offset)
       
   336 
       
   337         if complete:
       
   338             return True, -value - 1, readcount + 1, SPECIAL_NONE
       
   339         else:
       
   340             return False, None, readcount, SPECIAL_NONE
       
   341 
       
   342     elif majortype == MAJOR_TYPE_BYTESTRING:
       
   343         # Beginning of bytestrings are treated as uints in order to
       
   344         # decode their length, which may be indefinite.
       
   345         complete, size, readcount = decodeuint(subtype, b, offset,
       
   346                                                allowindefinite=True)
       
   347 
       
   348         # We don't know the size of the bytestring. It must be a definitive
       
   349         # length since the indefinite subtype would be encoded in the initial
       
   350         # byte.
       
   351         if not complete:
       
   352             return False, None, readcount, SPECIAL_NONE
       
   353 
       
   354         # We know the length of the bytestring.
       
   355         if size is not None:
       
   356             # And the data is available in the buffer.
       
   357             if offset + readcount + size <= len(b):
       
   358                 value = b[offset + readcount:offset + readcount + size]
       
   359                 return True, value, readcount + size + 1, SPECIAL_NONE
       
   360 
       
   361             # And we need more data in order to return the bytestring.
       
   362             else:
       
   363                 wanted = len(b) - offset - readcount - size
       
   364                 return False, None, wanted, SPECIAL_NONE
       
   365 
       
   366         # It is an indefinite length bytestring.
       
   367         else:
       
   368             return True, None, 1, SPECIAL_START_INDEFINITE_BYTESTRING
       
   369 
       
   370     elif majortype == MAJOR_TYPE_STRING:
       
   371         raise CBORDecodeError('string major type not supported')
       
   372 
       
   373     elif majortype == MAJOR_TYPE_ARRAY:
       
   374         # Beginning of arrays are treated as uints in order to decode their
       
   375         # length. We don't allow indefinite length arrays.
       
   376         complete, size, readcount = decodeuint(subtype, b, offset)
       
   377 
       
   378         if complete:
       
   379             return True, size, readcount + 1, SPECIAL_START_ARRAY
       
   380         else:
       
   381             return False, None, readcount, SPECIAL_NONE
       
   382 
       
   383     elif majortype == MAJOR_TYPE_MAP:
       
   384         # Beginning of maps are treated as uints in order to decode their
       
   385         # number of elements. We don't allow indefinite length arrays.
       
   386         complete, size, readcount = decodeuint(subtype, b, offset)
       
   387 
       
   388         if complete:
       
   389             return True, size, readcount + 1, SPECIAL_START_MAP
       
   390         else:
       
   391             return False, None, readcount, SPECIAL_NONE
       
   392 
       
   393     elif majortype == MAJOR_TYPE_SEMANTIC:
       
   394         # Semantic tag value is read the same as a uint.
       
   395         complete, tagvalue, readcount = decodeuint(subtype, b, offset)
       
   396 
       
   397         if not complete:
       
   398             return False, None, readcount, SPECIAL_NONE
       
   399 
       
   400         # This behavior here is a little wonky. The main type being "decorated"
       
   401         # by this semantic tag follows. A more robust parser would probably emit
       
   402         # a special flag indicating this as a semantic tag and let the caller
       
   403         # deal with the types that follow. But since we don't support many
       
   404         # semantic tags, it is easier to deal with the special cases here and
       
   405         # hide complexity from the caller. If we add support for more semantic
       
   406         # tags, we should probably move semantic tag handling into the caller.
       
   407         if tagvalue == SEMANTIC_TAG_FINITE_SET:
       
   408             if offset + readcount >= len(b):
       
   409                 return False, None, -1, SPECIAL_NONE
       
   410 
       
   411             complete, size, readcount2, special = decodeitem(b,
       
   412                                                              offset + readcount)
       
   413 
       
   414             if not complete:
       
   415                 return False, None, readcount2, SPECIAL_NONE
       
   416 
       
   417             if special != SPECIAL_START_ARRAY:
       
   418                 raise CBORDecodeError('expected array after finite set '
       
   419                                       'semantic tag')
       
   420 
       
   421             return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
       
   422 
       
   423         else:
       
   424             raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
       
   425 
       
   426     elif majortype == MAJOR_TYPE_SPECIAL:
       
   427         # Only specific values for the information field are allowed.
       
   428         if subtype == SUBTYPE_FALSE:
       
   429             return True, False, 1, SPECIAL_NONE
       
   430         elif subtype == SUBTYPE_TRUE:
       
   431             return True, True, 1, SPECIAL_NONE
       
   432         elif subtype == SUBTYPE_NULL:
       
   433             return True, None, 1, SPECIAL_NONE
       
   434         elif subtype == SUBTYPE_INDEFINITE:
       
   435             return True, None, 1, SPECIAL_INDEFINITE_BREAK
       
   436         # If value is 24, subtype is in next byte.
       
   437         else:
       
   438             raise CBORDecodeError('special type %d not allowed' % subtype)
       
   439     else:
       
   440         assert False
       
   441 
       
   442 def decodeuint(subtype, b, offset=0, allowindefinite=False):
       
   443     """Decode an unsigned integer.
       
   444 
       
   445     ``subtype`` is the lower 5 bits from the initial byte CBOR item
       
   446     "header." ``b`` is a buffer containing bytes. ``offset`` points to
       
   447     the index of the first byte after the byte that ``subtype`` was
       
   448     derived from.
       
   449 
       
   450     ``allowindefinite`` allows the special indefinite length value
       
   451     indicator.
       
   452 
       
   453     Returns a 3-tuple of (successful, value, count).
       
   454 
       
   455     The first element is a bool indicating if decoding completed. The 2nd
       
   456     is the decoded integer value or None if not fully decoded or the subtype
       
   457     is 31 and ``allowindefinite`` is True. The 3rd value is the count of bytes.
       
   458     If positive, it is the number of additional bytes decoded. If negative,
       
   459     it is the number of additional bytes needed to decode this value.
       
   460     """
       
   461 
       
   462     # Small values are inline.
       
   463     if subtype < 24:
       
   464         return True, subtype, 0
       
   465     # Indefinite length specifier.
       
   466     elif subtype == 31:
       
   467         if allowindefinite:
       
   468             return True, None, 0
       
   469         else:
       
   470             raise CBORDecodeError('indefinite length uint not allowed here')
       
   471     elif subtype >= 28:
       
   472         raise CBORDecodeError('unsupported subtype on integer type: %d' %
       
   473                               subtype)
       
   474 
       
   475     if subtype == 24:
       
   476         s = STRUCT_BIG_UBYTE
       
   477     elif subtype == 25:
       
   478         s = STRUCT_BIG_USHORT
       
   479     elif subtype == 26:
       
   480         s = STRUCT_BIG_ULONG
       
   481     elif subtype == 27:
       
   482         s = STRUCT_BIG_ULONGLONG
       
   483     else:
       
   484         raise CBORDecodeError('bounds condition checking violation')
       
   485 
       
   486     if len(b) - offset >= s.size:
       
   487         return True, s.unpack_from(b, offset)[0], s.size
       
   488     else:
       
   489         return False, None, len(b) - offset - s.size
       
   490 
       
   491 class bytestringchunk(bytes):
       
   492     """Represents a chunk/segment in an indefinite length bytestring.
       
   493 
       
   494     This behaves like a ``bytes`` but in addition has the ``isfirst``
       
   495     and ``islast`` attributes indicating whether this chunk is the first
       
   496     or last in an indefinite length bytestring.
       
   497     """
       
   498 
       
   499     def __new__(cls, v, first=False, last=False):
       
   500         self = bytes.__new__(cls, v)
       
   501         self.isfirst = first
       
   502         self.islast = last
       
   503 
       
   504         return self
       
   505 
       
   506 class sansiodecoder(object):
       
   507     """A CBOR decoder that doesn't perform its own I/O.
       
   508 
       
   509     To use, construct an instance and feed it segments containing
       
   510     CBOR-encoded bytes via ``decode()``. The return value from ``decode()``
       
   511     indicates whether a fully-decoded value is available, how many bytes
       
   512     were consumed, and offers a hint as to how many bytes should be fed
       
   513     in next time to decode the next value.
       
   514 
       
   515     The decoder assumes it will decode N discrete CBOR values, not just
       
   516     a single value. i.e. if the bytestream contains uints packed one after
       
   517     the other, the decoder will decode them all, rather than just the initial
       
   518     one.
       
   519 
       
   520     When ``decode()`` indicates a value is available, call ``getavailable()``
       
   521     to return all fully decoded values.
       
   522 
       
   523     ``decode()`` can partially decode input. It is up to the caller to keep
       
   524     track of what data was consumed and to pass unconsumed data in on the
       
   525     next invocation.
       
   526 
       
   527     The decoder decodes atomically at the *item* level. See ``decodeitem()``.
       
   528     If an *item* cannot be fully decoded, the decoder won't record it as
       
   529     partially consumed. Instead, the caller will be instructed to pass in
       
   530     the initial bytes of this item on the next invocation. This does result
       
   531     in some redundant parsing. But the overhead should be minimal.
       
   532 
       
   533     This decoder only supports a subset of CBOR as required by Mercurial.
       
   534     It lacks support for:
       
   535 
       
   536     * Indefinite length arrays
       
   537     * Indefinite length maps
       
   538     * Use of indefinite length bytestrings as keys or values within
       
   539       arrays, maps, or sets.
       
   540     * Nested arrays, maps, or sets within sets
       
   541     * Any semantic tag that isn't a mathematical finite set
       
   542     * Floating point numbers
       
   543     * Undefined special value
       
   544 
       
   545     CBOR types are decoded to Python types as follows:
       
   546 
       
   547     uint -> int
       
   548     negint -> int
       
   549     bytestring -> bytes
       
   550     map -> dict
       
   551     array -> list
       
   552     True -> bool
       
   553     False -> bool
       
   554     null -> None
       
   555     indefinite length bytestring chunk -> [bytestringchunk]
       
   556 
       
   557     The only non-obvious mapping here is an indefinite length bytestring
       
   558     to the ``bytestringchunk`` type. This is to facilitate streaming
       
   559     indefinite length bytestrings out of the decoder and to differentiate
       
   560     a regular bytestring from an indefinite length bytestring.
       
   561     """
       
   562 
       
   563     _STATE_NONE = 0
       
   564     _STATE_WANT_MAP_KEY = 1
       
   565     _STATE_WANT_MAP_VALUE = 2
       
   566     _STATE_WANT_ARRAY_VALUE = 3
       
   567     _STATE_WANT_SET_VALUE = 4
       
   568     _STATE_WANT_BYTESTRING_CHUNK_FIRST = 5
       
   569     _STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT = 6
       
   570 
       
   571     def __init__(self):
       
   572         # TODO add support for limiting size of bytestrings
       
   573         # TODO add support for limiting number of keys / values in collections
       
   574         # TODO add support for limiting size of buffered partial values
       
   575 
       
   576         self.decodedbytecount = 0
       
   577 
       
   578         self._state = self._STATE_NONE
       
   579 
       
   580         # Stack of active nested collections. Each entry is a dict describing
       
   581         # the collection.
       
   582         self._collectionstack = []
       
   583 
       
   584         # Fully decoded key to use for the current map.
       
   585         self._currentmapkey = None
       
   586 
       
   587         # Fully decoded values available for retrieval.
       
   588         self._decodedvalues = []
       
   589 
       
   590     @property
       
   591     def inprogress(self):
       
   592         """Whether the decoder has partially decoded a value."""
       
   593         return self._state != self._STATE_NONE
       
   594 
       
   595     def decode(self, b, offset=0):
       
   596         """Attempt to decode bytes from an input buffer.
       
   597 
       
   598         ``b`` is a collection of bytes and ``offset`` is the byte
       
   599         offset within that buffer from which to begin reading data.
       
   600 
       
   601         ``b`` must support ``len()`` and accessing bytes slices via
       
   602         ``__slice__``. Typically ``bytes`` instances are used.
       
   603 
       
   604         Returns a tuple with the following fields:
       
   605 
       
   606         * Bool indicating whether values are available for retrieval.
       
   607         * Integer indicating the number of bytes that were fully consumed,
       
   608           starting from ``offset``.
       
   609         * Integer indicating the number of bytes that are desired for the
       
   610           next call in order to decode an item.
       
   611         """
       
   612         if not b:
       
   613             return bool(self._decodedvalues), 0, 0
       
   614 
       
   615         initialoffset = offset
       
   616 
       
   617         # We could easily split the body of this loop into a function. But
       
   618         # Python performance is sensitive to function calls and collections
       
   619         # are composed of many items. So leaving as a while loop could help
       
   620         # with performance. One thing that may not help is the use of
       
   621         # if..elif versus a lookup/dispatch table. There may be value
       
   622         # in switching that.
       
   623         while offset < len(b):
       
   624             # Attempt to decode an item. This could be a whole value or a
       
   625             # special value indicating an event, such as start or end of a
       
   626             # collection or indefinite length type.
       
   627             complete, value, readcount, special = decodeitem(b, offset)
       
   628 
       
   629             if readcount > 0:
       
   630                 self.decodedbytecount += readcount
       
   631 
       
   632             if not complete:
       
   633                 assert readcount < 0
       
   634                 return (
       
   635                     bool(self._decodedvalues),
       
   636                     offset - initialoffset,
       
   637                     -readcount,
       
   638                 )
       
   639 
       
   640             offset += readcount
       
   641 
       
   642             # No nested state. We either have a full value or beginning of a
       
   643             # complex value to deal with.
       
   644             if self._state == self._STATE_NONE:
       
   645                 # A normal value.
       
   646                 if special == SPECIAL_NONE:
       
   647                     self._decodedvalues.append(value)
       
   648 
       
   649                 elif special == SPECIAL_START_ARRAY:
       
   650                     self._collectionstack.append({
       
   651                         'remaining': value,
       
   652                         'v': [],
       
   653                     })
       
   654                     self._state = self._STATE_WANT_ARRAY_VALUE
       
   655 
       
   656                 elif special == SPECIAL_START_MAP:
       
   657                     self._collectionstack.append({
       
   658                         'remaining': value,
       
   659                         'v': {},
       
   660                     })
       
   661                     self._state = self._STATE_WANT_MAP_KEY
       
   662 
       
   663                 elif special == SPECIAL_START_SET:
       
   664                     self._collectionstack.append({
       
   665                         'remaining': value,
       
   666                         'v': set(),
       
   667                     })
       
   668                     self._state = self._STATE_WANT_SET_VALUE
       
   669 
       
   670                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
       
   671                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
       
   672 
       
   673                 else:
       
   674                     raise CBORDecodeError('unhandled special state: %d' %
       
   675                                           special)
       
   676 
       
   677             # This value becomes an element of the current array.
       
   678             elif self._state == self._STATE_WANT_ARRAY_VALUE:
       
   679                 # Simple values get appended.
       
   680                 if special == SPECIAL_NONE:
       
   681                     c = self._collectionstack[-1]
       
   682                     c['v'].append(value)
       
   683                     c['remaining'] -= 1
       
   684 
       
   685                     # self._state doesn't need changed.
       
   686 
       
   687                 # An array nested within an array.
       
   688                 elif special == SPECIAL_START_ARRAY:
       
   689                     lastc = self._collectionstack[-1]
       
   690                     newvalue = []
       
   691 
       
   692                     lastc['v'].append(newvalue)
       
   693                     lastc['remaining'] -= 1
       
   694 
       
   695                     self._collectionstack.append({
       
   696                         'remaining': value,
       
   697                         'v': newvalue,
       
   698                     })
       
   699 
       
   700                     # self._state doesn't need changed.
       
   701 
       
   702                 # A map nested within an array.
       
   703                 elif special == SPECIAL_START_MAP:
       
   704                     lastc = self._collectionstack[-1]
       
   705                     newvalue = {}
       
   706 
       
   707                     lastc['v'].append(newvalue)
       
   708                     lastc['remaining'] -= 1
       
   709 
       
   710                     self._collectionstack.append({
       
   711                         'remaining': value,
       
   712                         'v': newvalue
       
   713                     })
       
   714 
       
   715                     self._state = self._STATE_WANT_MAP_KEY
       
   716 
       
   717                 elif special == SPECIAL_START_SET:
       
   718                     lastc = self._collectionstack[-1]
       
   719                     newvalue = set()
       
   720 
       
   721                     lastc['v'].append(newvalue)
       
   722                     lastc['remaining'] -= 1
       
   723 
       
   724                     self._collectionstack.append({
       
   725                         'remaining': value,
       
   726                         'v': newvalue,
       
   727                     })
       
   728 
       
   729                     self._state = self._STATE_WANT_SET_VALUE
       
   730 
       
   731                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
       
   732                     raise CBORDecodeError('indefinite length bytestrings '
       
   733                                           'not allowed as array values')
       
   734 
       
   735                 else:
       
   736                     raise CBORDecodeError('unhandled special item when '
       
   737                                           'expecting array value: %d' % special)
       
   738 
       
   739             # This value becomes the key of the current map instance.
       
   740             elif self._state == self._STATE_WANT_MAP_KEY:
       
   741                 if special == SPECIAL_NONE:
       
   742                     self._currentmapkey = value
       
   743                     self._state = self._STATE_WANT_MAP_VALUE
       
   744 
       
   745                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
       
   746                     raise CBORDecodeError('indefinite length bytestrings '
       
   747                                           'not allowed as map keys')
       
   748 
       
   749                 elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP,
       
   750                                  SPECIAL_START_SET):
       
   751                     raise CBORDecodeError('collections not supported as map '
       
   752                                           'keys')
       
   753 
       
   754                 # We do not allow special values to be used as map keys.
       
   755                 else:
       
   756                     raise CBORDecodeError('unhandled special item when '
       
   757                                           'expecting map key: %d' % special)
       
   758 
       
   759             # This value becomes the value of the current map key.
       
   760             elif self._state == self._STATE_WANT_MAP_VALUE:
       
   761                 # Simple values simply get inserted into the map.
       
   762                 if special == SPECIAL_NONE:
       
   763                     lastc = self._collectionstack[-1]
       
   764                     lastc['v'][self._currentmapkey] = value
       
   765                     lastc['remaining'] -= 1
       
   766 
       
   767                     self._state = self._STATE_WANT_MAP_KEY
       
   768 
       
   769                 # A new array is used as the map value.
       
   770                 elif special == SPECIAL_START_ARRAY:
       
   771                     lastc = self._collectionstack[-1]
       
   772                     newvalue = []
       
   773 
       
   774                     lastc['v'][self._currentmapkey] = newvalue
       
   775                     lastc['remaining'] -= 1
       
   776 
       
   777                     self._collectionstack.append({
       
   778                         'remaining': value,
       
   779                         'v': newvalue,
       
   780                     })
       
   781 
       
   782                     self._state = self._STATE_WANT_ARRAY_VALUE
       
   783 
       
   784                 # A new map is used as the map value.
       
   785                 elif special == SPECIAL_START_MAP:
       
   786                     lastc = self._collectionstack[-1]
       
   787                     newvalue = {}
       
   788 
       
   789                     lastc['v'][self._currentmapkey] = newvalue
       
   790                     lastc['remaining'] -= 1
       
   791 
       
   792                     self._collectionstack.append({
       
   793                         'remaining': value,
       
   794                         'v': newvalue,
       
   795                     })
       
   796 
       
   797                     self._state = self._STATE_WANT_MAP_KEY
       
   798 
       
   799                 # A new set is used as the map value.
       
   800                 elif special == SPECIAL_START_SET:
       
   801                     lastc = self._collectionstack[-1]
       
   802                     newvalue = set()
       
   803 
       
   804                     lastc['v'][self._currentmapkey] = newvalue
       
   805                     lastc['remaining'] -= 1
       
   806 
       
   807                     self._collectionstack.append({
       
   808                         'remaining': value,
       
   809                         'v': newvalue,
       
   810                     })
       
   811 
       
   812                     self._state = self._STATE_WANT_SET_VALUE
       
   813 
       
   814                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
       
   815                     raise CBORDecodeError('indefinite length bytestrings not '
       
   816                                           'allowed as map values')
       
   817 
       
   818                 else:
       
   819                     raise CBORDecodeError('unhandled special item when '
       
   820                                           'expecting map value: %d' % special)
       
   821 
       
   822                 self._currentmapkey = None
       
   823 
       
   824             # This value is added to the current set.
       
   825             elif self._state == self._STATE_WANT_SET_VALUE:
       
   826                 if special == SPECIAL_NONE:
       
   827                     lastc = self._collectionstack[-1]
       
   828                     lastc['v'].add(value)
       
   829                     lastc['remaining'] -= 1
       
   830 
       
   831                 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
       
   832                     raise CBORDecodeError('indefinite length bytestrings not '
       
   833                                           'allowed as set values')
       
   834 
       
   835                 elif special in (SPECIAL_START_ARRAY,
       
   836                                  SPECIAL_START_MAP,
       
   837                                  SPECIAL_START_SET):
       
   838                     raise CBORDecodeError('collections not allowed as set '
       
   839                                           'values')
       
   840 
       
   841                 # We don't allow non-trivial types to exist as set values.
       
   842                 else:
       
   843                     raise CBORDecodeError('unhandled special item when '
       
   844                                           'expecting set value: %d' % special)
       
   845 
       
   846             # This value represents the first chunk in an indefinite length
       
   847             # bytestring.
       
   848             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
       
   849                 # We received a full chunk.
       
   850                 if special == SPECIAL_NONE:
       
   851                     self._decodedvalues.append(bytestringchunk(value,
       
   852                                                                first=True))
       
   853 
       
   854                     self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
       
   855 
       
   856                 # The end of stream marker. This means it is an empty
       
   857                 # indefinite length bytestring.
       
   858                 elif special == SPECIAL_INDEFINITE_BREAK:
       
   859                     # We /could/ convert this to a b''. But we want to preserve
       
   860                     # the nature of the underlying data so consumers expecting
       
   861                     # an indefinite length bytestring get one.
       
   862                     self._decodedvalues.append(bytestringchunk(b'',
       
   863                                                                first=True,
       
   864                                                                last=True))
       
   865 
       
   866                     # Since indefinite length bytestrings can't be used in
       
   867                     # collections, we must be at the root level.
       
   868                     assert not self._collectionstack
       
   869                     self._state = self._STATE_NONE
       
   870 
       
   871                 else:
       
   872                     raise CBORDecodeError('unexpected special value when '
       
   873                                           'expecting bytestring chunk: %d' %
       
   874                                           special)
       
   875 
       
   876             # This value represents the non-initial chunk in an indefinite
       
   877             # length bytestring.
       
   878             elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
       
   879                 # We received a full chunk.
       
   880                 if special == SPECIAL_NONE:
       
   881                     self._decodedvalues.append(bytestringchunk(value))
       
   882 
       
   883                 # The end of stream marker.
       
   884                 elif special == SPECIAL_INDEFINITE_BREAK:
       
   885                     self._decodedvalues.append(bytestringchunk(b'', last=True))
       
   886 
       
   887                     # Since indefinite length bytestrings can't be used in
       
   888                     # collections, we must be at the root level.
       
   889                     assert not self._collectionstack
       
   890                     self._state = self._STATE_NONE
       
   891 
       
   892                 else:
       
   893                     raise CBORDecodeError('unexpected special value when '
       
   894                                           'expecting bytestring chunk: %d' %
       
   895                                           special)
       
   896 
       
   897             else:
       
   898                 raise CBORDecodeError('unhandled decoder state: %d' %
       
   899                                       self._state)
       
   900 
       
   901             # We could have just added the final value in a collection. End
       
   902             # all complete collections at the top of the stack.
       
   903             while True:
       
   904                 # Bail if we're not waiting on a new collection item.
       
   905                 if self._state not in (self._STATE_WANT_ARRAY_VALUE,
       
   906                                        self._STATE_WANT_MAP_KEY,
       
   907                                        self._STATE_WANT_SET_VALUE):
       
   908                     break
       
   909 
       
   910                 # Or we are expecting more items for this collection.
       
   911                 lastc = self._collectionstack[-1]
       
   912 
       
   913                 if lastc['remaining']:
       
   914                     break
       
   915 
       
   916                 # The collection at the top of the stack is complete.
       
   917 
       
   918                 # Discard it, as it isn't needed for future items.
       
   919                 self._collectionstack.pop()
       
   920 
       
   921                 # If this is a nested collection, we don't emit it, since it
       
   922                 # will be emitted by its parent collection. But we do need to
       
   923                 # update state to reflect what the new top-most collection
       
   924                 # on the stack is.
       
   925                 if self._collectionstack:
       
   926                     self._state = {
       
   927                         list: self._STATE_WANT_ARRAY_VALUE,
       
   928                         dict: self._STATE_WANT_MAP_KEY,
       
   929                         set: self._STATE_WANT_SET_VALUE,
       
   930                     }[type(self._collectionstack[-1]['v'])]
       
   931 
       
   932                 # If this is the root collection, emit it.
       
   933                 else:
       
   934                     self._decodedvalues.append(lastc['v'])
       
   935                     self._state = self._STATE_NONE
       
   936 
       
   937         return (
       
   938             bool(self._decodedvalues),
       
   939             offset - initialoffset,
       
   940             0,
       
   941         )
       
   942 
       
   943     def getavailable(self):
       
   944         """Returns an iterator over fully decoded values.
       
   945 
       
   946         Once values are retrieved, they won't be available on the next call.
       
   947         """
       
   948 
       
   949         l = list(self._decodedvalues)
       
   950         self._decodedvalues = []
       
   951         return l
       
   952 
       
   953 def decodeall(b):
       
   954     """Decode all CBOR items present in an iterable of bytes.
       
   955 
       
   956     In addition to regular decode errors, raises CBORDecodeError if the
       
   957     entirety of the passed buffer does not fully decode to complete CBOR
       
   958     values. This includes failure to decode any value, incomplete collection
       
   959     types, incomplete indefinite length items, and extra data at the end of
       
   960     the buffer.
       
   961     """
       
   962     if not b:
       
   963         return []
       
   964 
       
   965     decoder = sansiodecoder()
       
   966 
       
   967     havevalues, readcount, wantbytes = decoder.decode(b)
       
   968 
       
   969     if readcount != len(b):
       
   970         raise CBORDecodeError('input data not fully consumed')
       
   971 
       
   972     if decoder.inprogress:
       
   973         raise CBORDecodeError('input data not complete')
       
   974 
       
   975     return decoder.getavailable()