comparison mercurial/utils/cborutil.py @ 43076:2372284d9457

formatting: blacken the codebase

This is using my patch to black (https://github.com/psf/black/pull/826) so we don't un-wrap collection literals.

Done with:

  hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S

# skip-blame mass-reformatting only

# no-check-commit reformats foo_bar functions

Differential Revision: https://phab.mercurial-scm.org/D6971
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:45:02 -0400
parents b6387a65851d
children 687b865b95ad
comparison
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
44 44
45 SEMANTIC_TAG_FINITE_SET = 258 45 SEMANTIC_TAG_FINITE_SET = 258
46 46
47 # Indefinite types begin with their major type ORd with information value 31. 47 # Indefinite types begin with their major type ORd with information value 31.
48 BEGIN_INDEFINITE_BYTESTRING = struct.pack( 48 BEGIN_INDEFINITE_BYTESTRING = struct.pack(
49 r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE) 49 r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE
50 )
50 BEGIN_INDEFINITE_ARRAY = struct.pack( 51 BEGIN_INDEFINITE_ARRAY = struct.pack(
51 r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE) 52 r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE
53 )
52 BEGIN_INDEFINITE_MAP = struct.pack( 54 BEGIN_INDEFINITE_MAP = struct.pack(
53 r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE) 55 r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE
56 )
54 57
55 ENCODED_LENGTH_1 = struct.Struct(r'>B') 58 ENCODED_LENGTH_1 = struct.Struct(r'>B')
56 ENCODED_LENGTH_2 = struct.Struct(r'>BB') 59 ENCODED_LENGTH_2 = struct.Struct(r'>BB')
57 ENCODED_LENGTH_3 = struct.Struct(r'>BH') 60 ENCODED_LENGTH_3 = struct.Struct(r'>BH')
58 ENCODED_LENGTH_4 = struct.Struct(r'>BL') 61 ENCODED_LENGTH_4 = struct.Struct(r'>BL')
59 ENCODED_LENGTH_5 = struct.Struct(r'>BQ') 62 ENCODED_LENGTH_5 = struct.Struct(r'>BQ')
60 63
61 # The break ends an indefinite length item. 64 # The break ends an indefinite length item.
62 BREAK = b'\xff' 65 BREAK = b'\xff'
63 BREAK_INT = 255 66 BREAK_INT = 255
67
64 68
65 def encodelength(majortype, length): 69 def encodelength(majortype, length):
66 """Obtain a value encoding the major type and its length.""" 70 """Obtain a value encoding the major type and its length."""
67 if length < 24: 71 if length < 24:
68 return ENCODED_LENGTH_1.pack(majortype << 5 | length) 72 return ENCODED_LENGTH_1.pack(majortype << 5 | length)
73 elif length < 4294967296: 77 elif length < 4294967296:
74 return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length) 78 return ENCODED_LENGTH_4.pack(majortype << 5 | 26, length)
75 else: 79 else:
76 return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length) 80 return ENCODED_LENGTH_5.pack(majortype << 5 | 27, length)
77 81
82
78 def streamencodebytestring(v): 83 def streamencodebytestring(v):
79 yield encodelength(MAJOR_TYPE_BYTESTRING, len(v)) 84 yield encodelength(MAJOR_TYPE_BYTESTRING, len(v))
80 yield v 85 yield v
86
81 87
82 def streamencodebytestringfromiter(it): 88 def streamencodebytestringfromiter(it):
83 """Convert an iterator of chunks to an indefinite bytestring. 89 """Convert an iterator of chunks to an indefinite bytestring.
84 90
85 Given an input that is iterable and each element in the iterator is 91 Given an input that is iterable and each element in the iterator is
91 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) 97 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
92 yield chunk 98 yield chunk
93 99
94 yield BREAK 100 yield BREAK
95 101
102
96 def streamencodeindefinitebytestring(source, chunksize=65536): 103 def streamencodeindefinitebytestring(source, chunksize=65536):
97 """Given a large source buffer, emit as an indefinite length bytestring. 104 """Given a large source buffer, emit as an indefinite length bytestring.
98 105
99 This is a generator of chunks constituting the encoded CBOR data. 106 This is a generator of chunks constituting the encoded CBOR data.
100 """ 107 """
102 109
103 i = 0 110 i = 0
104 l = len(source) 111 l = len(source)
105 112
106 while True: 113 while True:
107 chunk = source[i:i + chunksize] 114 chunk = source[i : i + chunksize]
108 i += len(chunk) 115 i += len(chunk)
109 116
110 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk)) 117 yield encodelength(MAJOR_TYPE_BYTESTRING, len(chunk))
111 yield chunk 118 yield chunk
112 119
113 if i >= l: 120 if i >= l:
114 break 121 break
115 122
116 yield BREAK 123 yield BREAK
124
117 125
118 def streamencodeint(v): 126 def streamencodeint(v):
119 if v >= 18446744073709551616 or v < -18446744073709551616: 127 if v >= 18446744073709551616 or v < -18446744073709551616:
120 raise ValueError('big integers not supported') 128 raise ValueError('big integers not supported')
121 129
122 if v >= 0: 130 if v >= 0:
123 yield encodelength(MAJOR_TYPE_UINT, v) 131 yield encodelength(MAJOR_TYPE_UINT, v)
124 else: 132 else:
125 yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1) 133 yield encodelength(MAJOR_TYPE_NEGINT, abs(v) - 1)
126 134
135
127 def streamencodearray(l): 136 def streamencodearray(l):
128 """Encode a known size iterable to an array.""" 137 """Encode a known size iterable to an array."""
129 138
130 yield encodelength(MAJOR_TYPE_ARRAY, len(l)) 139 yield encodelength(MAJOR_TYPE_ARRAY, len(l))
131 140
132 for i in l: 141 for i in l:
133 for chunk in streamencode(i): 142 for chunk in streamencode(i):
134 yield chunk 143 yield chunk
135 144
145
136 def streamencodearrayfromiter(it): 146 def streamencodearrayfromiter(it):
137 """Encode an iterator of items to an indefinite length array.""" 147 """Encode an iterator of items to an indefinite length array."""
138 148
139 yield BEGIN_INDEFINITE_ARRAY 149 yield BEGIN_INDEFINITE_ARRAY
140 150
142 for chunk in streamencode(i): 152 for chunk in streamencode(i):
143 yield chunk 153 yield chunk
144 154
145 yield BREAK 155 yield BREAK
146 156
157
147 def _mixedtypesortkey(v): 158 def _mixedtypesortkey(v):
148 return type(v).__name__, v 159 return type(v).__name__, v
160
149 161
150 def streamencodeset(s): 162 def streamencodeset(s):
151 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines 163 # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml defines
152 # semantic tag 258 for finite sets. 164 # semantic tag 258 for finite sets.
153 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET) 165 yield encodelength(MAJOR_TYPE_SEMANTIC, SEMANTIC_TAG_FINITE_SET)
154 166
155 for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)): 167 for chunk in streamencodearray(sorted(s, key=_mixedtypesortkey)):
156 yield chunk 168 yield chunk
157 169
170
158 def streamencodemap(d): 171 def streamencodemap(d):
159 """Encode dictionary to a generator. 172 """Encode dictionary to a generator.
160 173
161 Does not support indefinite length dictionaries. 174 Does not support indefinite length dictionaries.
162 """ 175 """
163 yield encodelength(MAJOR_TYPE_MAP, len(d)) 176 yield encodelength(MAJOR_TYPE_MAP, len(d))
164 177
165 for key, value in sorted(d.iteritems(), 178 for key, value in sorted(
166 key=lambda x: _mixedtypesortkey(x[0])): 179 d.iteritems(), key=lambda x: _mixedtypesortkey(x[0])
180 ):
167 for chunk in streamencode(key): 181 for chunk in streamencode(key):
168 yield chunk 182 yield chunk
169 for chunk in streamencode(value): 183 for chunk in streamencode(value):
170 yield chunk 184 yield chunk
185
171 186
172 def streamencodemapfromiter(it): 187 def streamencodemapfromiter(it):
173 """Given an iterable of (key, value), encode to an indefinite length map.""" 188 """Given an iterable of (key, value), encode to an indefinite length map."""
174 yield BEGIN_INDEFINITE_MAP 189 yield BEGIN_INDEFINITE_MAP
175 190
179 for chunk in streamencode(value): 194 for chunk in streamencode(value):
180 yield chunk 195 yield chunk
181 196
182 yield BREAK 197 yield BREAK
183 198
199
184 def streamencodebool(b): 200 def streamencodebool(b):
185 # major type 7, simple value 20 and 21. 201 # major type 7, simple value 20 and 21.
186 yield b'\xf5' if b else b'\xf4' 202 yield b'\xf5' if b else b'\xf4'
187 203
204
188 def streamencodenone(v): 205 def streamencodenone(v):
189 # major type 7, simple value 22. 206 # major type 7, simple value 22.
190 yield b'\xf6' 207 yield b'\xf6'
208
191 209
192 STREAM_ENCODERS = { 210 STREAM_ENCODERS = {
193 bytes: streamencodebytestring, 211 bytes: streamencodebytestring,
194 int: streamencodeint, 212 int: streamencodeint,
195 pycompat.long: streamencodeint, 213 pycompat.long: streamencodeint,
199 set: streamencodeset, 217 set: streamencodeset,
200 bool: streamencodebool, 218 bool: streamencodebool,
201 type(None): streamencodenone, 219 type(None): streamencodenone,
202 } 220 }
203 221
222
204 def streamencode(v): 223 def streamencode(v):
205 """Encode a value in a streaming manner. 224 """Encode a value in a streaming manner.
206 225
207 Given an input object, encode it to CBOR recursively. 226 Given an input object, encode it to CBOR recursively.
208 227
224 if not fn: 243 if not fn:
225 raise ValueError('do not know how to encode %s' % type(v)) 244 raise ValueError('do not know how to encode %s' % type(v))
226 245
227 return fn(v) 246 return fn(v)
228 247
248
229 class CBORDecodeError(Exception): 249 class CBORDecodeError(Exception):
230 """Represents an error decoding CBOR.""" 250 """Represents an error decoding CBOR."""
231 251
252
232 if sys.version_info.major >= 3: 253 if sys.version_info.major >= 3:
254
233 def _elementtointeger(b, i): 255 def _elementtointeger(b, i):
234 return b[i] 256 return b[i]
257
258
235 else: 259 else:
260
236 def _elementtointeger(b, i): 261 def _elementtointeger(b, i):
237 return ord(b[i]) 262 return ord(b[i])
263
238 264
239 STRUCT_BIG_UBYTE = struct.Struct(r'>B') 265 STRUCT_BIG_UBYTE = struct.Struct(r'>B')
240 STRUCT_BIG_USHORT = struct.Struct('>H') 266 STRUCT_BIG_USHORT = struct.Struct('>H')
241 STRUCT_BIG_ULONG = struct.Struct('>L') 267 STRUCT_BIG_ULONG = struct.Struct('>L')
242 STRUCT_BIG_ULONGLONG = struct.Struct('>Q') 268 STRUCT_BIG_ULONGLONG = struct.Struct('>Q')
245 SPECIAL_START_INDEFINITE_BYTESTRING = 1 271 SPECIAL_START_INDEFINITE_BYTESTRING = 1
246 SPECIAL_START_ARRAY = 2 272 SPECIAL_START_ARRAY = 2
247 SPECIAL_START_MAP = 3 273 SPECIAL_START_MAP = 3
248 SPECIAL_START_SET = 4 274 SPECIAL_START_SET = 4
249 SPECIAL_INDEFINITE_BREAK = 5 275 SPECIAL_INDEFINITE_BREAK = 5
276
250 277
251 def decodeitem(b, offset=0): 278 def decodeitem(b, offset=0):
252 """Decode a new CBOR value from a buffer at offset. 279 """Decode a new CBOR value from a buffer at offset.
253 280
254 This function attempts to decode up to one complete CBOR value 281 This function attempts to decode up to one complete CBOR value
299 return False, None, readcount, SPECIAL_NONE 326 return False, None, readcount, SPECIAL_NONE
300 327
301 elif majortype == MAJOR_TYPE_BYTESTRING: 328 elif majortype == MAJOR_TYPE_BYTESTRING:
302 # Beginning of bytestrings are treated as uints in order to 329 # Beginning of bytestrings are treated as uints in order to
303 # decode their length, which may be indefinite. 330 # decode their length, which may be indefinite.
304 complete, size, readcount = decodeuint(subtype, b, offset, 331 complete, size, readcount = decodeuint(
305 allowindefinite=True) 332 subtype, b, offset, allowindefinite=True
333 )
306 334
307 # We don't know the size of the bytestring. It must be a definite 335 # We don't know the size of the bytestring. It must be a definite
308 # length since the indefinite subtype would be encoded in the initial 336 # length since the indefinite subtype would be encoded in the initial
309 # byte. 337 # byte.
310 if not complete: 338 if not complete:
312 340
313 # We know the length of the bytestring. 341 # We know the length of the bytestring.
314 if size is not None: 342 if size is not None:
315 # And the data is available in the buffer. 343 # And the data is available in the buffer.
316 if offset + readcount + size <= len(b): 344 if offset + readcount + size <= len(b):
317 value = b[offset + readcount:offset + readcount + size] 345 value = b[offset + readcount : offset + readcount + size]
318 return True, value, readcount + size + 1, SPECIAL_NONE 346 return True, value, readcount + size + 1, SPECIAL_NONE
319 347
320 # And we need more data in order to return the bytestring. 348 # And we need more data in order to return the bytestring.
321 else: 349 else:
322 wanted = len(b) - offset - readcount - size 350 wanted = len(b) - offset - readcount - size
365 # tags, we should probably move semantic tag handling into the caller. 393 # tags, we should probably move semantic tag handling into the caller.
366 if tagvalue == SEMANTIC_TAG_FINITE_SET: 394 if tagvalue == SEMANTIC_TAG_FINITE_SET:
367 if offset + readcount >= len(b): 395 if offset + readcount >= len(b):
368 return False, None, -1, SPECIAL_NONE 396 return False, None, -1, SPECIAL_NONE
369 397
370 complete, size, readcount2, special = decodeitem(b, 398 complete, size, readcount2, special = decodeitem(
371 offset + readcount) 399 b, offset + readcount
400 )
372 401
373 if not complete: 402 if not complete:
374 return False, None, readcount2, SPECIAL_NONE 403 return False, None, readcount2, SPECIAL_NONE
375 404
376 if special != SPECIAL_START_ARRAY: 405 if special != SPECIAL_START_ARRAY:
377 raise CBORDecodeError('expected array after finite set ' 406 raise CBORDecodeError(
378 'semantic tag') 407 'expected array after finite set ' 'semantic tag'
408 )
379 409
380 return True, size, readcount + readcount2 + 1, SPECIAL_START_SET 410 return True, size, readcount + readcount2 + 1, SPECIAL_START_SET
381 411
382 else: 412 else:
383 raise CBORDecodeError('semantic tag %d not allowed' % tagvalue) 413 raise CBORDecodeError('semantic tag %d not allowed' % tagvalue)
396 else: 426 else:
397 raise CBORDecodeError('special type %d not allowed' % subtype) 427 raise CBORDecodeError('special type %d not allowed' % subtype)
398 else: 428 else:
399 assert False 429 assert False
400 430
431
401 def decodeuint(subtype, b, offset=0, allowindefinite=False): 432 def decodeuint(subtype, b, offset=0, allowindefinite=False):
402 """Decode an unsigned integer. 433 """Decode an unsigned integer.
403 434
404 ``subtype`` is the lower 5 bits from the initial byte CBOR item 435 ``subtype`` is the lower 5 bits from the initial byte CBOR item
405 "header." ``b`` is a buffer containing bytes. ``offset`` points to 436 "header." ``b`` is a buffer containing bytes. ``offset`` points to
426 if allowindefinite: 457 if allowindefinite:
427 return True, None, 0 458 return True, None, 0
428 else: 459 else:
429 raise CBORDecodeError('indefinite length uint not allowed here') 460 raise CBORDecodeError('indefinite length uint not allowed here')
430 elif subtype >= 28: 461 elif subtype >= 28:
431 raise CBORDecodeError('unsupported subtype on integer type: %d' % 462 raise CBORDecodeError(
432 subtype) 463 'unsupported subtype on integer type: %d' % subtype
464 )
433 465
434 if subtype == 24: 466 if subtype == 24:
435 s = STRUCT_BIG_UBYTE 467 s = STRUCT_BIG_UBYTE
436 elif subtype == 25: 468 elif subtype == 25:
437 s = STRUCT_BIG_USHORT 469 s = STRUCT_BIG_USHORT
445 if len(b) - offset >= s.size: 477 if len(b) - offset >= s.size:
446 return True, s.unpack_from(b, offset)[0], s.size 478 return True, s.unpack_from(b, offset)[0], s.size
447 else: 479 else:
448 return False, None, len(b) - offset - s.size 480 return False, None, len(b) - offset - s.size
449 481
482
450 class bytestringchunk(bytes): 483 class bytestringchunk(bytes):
451 """Represents a chunk/segment in an indefinite length bytestring. 484 """Represents a chunk/segment in an indefinite length bytestring.
452 485
453 This behaves like a ``bytes`` but in addition has the ``isfirst`` 486 This behaves like a ``bytes`` but in addition has the ``isfirst``
454 and ``islast`` attributes indicating whether this chunk is the first 487 and ``islast`` attributes indicating whether this chunk is the first
459 self = bytes.__new__(cls, v) 492 self = bytes.__new__(cls, v)
460 self.isfirst = first 493 self.isfirst = first
461 self.islast = last 494 self.islast = last
462 495
463 return self 496 return self
497
464 498
465 class sansiodecoder(object): 499 class sansiodecoder(object):
466 """A CBOR decoder that doesn't perform its own I/O. 500 """A CBOR decoder that doesn't perform its own I/O.
467 501
468 To use, construct an instance and feed it segments containing 502 To use, construct an instance and feed it segments containing
604 # A normal value. 638 # A normal value.
605 if special == SPECIAL_NONE: 639 if special == SPECIAL_NONE:
606 self._decodedvalues.append(value) 640 self._decodedvalues.append(value)
607 641
608 elif special == SPECIAL_START_ARRAY: 642 elif special == SPECIAL_START_ARRAY:
609 self._collectionstack.append({ 643 self._collectionstack.append(
610 'remaining': value, 644 {'remaining': value, 'v': [],}
611 'v': [], 645 )
612 })
613 self._state = self._STATE_WANT_ARRAY_VALUE 646 self._state = self._STATE_WANT_ARRAY_VALUE
614 647
615 elif special == SPECIAL_START_MAP: 648 elif special == SPECIAL_START_MAP:
616 self._collectionstack.append({ 649 self._collectionstack.append(
617 'remaining': value, 650 {'remaining': value, 'v': {},}
618 'v': {}, 651 )
619 })
620 self._state = self._STATE_WANT_MAP_KEY 652 self._state = self._STATE_WANT_MAP_KEY
621 653
622 elif special == SPECIAL_START_SET: 654 elif special == SPECIAL_START_SET:
623 self._collectionstack.append({ 655 self._collectionstack.append(
624 'remaining': value, 656 {'remaining': value, 'v': set(),}
625 'v': set(), 657 )
626 })
627 self._state = self._STATE_WANT_SET_VALUE 658 self._state = self._STATE_WANT_SET_VALUE
628 659
629 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: 660 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
630 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST 661 self._state = self._STATE_WANT_BYTESTRING_CHUNK_FIRST
631 662
632 else: 663 else:
633 raise CBORDecodeError('unhandled special state: %d' % 664 raise CBORDecodeError(
634 special) 665 'unhandled special state: %d' % special
666 )
635 667
636 # This value becomes an element of the current array. 668 # This value becomes an element of the current array.
637 elif self._state == self._STATE_WANT_ARRAY_VALUE: 669 elif self._state == self._STATE_WANT_ARRAY_VALUE:
638 # Simple values get appended. 670 # Simple values get appended.
639 if special == SPECIAL_NONE: 671 if special == SPECIAL_NONE:
649 newvalue = [] 681 newvalue = []
650 682
651 lastc['v'].append(newvalue) 683 lastc['v'].append(newvalue)
652 lastc['remaining'] -= 1 684 lastc['remaining'] -= 1
653 685
654 self._collectionstack.append({ 686 self._collectionstack.append(
655 'remaining': value, 687 {'remaining': value, 'v': newvalue,}
656 'v': newvalue, 688 )
657 })
658 689
659 # self._state doesn't need changed. 690 # self._state doesn't need changed.
660 691
661 # A map nested within an array. 692 # A map nested within an array.
662 elif special == SPECIAL_START_MAP: 693 elif special == SPECIAL_START_MAP:
664 newvalue = {} 695 newvalue = {}
665 696
666 lastc['v'].append(newvalue) 697 lastc['v'].append(newvalue)
667 lastc['remaining'] -= 1 698 lastc['remaining'] -= 1
668 699
669 self._collectionstack.append({ 700 self._collectionstack.append(
670 'remaining': value, 701 {'remaining': value, 'v': newvalue}
671 'v': newvalue 702 )
672 })
673 703
674 self._state = self._STATE_WANT_MAP_KEY 704 self._state = self._STATE_WANT_MAP_KEY
675 705
676 elif special == SPECIAL_START_SET: 706 elif special == SPECIAL_START_SET:
677 lastc = self._collectionstack[-1] 707 lastc = self._collectionstack[-1]
678 newvalue = set() 708 newvalue = set()
679 709
680 lastc['v'].append(newvalue) 710 lastc['v'].append(newvalue)
681 lastc['remaining'] -= 1 711 lastc['remaining'] -= 1
682 712
683 self._collectionstack.append({ 713 self._collectionstack.append(
684 'remaining': value, 714 {'remaining': value, 'v': newvalue,}
685 'v': newvalue, 715 )
686 })
687 716
688 self._state = self._STATE_WANT_SET_VALUE 717 self._state = self._STATE_WANT_SET_VALUE
689 718
690 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: 719 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
691 raise CBORDecodeError('indefinite length bytestrings ' 720 raise CBORDecodeError(
692 'not allowed as array values') 721 'indefinite length bytestrings '
722 'not allowed as array values'
723 )
693 724
694 else: 725 else:
695 raise CBORDecodeError('unhandled special item when ' 726 raise CBORDecodeError(
696 'expecting array value: %d' % special) 727 'unhandled special item when '
728 'expecting array value: %d' % special
729 )
697 730
698 # This value becomes the key of the current map instance. 731 # This value becomes the key of the current map instance.
699 elif self._state == self._STATE_WANT_MAP_KEY: 732 elif self._state == self._STATE_WANT_MAP_KEY:
700 if special == SPECIAL_NONE: 733 if special == SPECIAL_NONE:
701 self._currentmapkey = value 734 self._currentmapkey = value
702 self._state = self._STATE_WANT_MAP_VALUE 735 self._state = self._STATE_WANT_MAP_VALUE
703 736
704 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: 737 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
705 raise CBORDecodeError('indefinite length bytestrings ' 738 raise CBORDecodeError(
706 'not allowed as map keys') 739 'indefinite length bytestrings '
707 740 'not allowed as map keys'
708 elif special in (SPECIAL_START_ARRAY, SPECIAL_START_MAP, 741 )
709 SPECIAL_START_SET): 742
710 raise CBORDecodeError('collections not supported as map ' 743 elif special in (
711 'keys') 744 SPECIAL_START_ARRAY,
745 SPECIAL_START_MAP,
746 SPECIAL_START_SET,
747 ):
748 raise CBORDecodeError(
749 'collections not supported as map ' 'keys'
750 )
712 751
713 # We do not allow special values to be used as map keys. 752 # We do not allow special values to be used as map keys.
714 else: 753 else:
715 raise CBORDecodeError('unhandled special item when ' 754 raise CBORDecodeError(
716 'expecting map key: %d' % special) 755 'unhandled special item when '
756 'expecting map key: %d' % special
757 )
717 758
718 # This value becomes the value of the current map key. 759 # This value becomes the value of the current map key.
719 elif self._state == self._STATE_WANT_MAP_VALUE: 760 elif self._state == self._STATE_WANT_MAP_VALUE:
720 # Simple values simply get inserted into the map. 761 # Simple values simply get inserted into the map.
721 if special == SPECIAL_NONE: 762 if special == SPECIAL_NONE:
731 newvalue = [] 772 newvalue = []
732 773
733 lastc['v'][self._currentmapkey] = newvalue 774 lastc['v'][self._currentmapkey] = newvalue
734 lastc['remaining'] -= 1 775 lastc['remaining'] -= 1
735 776
736 self._collectionstack.append({ 777 self._collectionstack.append(
737 'remaining': value, 778 {'remaining': value, 'v': newvalue,}
738 'v': newvalue, 779 )
739 })
740 780
741 self._state = self._STATE_WANT_ARRAY_VALUE 781 self._state = self._STATE_WANT_ARRAY_VALUE
742 782
743 # A new map is used as the map value. 783 # A new map is used as the map value.
744 elif special == SPECIAL_START_MAP: 784 elif special == SPECIAL_START_MAP:
746 newvalue = {} 786 newvalue = {}
747 787
748 lastc['v'][self._currentmapkey] = newvalue 788 lastc['v'][self._currentmapkey] = newvalue
749 lastc['remaining'] -= 1 789 lastc['remaining'] -= 1
750 790
751 self._collectionstack.append({ 791 self._collectionstack.append(
752 'remaining': value, 792 {'remaining': value, 'v': newvalue,}
753 'v': newvalue, 793 )
754 })
755 794
756 self._state = self._STATE_WANT_MAP_KEY 795 self._state = self._STATE_WANT_MAP_KEY
757 796
758 # A new set is used as the map value. 797 # A new set is used as the map value.
759 elif special == SPECIAL_START_SET: 798 elif special == SPECIAL_START_SET:
761 newvalue = set() 800 newvalue = set()
762 801
763 lastc['v'][self._currentmapkey] = newvalue 802 lastc['v'][self._currentmapkey] = newvalue
764 lastc['remaining'] -= 1 803 lastc['remaining'] -= 1
765 804
766 self._collectionstack.append({ 805 self._collectionstack.append(
767 'remaining': value, 806 {'remaining': value, 'v': newvalue,}
768 'v': newvalue, 807 )
769 })
770 808
771 self._state = self._STATE_WANT_SET_VALUE 809 self._state = self._STATE_WANT_SET_VALUE
772 810
773 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: 811 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
774 raise CBORDecodeError('indefinite length bytestrings not ' 812 raise CBORDecodeError(
775 'allowed as map values') 813 'indefinite length bytestrings not '
814 'allowed as map values'
815 )
776 816
777 else: 817 else:
778 raise CBORDecodeError('unhandled special item when ' 818 raise CBORDecodeError(
779 'expecting map value: %d' % special) 819 'unhandled special item when '
820 'expecting map value: %d' % special
821 )
780 822
781 self._currentmapkey = None 823 self._currentmapkey = None
782 824
783 # This value is added to the current set. 825 # This value is added to the current set.
784 elif self._state == self._STATE_WANT_SET_VALUE: 826 elif self._state == self._STATE_WANT_SET_VALUE:
786 lastc = self._collectionstack[-1] 828 lastc = self._collectionstack[-1]
787 lastc['v'].add(value) 829 lastc['v'].add(value)
788 lastc['remaining'] -= 1 830 lastc['remaining'] -= 1
789 831
790 elif special == SPECIAL_START_INDEFINITE_BYTESTRING: 832 elif special == SPECIAL_START_INDEFINITE_BYTESTRING:
791 raise CBORDecodeError('indefinite length bytestrings not ' 833 raise CBORDecodeError(
792 'allowed as set values') 834 'indefinite length bytestrings not '
793 835 'allowed as set values'
794 elif special in (SPECIAL_START_ARRAY, 836 )
795 SPECIAL_START_MAP, 837
796 SPECIAL_START_SET): 838 elif special in (
797 raise CBORDecodeError('collections not allowed as set ' 839 SPECIAL_START_ARRAY,
798 'values') 840 SPECIAL_START_MAP,
841 SPECIAL_START_SET,
842 ):
843 raise CBORDecodeError(
844 'collections not allowed as set ' 'values'
845 )
799 846
800 # We don't allow non-trivial types to exist as set values. 847 # We don't allow non-trivial types to exist as set values.
801 else: 848 else:
802 raise CBORDecodeError('unhandled special item when ' 849 raise CBORDecodeError(
803 'expecting set value: %d' % special) 850 'unhandled special item when '
851 'expecting set value: %d' % special
852 )
804 853
805 # This value represents the first chunk in an indefinite length 854 # This value represents the first chunk in an indefinite length
806 # bytestring. 855 # bytestring.
807 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST: 856 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_FIRST:
808 # We received a full chunk. 857 # We received a full chunk.
809 if special == SPECIAL_NONE: 858 if special == SPECIAL_NONE:
810 self._decodedvalues.append(bytestringchunk(value, 859 self._decodedvalues.append(
811 first=True)) 860 bytestringchunk(value, first=True)
861 )
812 862
813 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT 863 self._state = self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT
814 864
815 # The end of stream marker. This means it is an empty 865 # The end of stream marker. This means it is an empty
816 # indefinite length bytestring. 866 # indefinite length bytestring.
817 elif special == SPECIAL_INDEFINITE_BREAK: 867 elif special == SPECIAL_INDEFINITE_BREAK:
818 # We /could/ convert this to a b''. But we want to preserve 868 # We /could/ convert this to a b''. But we want to preserve
819 # the nature of the underlying data so consumers expecting 869 # the nature of the underlying data so consumers expecting
820 # an indefinite length bytestring get one. 870 # an indefinite length bytestring get one.
821 self._decodedvalues.append(bytestringchunk(b'', 871 self._decodedvalues.append(
822 first=True, 872 bytestringchunk(b'', first=True, last=True)
823 last=True)) 873 )
824 874
825 # Since indefinite length bytestrings can't be used in 875 # Since indefinite length bytestrings can't be used in
826 # collections, we must be at the root level. 876 # collections, we must be at the root level.
827 assert not self._collectionstack 877 assert not self._collectionstack
828 self._state = self._STATE_NONE 878 self._state = self._STATE_NONE
829 879
830 else: 880 else:
831 raise CBORDecodeError('unexpected special value when ' 881 raise CBORDecodeError(
832 'expecting bytestring chunk: %d' % 882 'unexpected special value when '
833 special) 883 'expecting bytestring chunk: %d' % special
884 )
834 885
835 # This value represents the non-initial chunk in an indefinite 886 # This value represents the non-initial chunk in an indefinite
836 # length bytestring. 887 # length bytestring.
837 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT: 888 elif self._state == self._STATE_WANT_BYTESTRING_CHUNK_SUBSEQUENT:
838 # We received a full chunk. 889 # We received a full chunk.
847 # collections, we must be at the root level. 898 # collections, we must be at the root level.
848 assert not self._collectionstack 899 assert not self._collectionstack
849 self._state = self._STATE_NONE 900 self._state = self._STATE_NONE
850 901
851 else: 902 else:
852 raise CBORDecodeError('unexpected special value when ' 903 raise CBORDecodeError(
853 'expecting bytestring chunk: %d' % 904 'unexpected special value when '
854 special) 905 'expecting bytestring chunk: %d' % special
906 )
855 907
856 else: 908 else:
857 raise CBORDecodeError('unhandled decoder state: %d' % 909 raise CBORDecodeError(
858 self._state) 910 'unhandled decoder state: %d' % self._state
911 )
859 912
860 # We could have just added the final value in a collection. End 913 # We could have just added the final value in a collection. End
861 # all complete collections at the top of the stack. 914 # all complete collections at the top of the stack.
862 while True: 915 while True:
863 # Bail if we're not waiting on a new collection item. 916 # Bail if we're not waiting on a new collection item.
864 if self._state not in (self._STATE_WANT_ARRAY_VALUE, 917 if self._state not in (
865 self._STATE_WANT_MAP_KEY, 918 self._STATE_WANT_ARRAY_VALUE,
866 self._STATE_WANT_SET_VALUE): 919 self._STATE_WANT_MAP_KEY,
920 self._STATE_WANT_SET_VALUE,
921 ):
867 break 922 break
868 923
869 # Or we are expecting more items for this collection. 924 # Or we are expecting more items for this collection.
870 lastc = self._collectionstack[-1] 925 lastc = self._collectionstack[-1]
871 926
907 962
908 l = list(self._decodedvalues) 963 l = list(self._decodedvalues)
909 self._decodedvalues = [] 964 self._decodedvalues = []
910 return l 965 return l
911 966
967
912 class bufferingdecoder(object): 968 class bufferingdecoder(object):
913 """A CBOR decoder that buffers undecoded input. 969 """A CBOR decoder that buffers undecoded input.
914 970
915 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering 971 This is a glorified wrapper around ``sansiodecoder`` that adds a buffering
916 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered 972 layer. All input that isn't consumed by ``sansiodecoder`` will be buffered
917 and concatenated with any new input that arrives later. 973 and concatenated with any new input that arrives later.
918 974
919 TODO consider adding limits as to the maximum amount of data that can 975 TODO consider adding limits as to the maximum amount of data that can
920 be buffered. 976 be buffered.
921 """ 977 """
978
922 def __init__(self): 979 def __init__(self):
923 self._decoder = sansiodecoder() 980 self._decoder = sansiodecoder()
924 self._chunks = [] 981 self._chunks = []
925 self._wanted = 0 982 self._wanted = 0
926 983
976 return available, readcount - oldlen, wanted 1033 return available, readcount - oldlen, wanted
977 1034
978 def getavailable(self): 1035 def getavailable(self):
979 return self._decoder.getavailable() 1036 return self._decoder.getavailable()
980 1037
1038
981 def decodeall(b): 1039 def decodeall(b):
982 """Decode all CBOR items present in an iterable of bytes. 1040 """Decode all CBOR items present in an iterable of bytes.
983 1041
984 In addition to regular decode errors, raises CBORDecodeError if the 1042 In addition to regular decode errors, raises CBORDecodeError if the
985 entirety of the passed buffer does not fully decode to complete CBOR 1043 entirety of the passed buffer does not fully decode to complete CBOR