Mercurial > hg
changeset 37126:4bd73a955ab0
thirdparty: vendor cbor2 python library
CBOR stands for Concise Binary Object Representation, which is a data format
which is very compact and extensible.
This patch moves the python library which can serialize and deserialize python
objects to/from cbor formats. The library is taken from
https://github.com/agronholm/cbor2/ from commit
84181540f6eb650437e3f73cd104a65661fe8e67.
Unrequired files from the cbor library - docs/, tests/, setup.py, setup.cfg,
and tox.ini - have not been vendored.
There is another python library for cbor at
https://github.com/brianolson/cbor_py/ which is used in evolve extension and was
imported in the initial version of this series. That library, though, contains C code
and is a bit faster, but it has known bugs around serializing nested structures, is
unmaintained, and raises a generic Exception object instead of a more dedicated Error type.
So, it's better to use a bug free and actively maintained library.
This library is not yet used and will be used in later commits.
# no-check-commit because we are importing a third-party library module
Differential Revision: https://phab.mercurial-scm.org/D2750
author | Pulkit Goyal <7895pulkit@gmail.com> |
---|---|
date | Mon, 26 Mar 2018 08:33:57 -0700 |
parents | 6f570c501e3e |
children | 0e06d8086295 |
files | mercurial/thirdparty/cbor/.travis.yml mercurial/thirdparty/cbor/LICENSE.txt mercurial/thirdparty/cbor/README.rst mercurial/thirdparty/cbor/cbor2/__init__.py mercurial/thirdparty/cbor/cbor2/compat.py mercurial/thirdparty/cbor/cbor2/decoder.py mercurial/thirdparty/cbor/cbor2/encoder.py mercurial/thirdparty/cbor/cbor2/types.py |
diffstat | 8 files changed, 1092 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/cbor/.travis.yml Mon Mar 26 08:33:57 2018 -0700 @@ -0,0 +1,58 @@ +language: python +sudo: false + +stages: + - name: test + - name: deploy to pypi + if: type = push AND tag =~ ^\d+\.\d+\.\d+ + +jobs: + fast_finish: true + include: + - env: TOXENV=flake8 + + - env: TOXENV=pypy + python: pypy3 + + - env: TOXENV=pypy3 + python: pypy3 + + - env: TOXENV=py27 + python: "2.7" + after_success: &after_success + - pip install coveralls + - coveralls + + - env: TOXENV=py33 + python: "3.3" + after_success: *after_success + + - env: TOXENV=py34 + python: "3.4" + after_success: *after_success + + - env: TOXENV=py35 + python: "3.5" + after_success: *after_success + + - env: TOXENV=py36 + python: "3.6" + after_success: *after_success + + - stage: deploy to pypi + install: pip install "setuptools >= 36.2.7" + script: skip + deploy: + provider: pypi + user: agronholm + password: + secure: QZ5qoxsrzns/b27adWNzh/OAJp86yRuxTyAFhvas/pbkiALdlT/+PGyhJBnpe+7WBTWnkIXl+YU//voJ0btf6DJcWwgRavMsy22LJJPkvvK+2DHiZ//DbpLbqKWc74y4moce29BCajFTm9JkVwcL2dgN9WuZt+Tay0efcP4sESLxo5lIGdlaQbu+9zVs61Z4Ov+yyEMO/j3LeKshNmUq+84CveQWMiXndXBfJX5TWwjahmUNDp5fMctJxr4fqgL4HCTVQhU79dPc00yDEGS45QkpP8JDrF1DQvU5Ht4COz/Lvzt11pwsAvws2ddclqBUCQsGaWvEWH5rxZTYx/MaMVdTctaUVNoT0wnFUsXXZkomQV0x8vb5RtRLDrKwXosXlSEqnRyiKhdgHGoswHvB7XF5BtQ5RmydRX77pwEGmFd3lqRif2bos0MEeOJA8Xds0TGOKO4PyokBnj/a0tjT2LEVxObmTT6grz5QPXi386AWgxbNl0Lp7cnkSpCqC1hEHVqrDlbtu7uvfGwwe/sYlEcQ07PNCvFoR2GXJawbeHmJRfz+KXjffrt2yCzc671FL1goUysHKdBCppvUInI8FCMQpVWEh5MmQJKB4IpDrhqfo0VS+NNZgZ8lFStq27Pmwqf1HUTGlaDi9VQ0Vo7tW5j4JbD/JvOQSb3j9DjUFps= + distributions: sdist bdist_wheel + on: + tags: true + +install: + - pip install "setuptools >= 36.2.7" + - pip install tox + +script: tox
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/cbor/LICENSE.txt Mon Mar 26 08:33:57 2018 -0700 @@ -0,0 +1,19 @@ +This is the MIT license: http://www.opensource.org/licenses/mit-license.php + +Copyright (c) Alex Grönholm + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/cbor/README.rst Mon Mar 26 08:33:57 2018 -0700 @@ -0,0 +1,24 @@ +.. image:: https://travis-ci.org/agronholm/cbor2.svg?branch=master + :target: https://travis-ci.org/agronholm/cbor2 + :alt: Build Status +.. image:: https://coveralls.io/repos/github/agronholm/cbor2/badge.svg?branch=master + :target: https://coveralls.io/github/agronholm/cbor2?branch=master + :alt: Code Coverage + +This library provides encoding and decoding for the Concise Binary Object Representation (CBOR) +(`RFC 7049`_) serialization format. + +There exists another Python CBOR implementation (cbor) which is faster on CPython due to its C +extensions. On PyPy, cbor2 and cbor are almost identical in performance. The other implementation +also lacks documentation and a comprehensive test suite, does not support most standard extension +tags and is known to crash (segfault) when passed a cyclic structure (say, a list containing +itself). + +.. _RFC 7049: https://tools.ietf.org/html/rfc7049 + +Project links +------------- + +* `Documentation <http://cbor2.readthedocs.org/>`_ +* `Source code <https://github.com/agronholm/cbor2>`_ +* `Issue tracker <https://github.com/agronholm/cbor2/issues>`_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/thirdparty/cbor/cbor2/__init__.py Mon Mar 26 08:33:57 2018 -0700 @@ -0,0 +1,3 @@ +from .decoder import load, loads, CBORDecoder, CBORDecodeError # noqa +from .encoder import dump, dumps, CBOREncoder, CBOREncodeError, shareable_encoder # noqa +from .types import CBORTag, CBORSimpleValue, undefined # noqa
"""Python 2/3 compatibility shims for the vendored cbor2 package (cbor2/compat.py)."""
from math import ldexp
import struct
import sys


if sys.version_info.major < 3:
    from datetime import tzinfo, timedelta

    class timezone(tzinfo):
        """Minimal fixed-offset tzinfo standing in for ``datetime.timezone`` on Python 2."""

        def __init__(self, offset):
            # offset: a timedelta giving the distance from UTC
            self.offset = offset

        def utcoffset(self, dt):
            return self.offset

        def dst(self, dt):
            return timedelta(0)

        def tzname(self, dt):
            # Report the real offset instead of a hard-coded 'UTC+00:00'; a zero
            # offset still renders as 'UTC+00:00'.
            seconds = self.offset.days * 86400 + self.offset.seconds
            sign = '-' if seconds < 0 else '+'
            hours, remainder = divmod(abs(seconds), 3600)
            return 'UTC%s%02d:%02d' % (sign, hours, remainder // 60)

    def as_unicode(string):
        """Decode a UTF-8 byte string into a unicode string."""
        return string.decode('utf-8')

    def iteritems(self):
        """Return an iterator over the (key, value) pairs of the given mapping."""
        return self.iteritems()

    def bytes_from_list(values):
        """Build a byte string from an iterable of integers (0-255)."""
        return bytes(bytearray(values))

    byte_as_integer = ord
    timezone.utc = timezone(timedelta(0))
    xrange = xrange  # noqa: F821
    long = long  # noqa: F821
    unicode = unicode  # noqa: F821
else:
    from datetime import timezone

    def byte_as_integer(bytestr):
        """Return the integer value of the first byte of ``bytestr``."""
        return bytestr[0]

    def as_unicode(string):
        """Return the string unchanged (it is already text on Python 3)."""
        return string

    def iteritems(self):
        """Return a view over the (key, value) pairs of the given mapping."""
        return self.items()

    xrange = range
    long = int
    unicode = str
    bytes_from_list = bytes


def pack_float16_portable(value):
    """
    Pure-Python half-precision encoder.

    :param float value: the value to encode
    :return: ``b'\\xf9'`` followed by the 2-byte big-endian half float, or ``False`` if the
        value cannot be represented exactly as a half-precision float

    """
    # Based on node-cbor by hildjj
    # which was based in turn on Carsten Borman's cn-cbor
    try:
        u32 = struct.pack('>f', value)
    except OverflowError:
        # Too large even for single precision, so certainly not a half float
        return False

    u = struct.unpack('>I', u32)[0]

    # The low 13 mantissa bits do not exist in half precision
    if u & 0x1FFF != 0:
        return False

    s16 = (u >> 16) & 0x8000
    exponent = (u >> 23) & 0xff
    mantissa = u & 0x7fffff

    if 113 <= exponent <= 142:
        # Normal half-precision range
        s16 += ((exponent - 112) << 10) + (mantissa >> 13)
    elif 103 <= exponent < 113:
        # Subnormal half-precision range
        if mantissa & ((1 << (126 - exponent)) - 1):
            return False

        s16 += ((mantissa + 0x800000) >> (126 - exponent))
    elif exponent == 0 and mantissa == 0:
        # Positive or negative zero: only the sign bit is set (or nothing at all)
        pass
    else:
        return False

    return struct.pack('>BH', 0xf9, s16)


def unpack_float16_portable(payload):
    """
    Pure-Python half-precision decoder.

    :param bytes payload: the 2-byte big-endian half float
    :return: the decoded value as a float

    """
    # Code adapted from RFC 7049, appendix D
    def decode_single(single):
        return struct.unpack("!f", struct.pack("!I", single))[0]

    payload = struct.unpack('>H', payload)[0]
    value = (payload & 0x7fff) << 13 | (payload & 0x8000) << 16
    if payload & 0x7c00 != 0x7c00:
        return ldexp(decode_single(value), 112)

    # Exponent bits all set: infinity or NaN
    return decode_single(value | 0x7f800000)


if sys.version_info >= (3, 6):
    # Python 3.6 added 16 bit floating point to struct

    def pack_float16(value):
        """Encode ``value`` as a CBOR half float, or return ``False`` if it overflows."""
        try:
            return struct.pack('>Be', 0xf9, value)
        except OverflowError:
            return False

    def unpack_float16(payload):
        """Decode a 2-byte big-endian half float."""
        return struct.unpack('>e', payload)[0]
else:
    pack_float16 = pack_float16_portable
    unpack_float16 = unpack_float16_portable
"""CBOR decoder of the vendored cbor2 package (cbor2/decoder.py)."""
import re
import struct
from datetime import datetime, timedelta
from io import BytesIO

from .compat import timezone, xrange, byte_as_integer, unpack_float16
from .types import CBORTag, undefined, break_marker, CBORSimpleValue

timestamp_re = re.compile(r'^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)'
                          r'(?:\.(\d+))?(?:Z|([+-]\d\d):(\d\d))$')


class CBORDecodeError(Exception):
    """Raised when an error occurs deserializing a CBOR datastream."""


def decode_uint(decoder, subtype, shareable_index=None, allow_indefinite=False):
    """
    Decode an unsigned integer (also used for lengths and tag numbers).

    :param decoder: the :class:`CBORDecoder` to read additional bytes from
    :param int subtype: the low 5 bits of the initial byte
    :param allow_indefinite: if true, subtype 31 yields ``None`` instead of an error
    :raises CBORDecodeError: if the subtype is not a valid integer encoding

    """
    # Major tag 0
    if subtype < 24:
        return subtype
    elif subtype == 24:
        return struct.unpack('>B', decoder.read(1))[0]
    elif subtype == 25:
        return struct.unpack('>H', decoder.read(2))[0]
    elif subtype == 26:
        return struct.unpack('>L', decoder.read(4))[0]
    elif subtype == 27:
        return struct.unpack('>Q', decoder.read(8))[0]
    elif subtype == 31 and allow_indefinite:
        return None
    else:
        raise CBORDecodeError('unknown unsigned integer subtype 0x%x' % subtype)


def decode_negint(decoder, subtype, shareable_index=None):
    """Decode a negative integer, stored on the wire as ``-1 - n``."""
    # Major tag 1
    uint = decode_uint(decoder, subtype)
    return -uint - 1


def decode_bytestring(decoder, subtype, shareable_index=None):
    """
    Decode a definite or indefinite length byte string.

    :return: the decoded :class:`bytes` (indefinite length chunks are concatenated)

    """
    # Major tag 2
    length = decode_uint(decoder, subtype, allow_indefinite=True)
    if length is None:
        # Indefinite length
        buf = bytearray()
        while True:
            initial_byte = byte_as_integer(decoder.read(1))
            if initial_byte == 255:
                # Return the same type as the definite length branch below
                return bytes(buf)
            else:
                length = decode_uint(decoder, initial_byte & 31)
                value = decoder.read(length)
                buf.extend(value)
    else:
        return decoder.read(length)


def decode_string(decoder, subtype, shareable_index=None):
    """Decode a UTF-8 text string."""
    # Major tag 3
    return decode_bytestring(decoder, subtype).decode('utf-8')


def decode_array(decoder, subtype, shareable_index=None):
    """Decode a definite or indefinite length array into a :class:`list`."""
    # Major tag 4
    items = []
    # Register the list before decoding its items so cyclic references can resolve to it
    decoder.set_shareable(shareable_index, items)
    length = decode_uint(decoder, subtype, allow_indefinite=True)
    if length is None:
        # Indefinite length
        while True:
            value = decoder.decode()
            if value is break_marker:
                break
            else:
                items.append(value)
    else:
        for _ in xrange(length):
            item = decoder.decode()
            items.append(item)

    return items


def decode_map(decoder, subtype, shareable_index=None):
    """
    Decode a definite or indefinite length map into a :class:`dict`.

    If the decoder has an ``object_hook``, its return value replaces the dict.

    """
    # Major tag 5
    dictionary = {}
    # Register the dict before decoding its items so cyclic references can resolve to it
    decoder.set_shareable(shareable_index, dictionary)
    length = decode_uint(decoder, subtype, allow_indefinite=True)
    if length is None:
        # Indefinite length
        while True:
            key = decoder.decode()
            if key is break_marker:
                break
            else:
                value = decoder.decode()
                dictionary[key] = value
    else:
        for _ in xrange(length):
            key = decoder.decode()
            value = decoder.decode()
            dictionary[key] = value

    if decoder.object_hook:
        return decoder.object_hook(decoder, dictionary)
    else:
        return dictionary


def decode_semantic(decoder, subtype, shareable_index=None):
    """
    Decode a semantically tagged value.

    Tags with a built-in decoder are converted directly; other tags are wrapped in
    :class:`CBORTag` and passed through the decoder's ``tag_hook`` if one is set.

    """
    # Major tag 6
    tagnum = decode_uint(decoder, subtype)

    # Special handling for the "shareable" tag
    if tagnum == 28:
        shareable_index = decoder._allocate_shareable()
        return decoder.decode(shareable_index)

    value = decoder.decode()
    semantic_decoder = semantic_decoders.get(tagnum)
    if semantic_decoder:
        return semantic_decoder(decoder, value, shareable_index)

    tag = CBORTag(tagnum, value)
    if decoder.tag_hook:
        return decoder.tag_hook(decoder, tag, shareable_index)
    else:
        return tag


def decode_special(decoder, subtype, shareable_index=None):
    """
    Decode a simple value, float, or other major type 7 item.

    :raises CBORDecodeError: if the subtype has no decoder (reserved values 28-30)

    """
    # Simple value
    if subtype < 20:
        return CBORSimpleValue(subtype)

    # Major tag 7
    try:
        special_decoder = special_decoders[subtype]
    except KeyError:
        raise CBORDecodeError('unknown special subtype 0x%x' % subtype)

    return special_decoder(decoder)


#
# Semantic decoders (major tag 6)
#

def decode_datetime_string(decoder, value, shareable_index=None):
    """
    Decode an RFC 3339 style timestamp string into a timezone-aware datetime.

    :raises CBORDecodeError: if the string does not match the expected format

    """
    # Semantic tag 0
    match = timestamp_re.match(value)
    if match:
        year, month, day, hour, minute, second, micro, offset_h, offset_m = match.groups()
        if offset_h:
            # The sign is captured with the hours only, so apply it to the minutes as well
            # (otherwise "-05:30" would be read as -5h +30min)
            minutes = int(offset_m)
            if offset_h.startswith('-'):
                minutes = -minutes

            tz = timezone(timedelta(hours=int(offset_h), minutes=minutes))
        else:
            tz = timezone.utc

        # The captured digits are a decimal fraction of a second: scale to exactly
        # six digits so that ".5" means 500000 microseconds rather than 5
        microsecond = int(micro[:6].ljust(6, '0')) if micro else 0

        return datetime(int(year), int(month), int(day), int(hour), int(minute), int(second),
                        microsecond, tz)
    else:
        raise CBORDecodeError('invalid datetime string: {}'.format(value))


def decode_epoch_datetime(decoder, value, shareable_index=None):
    """Decode a UNIX timestamp into a UTC datetime."""
    # Semantic tag 1
    return datetime.fromtimestamp(value, timezone.utc)


def decode_positive_bignum(decoder, value, shareable_index=None):
    """Decode a big-endian byte string into a non-negative integer."""
    # Semantic tag 2
    from binascii import hexlify
    return int(hexlify(value), 16)


def decode_negative_bignum(decoder, value, shareable_index=None):
    """Decode a big-endian byte string ``n`` into the integer ``-1 - n``."""
    # Semantic tag 3
    return -decode_positive_bignum(decoder, value) - 1


def decode_fraction(decoder, value, shareable_index=None):
    """Decode an ``[exponent, mantissa]`` pair into ``mantissa * 10 ** exponent``."""
    # Semantic tag 4
    from decimal import Decimal
    exp = Decimal(value[0])
    mantissa = Decimal(value[1])
    return mantissa * (10 ** exp)


def decode_bigfloat(decoder, value, shareable_index=None):
    """Decode an ``[exponent, mantissa]`` pair into ``mantissa * 2 ** exponent``."""
    # Semantic tag 5
    from decimal import Decimal
    exp = Decimal(value[0])
    mantissa = Decimal(value[1])
    return mantissa * (2 ** exp)


def decode_sharedref(decoder, value, shareable_index=None):
    """
    Resolve a reference to a previously marked shareable value.

    :raises CBORDecodeError: if the index is unknown or the value is not yet initialized

    """
    # Semantic tag 29
    try:
        if value < 0:
            # A negative index would silently pick an entry from the end of the list
            raise IndexError(value)

        shared = decoder._shareables[value]
    except IndexError:
        raise CBORDecodeError('shared reference %d not found' % value)

    if shared is None:
        raise CBORDecodeError('shared value %d has not been initialized' % value)
    else:
        return shared


def decode_rational(decoder, value, shareable_index=None):
    """Decode a ``[numerator, denominator]`` pair into a :class:`fractions.Fraction`."""
    # Semantic tag 30
    from fractions import Fraction
    return Fraction(*value)


def decode_regexp(decoder, value, shareable_index=None):
    """Compile the decoded string into a regular expression object."""
    # Semantic tag 35
    return re.compile(value)


def decode_mime(decoder, value, shareable_index=None):
    """Parse the decoded string into an email message object."""
    # Semantic tag 36
    from email.parser import Parser
    return Parser().parsestr(value)


def decode_uuid(decoder, value, shareable_index=None):
    """Build a :class:`uuid.UUID` from its 16-byte binary form."""
    # Semantic tag 37
    from uuid import UUID
    return UUID(bytes=value)


def decode_set(decoder, value, shareable_index=None):
    """Convert the decoded array into a :class:`set`."""
    # Semantic tag 258
    return set(value)


#
# Special decoders (major tag 7)
#

def decode_simple_value(decoder, shareable_index=None):
    """Decode a simple value stored in the byte following the initial byte."""
    return CBORSimpleValue(struct.unpack('>B', decoder.read(1))[0])


def decode_float16(decoder, shareable_index=None):
    """Decode a half-precision float."""
    payload = decoder.read(2)
    return unpack_float16(payload)


def decode_float32(decoder, shareable_index=None):
    """Decode a single-precision float."""
    return struct.unpack('>f', decoder.read(4))[0]


def decode_float64(decoder, shareable_index=None):
    """Decode a double-precision float."""
    return struct.unpack('>d', decoder.read(8))[0]


major_decoders = {
    0: decode_uint,
    1: decode_negint,
    2: decode_bytestring,
    3: decode_string,
    4: decode_array,
    5: decode_map,
    6: decode_semantic,
    7: decode_special
}

special_decoders = {
    20: lambda self: False,
    21: lambda self: True,
    22: lambda self: None,
    23: lambda self: undefined,
    24: decode_simple_value,
    25: decode_float16,
    26: decode_float32,
    27: decode_float64,
    31: lambda self: break_marker
}

semantic_decoders = {
    0: decode_datetime_string,
    1: decode_epoch_datetime,
    2: decode_positive_bignum,
    3: decode_negative_bignum,
    4: decode_fraction,
    5: decode_bigfloat,
    29: decode_sharedref,
    30: decode_rational,
    35: decode_regexp,
    36: decode_mime,
    37: decode_uuid,
    258: decode_set
}


class CBORDecoder(object):
    """
    Deserializes a CBOR encoded byte stream.

    :param tag_hook: Callable that takes 3 arguments: the decoder instance, the
        :class:`~cbor2.types.CBORTag` and the shareable index for the resulting object, if any.
        This callback is called for any tags for which there is no built-in decoder.
        The return value is substituted for the CBORTag object in the deserialized output.
    :param object_hook: Callable that takes 2 arguments: the decoder instance and the dictionary.
        This callback is called for each deserialized :class:`dict` object.
        The return value is substituted for the dict in the deserialized output.
    """

    __slots__ = ('fp', 'tag_hook', 'object_hook', '_shareables')

    def __init__(self, fp, tag_hook=None, object_hook=None):
        self.fp = fp
        self.tag_hook = tag_hook
        self.object_hook = object_hook
        self._shareables = []

    def _allocate_shareable(self):
        """Reserve a slot in the shared value registry and return its index."""
        self._shareables.append(None)
        return len(self._shareables) - 1

    def set_shareable(self, index, value):
        """
        Set the shareable value for the last encountered shared value marker, if any.

        If the given index is ``None``, nothing is done.

        :param index: the value of the ``shared_index`` argument to the decoder
        :param value: the shared value

        """
        if index is not None:
            self._shareables[index] = value

    def read(self, amount):
        """
        Read bytes from the data stream.

        :param int amount: the number of bytes to read
        :raises CBORDecodeError: if fewer than ``amount`` bytes are available

        """
        data = self.fp.read(amount)
        if len(data) < amount:
            raise CBORDecodeError('premature end of stream (expected to read {} bytes, got {} '
                                  'instead)'.format(amount, len(data)))

        return data

    def decode(self, shareable_index=None):
        """
        Decode the next value from the stream.

        :raises CBORDecodeError: if there is any problem decoding the stream

        """
        try:
            initial_byte = byte_as_integer(self.fp.read(1))
            major_type = initial_byte >> 5
            subtype = initial_byte & 31
        except Exception as e:
            raise CBORDecodeError('error reading major type at index {}: {}'
                                  .format(self.fp.tell(), e))

        decoder = major_decoders[major_type]
        try:
            return decoder(self, subtype, shareable_index)
        except CBORDecodeError:
            raise
        except Exception as e:
            raise CBORDecodeError('error decoding value at index {}: {}'.format(self.fp.tell(), e))

    def decode_from_bytes(self, buf):
        """
        Wrap the given bytestring as a file and call :meth:`decode` with it as the argument.

        This method was intended to be used from the ``tag_hook`` hook when an object needs to be
        decoded separately from the rest but while still taking advantage of the shared value
        registry.

        """
        old_fp = self.fp
        self.fp = BytesIO(buf)
        try:
            return self.decode()
        finally:
            # Always restore the real stream, even if decoding the buffer failed
            self.fp = old_fp


def loads(payload, **kwargs):
    """
    Deserialize an object from a bytestring.

    :param bytes payload: the bytestring to deserialize
    :param kwargs: keyword arguments passed to :class:`~.CBORDecoder`
    :return: the deserialized object

    """
    fp = BytesIO(payload)
    return CBORDecoder(fp, **kwargs).decode()


def load(fp, **kwargs):
    """
    Deserialize an object from an open file.

    :param fp: the input file (any file-like object)
    :param kwargs: keyword arguments passed to :class:`~.CBORDecoder`
    :return: the deserialized object

    """
    return CBORDecoder(fp, **kwargs).decode()
"""CBOR encoder of the vendored cbor2 package (cbor2/encoder.py)."""
import re
import struct
from collections import OrderedDict, defaultdict
from contextlib import contextmanager
from functools import wraps
from datetime import datetime, date, time
from io import BytesIO

from .compat import (
    iteritems, timezone, long, unicode, as_unicode, bytes_from_list, pack_float16, unpack_float16)
from .types import CBORTag, undefined, CBORSimpleValue


class CBOREncodeError(Exception):
    """Raised when an error occurs while serializing an object into a CBOR datastream."""


def shareable_encoder(func):
    """
    Wrap the given encoder function to gracefully handle cyclic data structures.

    If value sharing is enabled, this marks the given value shared in the datastream on the
    first call. If the value has already been passed to this method, a reference marker is
    instead written to the data stream and the wrapped function is not called.

    If value sharing is disabled, only infinite recursion protection is done.

    """
    @wraps(func)
    def wrapper(encoder, value, *args, **kwargs):
        value_id = id(value)
        container, container_index = encoder._shared_containers.get(value_id, (None, None))
        if encoder.value_sharing:
            if container is value:
                # Generate a reference to the previous index instead of encoding this again
                encoder.write(encode_length(0xd8, 0x1d))
                encode_int(encoder, container_index)
            else:
                # Mark the container as shareable
                encoder._shared_containers[value_id] = (value, len(encoder._shared_containers))
                encoder.write(encode_length(0xd8, 0x1c))
                func(encoder, value, *args, **kwargs)
        else:
            if container is value:
                raise CBOREncodeError('cyclic data structure detected but value sharing is '
                                      'disabled')
            else:
                encoder._shared_containers[value_id] = (value, None)
                try:
                    func(encoder, value, *args, **kwargs)
                finally:
                    # Clean up even on failure; a stale entry would make a later encode of
                    # the same object look like a cycle
                    del encoder._shared_containers[value_id]

    return wrapper


def encode_length(major_tag, length):
    """Return the initial byte(s) for ``major_tag`` using the shortest encoding of ``length``."""
    if length < 24:
        return struct.pack('>B', major_tag | length)
    elif length < 256:
        return struct.pack('>BB', major_tag | 24, length)
    elif length < 65536:
        return struct.pack('>BH', major_tag | 25, length)
    elif length < 4294967296:
        return struct.pack('>BL', major_tag | 26, length)
    else:
        return struct.pack('>BQ', major_tag | 27, length)


def encode_int(encoder, value):
    """Encode an integer, falling back to a bignum tag when it does not fit in 64 bits."""
    # Big integers (2 ** 64 and over)
    if value >= 18446744073709551616 or value < -18446744073709551616:
        if value >= 0:
            major_type = 0x02
        else:
            major_type = 0x03
            value = -value - 1

        # Collect the bytes least significant first, then flip once (avoids repeated
        # insertion at the front of the list)
        values = []
        while value > 0:
            value, remainder = divmod(value, 256)
            values.append(remainder)

        values.reverse()
        payload = bytes_from_list(values)
        encode_semantic(encoder, CBORTag(major_type, payload))
    elif value >= 0:
        encoder.write(encode_length(0, value))
    else:
        encoder.write(encode_length(0x20, abs(value) - 1))


def encode_bytestring(encoder, value):
    """Encode a byte string."""
    encoder.write(encode_length(0x40, len(value)) + value)


def encode_bytearray(encoder, value):
    """Encode a bytearray the same way as a byte string."""
    encode_bytestring(encoder, bytes(value))


def encode_string(encoder, value):
    """Encode a text string as UTF-8."""
    encoded = value.encode('utf-8')
    encoder.write(encode_length(0x60, len(encoded)) + encoded)


@shareable_encoder
def encode_array(encoder, value):
    """Encode a sequence as a definite length array."""
    encoder.write(encode_length(0x80, len(value)))
    for item in value:
        encoder.encode(item)


@shareable_encoder
def encode_map(encoder, value):
    """Encode a mapping as a definite length map, in the mapping's iteration order."""
    encoder.write(encode_length(0xa0, len(value)))
    for key, val in iteritems(value):
        encoder.encode(key)
        encoder.encode(val)


def encode_sortable_key(encoder, value):
    """Takes a key and calculates the length of its optimal byte representation"""
    encoded = encoder.encode_to_bytes(value)
    return len(encoded), encoded


@shareable_encoder
def encode_canonical_map(encoder, value):
    """Reorder keys according to Canonical CBOR specification"""
    keyed_keys = ((encode_sortable_key(encoder, key), key) for key in value.keys())
    encoder.write(encode_length(0xa0, len(value)))
    for sortkey, realkey in sorted(keyed_keys):
        encoder.write(sortkey[1])
        encoder.encode(value[realkey])


def encode_semantic(encoder, value):
    """Encode a :class:`CBORTag`: the tag number followed by the tagged value."""
    encoder.write(encode_length(0xc0, value.tag))
    encoder.encode(value.value)


#
# Semantic encoders (major tag 6)
#

def encode_datetime(encoder, value):
    """
    Encode a datetime as an ISO 8601 string (tag 0) or as a UNIX timestamp (tag 1).

    :raises CBOREncodeError: if the datetime is naive and the encoder has no default timezone

    """
    # Semantic tag 0
    if not value.tzinfo:
        if encoder.timezone:
            value = value.replace(tzinfo=encoder.timezone)
        else:
            raise CBOREncodeError(
                'naive datetime encountered and no default timezone has been set')

    if encoder.datetime_as_timestamp:
        from calendar import timegm
        timestamp = timegm(value.utctimetuple())
        if value.microsecond:
            # Keep sub-second precision; whole seconds remain a compact integer
            timestamp += value.microsecond / 1000000.0

        encode_semantic(encoder, CBORTag(1, timestamp))
    else:
        datestring = as_unicode(value.isoformat().replace('+00:00', 'Z'))
        encode_semantic(encoder, CBORTag(0, datestring))


def encode_date(encoder, value):
    """Encode a date as midnight UTC of that day."""
    value = datetime.combine(value, time()).replace(tzinfo=timezone.utc)
    encode_datetime(encoder, value)


def encode_decimal(encoder, value):
    """Encode a Decimal as a decimal fraction, or as a half float for NaN and infinities."""
    # Semantic tag 4
    if value.is_nan():
        encoder.write(b'\xf9\x7e\x00')
    elif value.is_infinite():
        encoder.write(b'\xf9\x7c\x00' if value > 0 else b'\xf9\xfc\x00')
    else:
        dt = value.as_tuple()
        mantissa = sum(d * 10 ** i for i, d in enumerate(reversed(dt.digits)))
        if dt.sign:
            # as_tuple() keeps the sign separate from the digits
            mantissa = -mantissa

        with encoder.disable_value_sharing():
            encode_semantic(encoder, CBORTag(4, [dt.exponent, mantissa]))


def encode_rational(encoder, value):
    """Encode a rational number as a ``[numerator, denominator]`` pair."""
    # Semantic tag 30
    with encoder.disable_value_sharing():
        encode_semantic(encoder, CBORTag(30, [value.numerator, value.denominator]))


def encode_regexp(encoder, value):
    """Encode a compiled regular expression by its pattern string."""
    # Semantic tag 35
    encode_semantic(encoder, CBORTag(35, as_unicode(value.pattern)))


def encode_mime(encoder, value):
    """Encode an email message by its string form."""
    # Semantic tag 36
    encode_semantic(encoder, CBORTag(36, as_unicode(value.as_string())))


def encode_uuid(encoder, value):
    """Encode a UUID by its binary form."""
    # Semantic tag 37
    encode_semantic(encoder, CBORTag(37, value.bytes))


def encode_set(encoder, value):
    """Encode a set as a tagged array."""
    # Semantic tag 258
    encode_semantic(encoder, CBORTag(258, tuple(value)))


def encode_canonical_set(encoder, value):
    """Encode a set as a tagged array whose items are ordered by their encoded form."""
    # Semantic tag 258
    values = sorted([(encode_sortable_key(encoder, key), key) for key in value])
    encode_semantic(encoder, CBORTag(258, [key[1] for key in values]))


#
# Special encoders (major tag 7)
#

def encode_simple_value(encoder, value):
    """Encode a :class:`CBORSimpleValue` in one byte (below 20) or two bytes."""
    if value.value < 20:
        encoder.write(struct.pack('>B', 0xe0 | value.value))
    else:
        encoder.write(struct.pack('>BB', 0xf8, value.value))


def encode_float(encoder, value):
    """Encode a float as a double, or as a half float for NaN and infinities."""
    # Handle special values efficiently
    import math
    if math.isnan(value):
        encoder.write(b'\xf9\x7e\x00')
    elif math.isinf(value):
        encoder.write(b'\xf9\x7c\x00' if value > 0 else b'\xf9\xfc\x00')
    else:
        encoder.write(struct.pack('>Bd', 0xfb, value))


def encode_minimal_float(encoder, value):
    """Encode a float using the shortest of half, single and double that preserves it."""
    # Handle special values efficiently
    import math
    if math.isnan(value):
        encoder.write(b'\xf9\x7e\x00')
    elif math.isinf(value):
        encoder.write(b'\xf9\x7c\x00' if value > 0 else b'\xf9\xfc\x00')
    else:
        try:
            encoded = struct.pack('>Bf', 0xfa, value)
        except OverflowError:
            # Finite but outside the single precision range: only a double can hold it
            encoded = None

        if encoded is None or struct.unpack('>Bf', encoded)[1] != value:
            encoded = struct.pack('>Bd', 0xfb, value)
            encoder.write(encoded)
        else:
            f16 = pack_float16(value)
            if f16 and unpack_float16(f16[1:]) == value:
                encoder.write(f16)
            else:
                encoder.write(encoded)


def encode_boolean(encoder, value):
    """Encode ``True`` or ``False``."""
    encoder.write(b'\xf5' if value else b'\xf4')


def encode_none(encoder, value):
    """Encode ``None`` as CBOR null."""
    encoder.write(b'\xf6')


def encode_undefined(encoder, value):
    """Encode the ``undefined`` singleton."""
    encoder.write(b'\xf7')


# Keys are either types or (module name, type name) pairs; the latter are resolved lazily
# by CBOREncoder._find_encoder() so that the modules need not be imported up front
default_encoders = OrderedDict([
    (bytes, encode_bytestring),
    (bytearray, encode_bytearray),
    (unicode, encode_string),
    (int, encode_int),
    (long, encode_int),
    (float, encode_float),
    (('decimal', 'Decimal'), encode_decimal),
    (bool, encode_boolean),
    (type(None), encode_none),
    (tuple, encode_array),
    (list, encode_array),
    (dict, encode_map),
    (defaultdict, encode_map),
    (OrderedDict, encode_map),
    (type(undefined), encode_undefined),
    (datetime, encode_datetime),
    (date, encode_date),
    (type(re.compile('')), encode_regexp),
    (('fractions', 'Fraction'), encode_rational),
    (('email.message', 'Message'), encode_mime),
    (('uuid', 'UUID'), encode_uuid),
    (CBORSimpleValue, encode_simple_value),
    (CBORTag, encode_semantic),
    (set, encode_set),
    (frozenset, encode_set)
])

canonical_encoders = OrderedDict([
    (float, encode_minimal_float),
    (dict, encode_canonical_map),
    (defaultdict, encode_canonical_map),
    (OrderedDict, encode_canonical_map),
    (set, encode_canonical_set),
    (frozenset, encode_canonical_set)
])


class CBOREncoder(object):
    """
    Serializes objects to a byte stream using Concise Binary Object Representation.

    :param datetime_as_timestamp: set to ``True`` to serialize datetimes as UNIX timestamps
        (this makes datetimes more concise on the wire but loses the time zone information)
    :param datetime.tzinfo timezone: the default timezone to use for serializing naive datetimes
    :param value_sharing: if ``True``, allows more efficient serializing of repeated values and,
        more importantly, cyclic data structures, at the cost of extra line overhead
    :param default: a callable that is called by the encoder with two arguments
        (encoder, value) when no suitable encoder has been found, and should use the
        methods on the encoder to encode any objects it wants to add to the data stream
    :param canonical: Forces mapping types to be output in a stable order to guarantee that the
        output will always produce the same hash given the same input.
    """

    __slots__ = ('fp', 'datetime_as_timestamp', 'timezone', 'default', 'value_sharing',
                 'json_compatible', '_shared_containers', '_encoders')

    def __init__(self, fp, datetime_as_timestamp=False, timezone=None, value_sharing=False,
                 default=None, canonical=False):
        self.fp = fp
        self.datetime_as_timestamp = datetime_as_timestamp
        self.timezone = timezone
        self.value_sharing = value_sharing
        self.default = default
        self._shared_containers = {}  # indexes used for value sharing
        self._encoders = default_encoders.copy()
        if canonical:
            self._encoders.update(canonical_encoders)

    def _find_encoder(self, obj_type):
        """
        Find an encoder registered for a base class of ``obj_type``.

        Lazily registered ``(module name, type name)`` keys are swapped for the real type
        once their module has been imported. A match is cached under ``obj_type``.

        :return: the encoder callable, or ``None`` if there is no match

        """
        from sys import modules

        for type_, enc in list(iteritems(self._encoders)):
            if type(type_) is tuple:
                modname, typename = type_
                imported_type = getattr(modules.get(modname), typename, None)
                if imported_type is not None:
                    del self._encoders[type_]
                    self._encoders[imported_type] = enc
                    type_ = imported_type
                else:  # pragma: nocover
                    continue

            if issubclass(obj_type, type_):
                self._encoders[obj_type] = enc
                return enc

        return None

    @contextmanager
    def disable_value_sharing(self):
        """Disable value sharing in the encoder for the duration of the context block."""
        old_value_sharing = self.value_sharing
        self.value_sharing = False
        try:
            yield
        finally:
            # Restore the setting even if encoding inside the block raised
            self.value_sharing = old_value_sharing

    def write(self, data):
        """
        Write bytes to the data stream.

        :param data: the bytes to write

        """
        self.fp.write(data)

    def encode(self, obj):
        """
        Encode the given object using CBOR.

        :param obj: the object to encode
        :raises CBOREncodeError: if no encoder (or ``default`` hook) handles the object's type

        """
        obj_type = obj.__class__
        encoder = self._encoders.get(obj_type) or self._find_encoder(obj_type) or self.default
        if not encoder:
            raise CBOREncodeError('cannot serialize type %s' % obj_type.__name__)

        encoder(self, obj)

    def encode_to_bytes(self, obj):
        """
        Encode the given object to a byte buffer and return its value as bytes.

        This method was intended to be used from the ``default`` hook when an object needs to be
        encoded separately from the rest but while still taking advantage of the shared value
        registry.

        """
        old_fp = self.fp
        self.fp = fp = BytesIO()
        try:
            self.encode(obj)
        finally:
            # Always restore the real stream, even if encoding the object failed
            self.fp = old_fp

        return fp.getvalue()


def dumps(obj, **kwargs):
    """
    Serialize an object to a bytestring.

    :param obj: the object to serialize
    :param kwargs: keyword arguments passed to :class:`~.CBOREncoder`
    :return: the serialized output
    :rtype: bytes

    """
    fp = BytesIO()
    dump(obj, fp, **kwargs)
    return fp.getvalue()


def dump(obj, fp, **kwargs):
    """
    Serialize an object to a file.

    :param obj: the object to serialize
    :param fp: a file-like object
    :param kwargs: keyword arguments passed to :class:`~.CBOREncoder`

    """
    CBOREncoder(fp, **kwargs).encode(obj)
"""Value types shared by the vendored cbor2 encoder and decoder (cbor2/types.py)."""


class CBORTag(object):
    """
    Represents a CBOR semantic tag.

    :param int tag: tag number
    :param value: encapsulated value (any object)
    """

    __slots__ = 'tag', 'value'

    def __init__(self, tag, value):
        self.tag = tag
        self.value = value

    def __eq__(self, other):
        if isinstance(other, CBORTag):
            return self.tag == other.tag and self.value == other.value
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __repr__(self):
        return 'CBORTag({self.tag}, {self.value!r})'.format(self=self)


class CBORSimpleValue(object):
    """
    Represents a CBOR "simple value".

    Compares equal to another simple value, or to a plain integer, with the same value.

    :param int value: the value (0-255)
    :raises TypeError: if the value is outside the 0-255 range
    """

    __slots__ = 'value'

    def __init__(self, value):
        if value < 0 or value > 255:
            raise TypeError('simple value too big')
        self.value = value

    def __eq__(self, other):
        if isinstance(other, CBORSimpleValue):
            return self.value == other.value
        elif isinstance(other, int):
            return self.value == other
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; hash like the wrapped
        # integer so that equal objects (including plain ints) hash equally and simple
        # values can be used as dictionary keys
        return hash(self.value)

    def __repr__(self):
        return 'CBORSimpleValue({self.value})'.format(self=self)


class UndefinedType(object):
    """Type of the ``undefined`` singleton."""

    __slots__ = ()


#: Represents the "undefined" value.
undefined = UndefinedType()
# Unique sentinel object, distinct from every other value
break_marker = object()