changeset 39413:babad5ebaf0a

cborutil: add a buffering decoder

The sansiodecoder leaves it up to the callers to feed in data that wasn't fully consumed last time. This commit implements a decoder that performs buffering of leftover chunks from the previous invocation. It otherwise behaves identically to sansiodecoder.

Differential Revision: https://phab.mercurial-scm.org/D4434
author Gregory Szorc <gregory.szorc@gmail.com>
date Wed, 29 Aug 2018 14:29:01 -0700
parents a40d3da89b7d
children 1cbe19eb496d
files mercurial/utils/cborutil.py tests/test-cbor.py
diffstat 2 files changed, 66 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/utils/cborutil.py	Fri Aug 31 15:54:17 2018 -0700
+++ b/mercurial/utils/cborutil.py	Wed Aug 29 14:29:01 2018 -0700
@@ -898,6 +898,48 @@
         self._decodedvalues = []
         return l
 
+class bufferingdecoder(object):
+    """A CBOR decoder that buffers undecoded input.
+
+    This is a thin wrapper around ``sansiodecoder`` that adds a buffering
+    layer. All input that isn't consumed by ``sansiodecoder`` is retained
+    and prepended to the input of the next ``decode()`` call.
+
+    TODO consider adding limits as to the maximum amount of data that can
+    be buffered.
+    """
+    def __init__(self):
+        self._decoder = sansiodecoder()
+        # Unconsumed tail of earlier input; None when nothing is buffered.
+        self._leftover = None
+
+    def decode(self, b):
+        """Attempt to decode ``b``, plus any buffered input, to CBOR values.
+
+        Returns a tuple with the following fields:
+
+        * Bool indicating whether new values are available for retrieval.
+        * Integer number of bytes decoded from the new input.
+        * Integer number of bytes wanted to decode the next value.
+        """
+        oldlen = 0
+        if self._leftover:
+            oldlen = len(self._leftover)
+            b = self._leftover + b
+            self._leftover = None
+
+        available, readcount, wanted = self._decoder.decode(b)
+
+        if readcount < len(b):
+            self._leftover = b[readcount:]
+
+        # readcount covers the combined buffer; discount carried-over bytes.
+        return available, readcount - oldlen, wanted
+
+    def getavailable(self):
+        """Return whatever the wrapped decoder's ``getavailable()`` returns."""
+        return self._decoder.getavailable()
+
 def decodeall(b):
     """Decode all CBOR items present in an iterable of bytes.
 
--- a/tests/test-cbor.py	Fri Aug 31 15:54:17 2018 -0700
+++ b/tests/test-cbor.py	Wed Aug 29 14:29:01 2018 -0700
@@ -941,6 +941,30 @@
         decoder = cborutil.sansiodecoder()
         self.assertEqual(decoder.decode(b''), (False, 0, 0))
 
+class BufferingDecoderTests(TestCase):
+    def testsimple(self):
+        # One top-level list mixing bytes, a map, simple values and a list.
+        source = [
+            b'foobar',
+            b'x' * 128,
+            {b'foo': b'bar'},
+            True,
+            False,
+            None,
+            [None] * 128,
+        ]
+        encoded = b''.join(cborutil.streamencode(source))
+        total = len(encoded)
+
+        # Feed the stream in fixed-size chunks of 1..31 bytes.
+        for chunksize in range(1, 32):
+            decoder = cborutil.bufferingdecoder()
+
+            for offset in range(0, total, chunksize):
+                decoder.decode(encoded[offset:offset + chunksize])
+
+            self.assertEqual(decoder.getavailable(), [source])
+
 class DecodeallTests(TestCase):
     def testemptyinput(self):
         self.assertEqual(cborutil.decodeall(b''), [])