mercurial/util.py
changeset 30794 31e1f0d4ab44
parent 30773 c390b40fe1d7
child 30798 f50c0db50025
--- a/mercurial/util.py	Mon Jan 02 13:00:16 2017 -0800
+++ b/mercurial/util.py	Mon Jan 02 12:39:03 2017 -0800
@@ -3207,6 +3207,19 @@
         """
         raise NotImplementedError()
 
+    def revlogcompressor(self, opts=None):
+        """Obtain an object that can be used to compress revlog entries.
+
+        The object has a ``compress(data)`` method that compresses binary
+        data. This method returns compressed binary data or ``None`` if
+        the data could not be compressed (too small, not compressible, etc.).
+        The returned data should have a header uniquely identifying this
+        compression format so decompression can be routed to this engine.
+
+        The object is reusable but is not thread safe.
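+
+        An illustrative sketch of how a caller might use the returned
+        object (the names below are hypothetical, not part of the API)::
+
+            compressor = engine.revlogcompressor()
+            for chunk in chunks:
+                compressed = compressor.compress(chunk)
+                stored = compressed if compressed is not None else chunk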
+        """
+        raise NotImplementedError()
+
 class _zlibengine(compressionengine):
     def name(self):
         return 'zlib'
@@ -3241,6 +3254,41 @@
 
         return chunkbuffer(gen())
 
+    class zlibrevlogcompressor(object):
+        def compress(self, data):
+            insize = len(data)
+            # Caller handles empty input case.
+            assert insize > 0
+
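+            # Inputs this small are unlikely to win back zlib's header and
+            # checksum overhead, so don't bother compressing them.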
+            if insize < 44:
+                return None
+
+            elif insize <= 1000000:
+                compressed = zlib.compress(data)
+                if len(compressed) < insize:
+                    return compressed
+                return None
+
+            # zlib makes an internal copy of the input buffer, doubling
+            # memory usage for large inputs, so compress large inputs as a
+            # stream of smaller chunks instead.
+            else:
+                z = zlib.compressobj()
+                parts = []
+                pos = 0
+                while pos < insize:
+                    pos2 = pos + 2**20
+                    parts.append(z.compress(data[pos:pos2]))
+                    pos = pos2
+                parts.append(z.flush())
+
+                if sum(map(len, parts)) < insize:
+                    return ''.join(parts)
+                return None
+
+    def revlogcompressor(self, opts=None):
+        return self.zlibrevlogcompressor()
+
 compengines.register(_zlibengine())
 
 class _bz2engine(compressionengine):
@@ -3315,6 +3363,13 @@
     def decompressorreader(self, fh):
         return fh
 
+    class nooprevlogcompressor(object):
+        def compress(self, data):
+            return None
+
+    def revlogcompressor(self, opts=None):
+        return self.nooprevlogcompressor()
+
 compengines.register(_noopengine())
 
 class _zstdengine(compressionengine):
@@ -3363,6 +3418,49 @@
         dctx = zstd.ZstdDecompressor()
         return chunkbuffer(dctx.read_from(fh))
 
+    class zstdrevlogcompressor(object):
+        def __init__(self, zstd, level=3):
+            # Writing the content size adds a few bytes to the output.
+            # However, it lets decompression pre-allocate a buffer large
+            # enough to hold the full result, which is faster.
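+            # (For example, a one-shot ZstdDecompressor.decompress() call can
+            # size its output buffer from the frame header.)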
+            self._cctx = zstd.ZstdCompressor(level=level,
+                                             write_content_size=True)
+            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
+
+        def compress(self, data):
+            insize = len(data)
+            # Caller handles empty input case.
+            assert insize > 0
+
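+            # As with zlib, inputs this small are unlikely to compress once
+            # zstd's frame overhead is taken into account.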
+            if insize < 50:
+                return None
+
+            elif insize <= 1000000:
+                compressed = self._cctx.compress(data)
+                if len(compressed) < insize:
+                    return compressed
+                return None
+            else:
+                z = self._cctx.compressobj()
+                chunks = []
+                pos = 0
+                while pos < insize:
+                    pos2 = pos + self._compinsize
+                    chunk = z.compress(data[pos:pos2])
+                    if chunk:
+                        chunks.append(chunk)
+                    pos = pos2
+                chunks.append(z.flush())
+
+                if sum(map(len, chunks)) < insize:
+                    return ''.join(chunks)
+                return None
+
+    def revlogcompressor(self, opts=None):
+        opts = opts or {}
+        return self.zstdrevlogcompressor(self._module,
+                                         level=opts.get('level', 3))
+
 compengines.register(_zstdengine())
 
 # convenient shortcut