# HG changeset patch # User Gregory Szorc # Date 1483384972 28800 # Node ID 78ac56aebab67b0e08242243085b8260bc9b3866 # Parent 31e1f0d4ab44b9d7ac4a96244dee753bd5c823e6 revlog: use compression engine API for compression This commit swaps in the just-added revlog compressor API into the revlog class. Instead of implementing zlib compression inline in compress(), we now store a cached-on-first-use revlog compressor on each revlog instance and invoke its "compress()" method. As part of this, revlog.compress() has been refactored a bit to use a cleaner code flow and modern formatting (e.g. avoiding parentheses around returned tuples). On a mozilla-unified repo, here are the "compress" times for a few commands: $ hg perfrevlogchunks -c ! wall 5.772450 comb 5.780000 user 5.780000 sys 0.000000 (best of 3) ! wall 5.795158 comb 5.790000 user 5.790000 sys 0.000000 (best of 3) $ hg perfrevlogchunks -m ! wall 9.975789 comb 9.970000 user 9.970000 sys 0.000000 (best of 3) ! wall 10.019505 comb 10.010000 user 10.010000 sys 0.000000 (best of 3) Compression times did seem to slow down just a little. There are 360,210 changelog revisions and 359,342 manifest revisions. For the changelog, mean time to compress a revision increased from ~16.025us to ~16.088us. That's basically a function call or an attribute lookup. I suppose this is the price you pay for abstraction. It's so low that I'm not concerned. 
diff -r 31e1f0d4ab44 -r 78ac56aebab6 mercurial/revlog.py --- a/mercurial/revlog.py Mon Jan 02 12:39:03 2017 -0800 +++ b/mercurial/revlog.py Mon Jan 02 11:22:52 2017 -0800 @@ -39,7 +39,6 @@ _pack = struct.pack _unpack = struct.unpack -_compress = zlib.compress _decompress = zlib.decompress # revlog header flags @@ -341,6 +340,10 @@ # revnum -> (chain-length, sum-delta-length) self._chaininfocache = {} + @util.propertycache + def _compressor(self): + return util.compengines['zlib'].revlogcompressor() + def tip(self): return self.node(len(self.index) - 2) def __contains__(self, rev): @@ -1465,34 +1468,20 @@ dfh.close() ifh.close() - def compress(self, text): - """ generate a possibly-compressed representation of text """ - if not text: - return ("", text) - l = len(text) - bin = None - if l < 44: - pass - elif l > 1000000: - # zlib makes an internal copy, thus doubling memory usage for - # large files, so lets do this in pieces - z = zlib.compressobj() - p = [] - pos = 0 - while pos < l: - pos2 = pos + 2**20 - p.append(z.compress(text[pos:pos2])) - pos = pos2 - p.append(z.flush()) - if sum(map(len, p)) < l: - bin = "".join(p) - else: - bin = _compress(text) - if bin is None or len(bin) >= l: - if text[0] == '\0': - return ("", text) - return ('u', text) - return ("", bin) + def compress(self, data): + """Generate a possibly-compressed representation of data.""" + if not data: + return '', data + + compressed = self._compressor.compress(data) + + if compressed: + # The revlog compressor added the header in the returned data. + return '', compressed + + if data[0] == '\0': + return '', data + return 'u', data def decompress(self, data): """Decompress a revlog chunk.