# HG changeset patch # User Gregory Szorc # Date 1483384972 28800 # Node ID 78ac56aebab67b0e08242243085b8260bc9b3866 # Parent 31e1f0d4ab44b9d7ac4a96244dee753bd5c823e6 revlog: use compression engine API for compression This commit swaps in the just-added revlog compressor API into the revlog class. Instead of implementing zlib compression inline in compress(), we now store a cached-on-first-use revlog compressor on each revlog instance and invoke its "compress()" method. As part of this, revlog.compress() has been refactored a bit to use a cleaner code flow and modern formatting (e.g. avoiding parentheses around returned tuples). On a mozilla-unified repo, here are the "compress" times for a few commands: $ hg perfrevlogchunks -c ! wall 5.772450 comb 5.780000 user 5.780000 sys 0.000000 (best of 3) ! wall 5.795158 comb 5.790000 user 5.790000 sys 0.000000 (best of 3) $ hg perfrevlogchunks -m ! wall 9.975789 comb 9.970000 user 9.970000 sys 0.000000 (best of 3) ! wall 10.019505 comb 10.010000 user 10.010000 sys 0.000000 (best of 3) Compression times did seem to slow down just a little. There are 360,210 changelog revisions and 359,342 manifest revisions. For the changelog, mean time to compress a revision increased from ~16.025us to ~16.088us. That's basically a function call or an attribute lookup. I suppose this is the price you pay for abstraction. It's so low that I'm not concerned. 
diff -r 31e1f0d4ab44 -r 78ac56aebab6 mercurial/revlog.py --- a/mercurial/revlog.py Mon Jan 02 12:39:03 2017 -0800 +++ b/mercurial/revlog.py Mon Jan 02 11:22:52 2017 -0800 @@ -39,7 +39,6 @@ _pack = struct.pack _unpack = struct.unpack -_compress = zlib.compress _decompress = zlib.decompress # revlog header flags @@ -341,6 +340,10 @@ # revnum -> (chain-length, sum-delta-length) self._chaininfocache = {} + @util.propertycache + def _compressor(self): + return util.compengines['zlib'].revlogcompressor() + def tip(self): return self.node(len(self.index) - 2) def __contains__(self, rev): @@ -1465,34 +1468,20 @@ dfh.close() ifh.close() - def compress(self, text): - """ generate a possibly-compressed representation of text """ - if not text: - return ("", text) - l = len(text) - bin = None - if l < 44: - pass - elif l > 1000000: - # zlib makes an internal copy, thus doubling memory usage for - # large files, so lets do this in pieces - z = zlib.compressobj() - p = [] - pos = 0 - while pos < l: - pos2 = pos + 2**20 - p.append(z.compress(text[pos:pos2])) - pos = pos2 - p.append(z.flush()) - if sum(map(len, p)) < l: - bin = "".join(p) - else: - bin = _compress(text) - if bin is None or len(bin) >= l: - if text[0] == '\0': - return ("", text) - return ('u', text) - return ("", bin) + def compress(self, data): + """Generate a possibly-compressed representation of data.""" + if not data: + return '', data + + compressed = self._compressor.compress(data) + + if compressed: + # The revlog compressor added the header in the returned data. + return '', compressed + + if data[0] == '\0': + return '', data + return 'u', data def decompress(self, data): """Decompress a revlog chunk.