Mercurial > hg
comparison mercurial/revlog.py @ 30795:78ac56aebab6
revlog: use compression engine API for compression
This commit swaps the just-added revlog compressor API into
the revlog class.
Instead of implementing zlib compression inline in compress(), we
now store a cached-on-first-use revlog compressor on each revlog
instance and invoke its "compress()" method.
As part of this, revlog.compress() has been refactored a bit to use
a cleaner code flow and modern formatting (e.g. avoiding
parentheses around returned tuples).
On a mozilla-unified repo, here are the "compress" times for a few
commands (for each command, the first line is before this change and
the second line is after):
$ hg perfrevlogchunks -c
! wall 5.772450 comb 5.780000 user 5.780000 sys 0.000000 (best of 3)
! wall 5.795158 comb 5.790000 user 5.790000 sys 0.000000 (best of 3)
$ hg perfrevlogchunks -m
! wall 9.975789 comb 9.970000 user 9.970000 sys 0.000000 (best of 3)
! wall 10.019505 comb 10.010000 user 10.010000 sys 0.000000 (best of 3)
Compression times did seem to slow down just a little. There are
360,210 changelog revisions and 359,342 manifest revisions. For the
changelog, mean time to compress a revision increased from ~16.025us to
~16.088us. That's basically a function call or an attribute lookup. I
suppose this is the price you pay for abstraction. It's so low that
I'm not concerned.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 02 Jan 2017 11:22:52 -0800 |
parents | b6f455a6e4d6 |
children | 2b279126b8f5 |
comparison
equal
deleted
inserted
replaced
30794:31e1f0d4ab44 | 30795:78ac56aebab6 |
---|---|
37 util, | 37 util, |
38 ) | 38 ) |
39 | 39 |
40 _pack = struct.pack | 40 _pack = struct.pack |
41 _unpack = struct.unpack | 41 _unpack = struct.unpack |
42 _compress = zlib.compress | |
43 _decompress = zlib.decompress | 42 _decompress = zlib.decompress |
44 | 43 |
45 # revlog header flags | 44 # revlog header flags |
46 REVLOGV0 = 0 | 45 REVLOGV0 = 0 |
47 REVLOGNG = 1 | 46 REVLOGNG = 1 |
338 self.nodemap = self._nodecache = nodemap | 337 self.nodemap = self._nodecache = nodemap |
339 if not self._chunkcache: | 338 if not self._chunkcache: |
340 self._chunkclear() | 339 self._chunkclear() |
341 # revnum -> (chain-length, sum-delta-length) | 340 # revnum -> (chain-length, sum-delta-length) |
342 self._chaininfocache = {} | 341 self._chaininfocache = {} |
342 | |
343 @util.propertycache | |
344 def _compressor(self): | |
345 return util.compengines['zlib'].revlogcompressor() | |
343 | 346 |
344 def tip(self): | 347 def tip(self): |
345 return self.node(len(self.index) - 2) | 348 return self.node(len(self.index) - 2) |
346 def __contains__(self, rev): | 349 def __contains__(self, rev): |
347 return 0 <= rev < len(self) | 350 return 0 <= rev < len(self) |
1463 finally: | 1466 finally: |
1464 if dfh: | 1467 if dfh: |
1465 dfh.close() | 1468 dfh.close() |
1466 ifh.close() | 1469 ifh.close() |
1467 | 1470 |
1468 def compress(self, text): | 1471 def compress(self, data): |
1469 """ generate a possibly-compressed representation of text """ | 1472 """Generate a possibly-compressed representation of data.""" |
1470 if not text: | 1473 if not data: |
1471 return ("", text) | 1474 return '', data |
1472 l = len(text) | 1475 |
1473 bin = None | 1476 compressed = self._compressor.compress(data) |
1474 if l < 44: | 1477 |
1475 pass | 1478 if compressed: |
1476 elif l > 1000000: | 1479 # The revlog compressor added the header in the returned data. |
1477 # zlib makes an internal copy, thus doubling memory usage for | 1480 return '', compressed |
1478 # large files, so lets do this in pieces | 1481 |
1479 z = zlib.compressobj() | 1482 if data[0] == '\0': |
1480 p = [] | 1483 return '', data |
1481 pos = 0 | 1484 return 'u', data |
1482 while pos < l: | |
1483 pos2 = pos + 2**20 | |
1484 p.append(z.compress(text[pos:pos2])) | |
1485 pos = pos2 | |
1486 p.append(z.flush()) | |
1487 if sum(map(len, p)) < l: | |
1488 bin = "".join(p) | |
1489 else: | |
1490 bin = _compress(text) | |
1491 if bin is None or len(bin) >= l: | |
1492 if text[0] == '\0': | |
1493 return ("", text) | |
1494 return ('u', text) | |
1495 return ("", bin) | |
1496 | 1485 |
1497 def decompress(self, data): | 1486 def decompress(self, data): |
1498 """Decompress a revlog chunk. | 1487 """Decompress a revlog chunk. |
1499 | 1488 |
1500 The chunk is expected to begin with a header identifying the | 1489 The chunk is expected to begin with a header identifying the |