Mercurial > hg
comparison mercurial/util.py @ 42041:3e47d1ec9da5
util: extract compression code in `mercurial.utils.compression`
The code seems large enough to be worth extracting. This is similar to what was
done for various modules in `mercurial/utils/`.
Since none of the compression logic takes a `ui` object, issuing deprecation
warnings is tricky. Luckily the logic does not seem to have many external users.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 27 Mar 2019 16:45:14 +0100 |
parents | 7f63ec6969f3 |
children | d8e55c0c642c |
comparison
equal
deleted
inserted
replaced
42040:02fa567f8a3c | 42041:3e47d1ec9da5 |
---|---|
14 """ | 14 """ |
15 | 15 |
16 from __future__ import absolute_import, print_function | 16 from __future__ import absolute_import, print_function |
17 | 17 |
18 import abc | 18 import abc |
19 import bz2 | |
20 import collections | 19 import collections |
21 import contextlib | 20 import contextlib |
22 import errno | 21 import errno |
23 import gc | 22 import gc |
24 import hashlib | 23 import hashlib |
32 import stat | 31 import stat |
33 import sys | 32 import sys |
34 import time | 33 import time |
35 import traceback | 34 import traceback |
36 import warnings | 35 import warnings |
37 import zlib | |
38 | 36 |
39 from .thirdparty import ( | 37 from .thirdparty import ( |
40 attr, | 38 attr, |
41 ) | 39 ) |
42 from hgdemandimport import tracing | 40 from hgdemandimport import tracing |
48 policy, | 46 policy, |
49 pycompat, | 47 pycompat, |
50 urllibcompat, | 48 urllibcompat, |
51 ) | 49 ) |
52 from .utils import ( | 50 from .utils import ( |
51 compression, | |
53 procutil, | 52 procutil, |
54 stringutil, | 53 stringutil, |
55 ) | 54 ) |
56 | 55 |
57 base85 = policy.importmod(r'base85') | 56 base85 = policy.importmod(r'base85') |
124 statisexec = platform.statisexec | 123 statisexec = platform.statisexec |
125 statislink = platform.statislink | 124 statislink = platform.statislink |
126 umask = platform.umask | 125 umask = platform.umask |
127 unlink = platform.unlink | 126 unlink = platform.unlink |
128 username = platform.username | 127 username = platform.username |
128 | |
129 # small compat layer | |
130 compengines = compression.compengines | |
131 SERVERROLE = compression.SERVERROLE | |
132 CLIENTROLE = compression.CLIENTROLE | |
129 | 133 |
130 try: | 134 try: |
131 recvfds = osutil.recvfds | 135 recvfds = osutil.recvfds |
132 except AttributeError: | 136 except AttributeError: |
133 pass | 137 pass |
3204 pos = path.rfind('/') | 3208 pos = path.rfind('/') |
3205 while pos != -1: | 3209 while pos != -1: |
3206 yield path[:pos] | 3210 yield path[:pos] |
3207 pos = path.rfind('/', 0, pos) | 3211 pos = path.rfind('/', 0, pos) |
3208 | 3212 |
3209 # compression code | |
3210 | |
3211 SERVERROLE = 'server' | |
3212 CLIENTROLE = 'client' | |
3213 | |
3214 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport', | |
3215 (r'name', r'serverpriority', | |
3216 r'clientpriority')) | |
3217 | |
3218 class compressormanager(object): | |
3219 """Holds registrations of various compression engines. | |
3220 | |
3221 This class essentially abstracts the differences between compression | |
3222 engines to allow new compression formats to be added easily, possibly from | |
3223 extensions. | |
3224 | |
3225 Compressors are registered against the global instance by calling its | |
3226 ``register()`` method. | |
3227 """ | |
3228 def __init__(self): | |
3229 self._engines = {} | |
3230 # Bundle spec human name to engine name. | |
3231 self._bundlenames = {} | |
3232 # Internal bundle identifier to engine name. | |
3233 self._bundletypes = {} | |
3234 # Revlog header to engine name. | |
3235 self._revlogheaders = {} | |
3236 # Wire proto identifier to engine name. | |
3237 self._wiretypes = {} | |
3238 | |
3239 def __getitem__(self, key): | |
3240 return self._engines[key] | |
3241 | |
3242 def __contains__(self, key): | |
3243 return key in self._engines | |
3244 | |
3245 def __iter__(self): | |
3246 return iter(self._engines.keys()) | |
3247 | |
3248 def register(self, engine): | |
3249 """Register a compression engine with the manager. | |
3250 | |
3251 The argument must be a ``compressionengine`` instance. | |
3252 """ | |
3253 if not isinstance(engine, compressionengine): | |
3254 raise ValueError(_('argument must be a compressionengine')) | |
3255 | |
3256 name = engine.name() | |
3257 | |
3258 if name in self._engines: | |
3259 raise error.Abort(_('compression engine %s already registered') % | |
3260 name) | |
3261 | |
3262 bundleinfo = engine.bundletype() | |
3263 if bundleinfo: | |
3264 bundlename, bundletype = bundleinfo | |
3265 | |
3266 if bundlename in self._bundlenames: | |
3267 raise error.Abort(_('bundle name %s already registered') % | |
3268 bundlename) | |
3269 if bundletype in self._bundletypes: | |
3270 raise error.Abort(_('bundle type %s already registered by %s') % | |
3271 (bundletype, self._bundletypes[bundletype])) | |
3272 | |
3273 # No external facing name declared. | |
3274 if bundlename: | |
3275 self._bundlenames[bundlename] = name | |
3276 | |
3277 self._bundletypes[bundletype] = name | |
3278 | |
3279 wiresupport = engine.wireprotosupport() | |
3280 if wiresupport: | |
3281 wiretype = wiresupport.name | |
3282 if wiretype in self._wiretypes: | |
3283 raise error.Abort(_('wire protocol compression %s already ' | |
3284 'registered by %s') % | |
3285 (wiretype, self._wiretypes[wiretype])) | |
3286 | |
3287 self._wiretypes[wiretype] = name | |
3288 | |
3289 revlogheader = engine.revlogheader() | |
3290 if revlogheader and revlogheader in self._revlogheaders: | |
3291 raise error.Abort(_('revlog header %s already registered by %s') % | |
3292 (revlogheader, self._revlogheaders[revlogheader])) | |
3293 | |
3294 if revlogheader: | |
3295 self._revlogheaders[revlogheader] = name | |
3296 | |
3297 self._engines[name] = engine | |
3298 | |
3299 @property | |
3300 def supportedbundlenames(self): | |
3301 return set(self._bundlenames.keys()) | |
3302 | |
3303 @property | |
3304 def supportedbundletypes(self): | |
3305 return set(self._bundletypes.keys()) | |
3306 | |
3307 def forbundlename(self, bundlename): | |
3308 """Obtain a compression engine registered to a bundle name. | |
3309 | |
3310 Will raise KeyError if the bundle type isn't registered. | |
3311 | |
3312 Will abort if the engine is known but not available. | |
3313 """ | |
3314 engine = self._engines[self._bundlenames[bundlename]] | |
3315 if not engine.available(): | |
3316 raise error.Abort(_('compression engine %s could not be loaded') % | |
3317 engine.name()) | |
3318 return engine | |
3319 | |
3320 def forbundletype(self, bundletype): | |
3321 """Obtain a compression engine registered to a bundle type. | |
3322 | |
3323 Will raise KeyError if the bundle type isn't registered. | |
3324 | |
3325 Will abort if the engine is known but not available. | |
3326 """ | |
3327 engine = self._engines[self._bundletypes[bundletype]] | |
3328 if not engine.available(): | |
3329 raise error.Abort(_('compression engine %s could not be loaded') % | |
3330 engine.name()) | |
3331 return engine | |
3332 | |
3333 def supportedwireengines(self, role, onlyavailable=True): | |
3334 """Obtain compression engines that support the wire protocol. | |
3335 | |
3336 Returns a list of engines in prioritized order, most desired first. | |
3337 | |
3338 If ``onlyavailable`` is set, filter out engines that can't be | |
3339 loaded. | |
3340 """ | |
3341 assert role in (SERVERROLE, CLIENTROLE) | |
3342 | |
3343 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority' | |
3344 | |
3345 engines = [self._engines[e] for e in self._wiretypes.values()] | |
3346 if onlyavailable: | |
3347 engines = [e for e in engines if e.available()] | |
3348 | |
3349 def getkey(e): | |
3350 # Sort first by priority, highest first. In case of tie, sort | |
3351 # alphabetically. This is arbitrary, but ensures output is | |
3352 # stable. | |
3353 w = e.wireprotosupport() | |
3354 return -1 * getattr(w, attr), w.name | |
3355 | |
3356 return list(sorted(engines, key=getkey)) | |
3357 | |
3358 def forwiretype(self, wiretype): | |
3359 engine = self._engines[self._wiretypes[wiretype]] | |
3360 if not engine.available(): | |
3361 raise error.Abort(_('compression engine %s could not be loaded') % | |
3362 engine.name()) | |
3363 return engine | |
3364 | |
3365 def forrevlogheader(self, header): | |
3366 """Obtain a compression engine registered to a revlog header. | |
3367 | |
3368 Will raise KeyError if the revlog header value isn't registered. | |
3369 """ | |
3370 return self._engines[self._revlogheaders[header]] | |
3371 | |
3372 compengines = compressormanager() | |
3373 | |
3374 class compressionengine(object): | |
3375 """Base class for compression engines. | |
3376 | |
3377 Compression engines must implement the interface defined by this class. | |
3378 """ | |
3379 def name(self): | |
3380 """Returns the name of the compression engine. | |
3381 | |
3382 This is the key the engine is registered under. | |
3383 | |
3384 This method must be implemented. | |
3385 """ | |
3386 raise NotImplementedError() | |
3387 | |
3388 def available(self): | |
3389 """Whether the compression engine is available. | |
3390 | |
3391 The intent of this method is to allow optional compression engines | |
3392 that may not be available in all installations (such as engines relying | |
3393 on C extensions that may not be present). | |
3394 """ | |
3395 return True | |
3396 | |
3397 def bundletype(self): | |
3398 """Describes bundle identifiers for this engine. | |
3399 | |
3400 If this compression engine isn't supported for bundles, returns None. | |
3401 | |
3402 If this engine can be used for bundles, returns a 2-tuple of strings of | |
3403 the user-facing "bundle spec" compression name and an internal | |
3404 identifier used to denote the compression format within bundles. To | |
3405 exclude the name from external usage, set the first element to ``None``. | |
3406 | |
3407 If bundle compression is supported, the class must also implement | |
3408 ``compressstream`` and `decompressorreader``. | |
3409 | |
3410 The docstring of this method is used in the help system to tell users | |
3411 about this engine. | |
3412 """ | |
3413 return None | |
3414 | |
3415 def wireprotosupport(self): | |
3416 """Declare support for this compression format on the wire protocol. | |
3417 | |
3418 If this compression engine isn't supported for compressing wire | |
3419 protocol payloads, returns None. | |
3420 | |
3421 Otherwise, returns ``compenginewireprotosupport`` with the following | |
3422 fields: | |
3423 | |
3424 * String format identifier | |
3425 * Integer priority for the server | |
3426 * Integer priority for the client | |
3427 | |
3428 The integer priorities are used to order the advertisement of format | |
3429 support by server and client. The highest integer is advertised | |
3430 first. Integers with non-positive values aren't advertised. | |
3431 | |
3432 The priority values are somewhat arbitrary and only used for default | |
3433 ordering. The relative order can be changed via config options. | |
3434 | |
3435 If wire protocol compression is supported, the class must also implement | |
3436 ``compressstream`` and ``decompressorreader``. | |
3437 """ | |
3438 return None | |
3439 | |
3440 def revlogheader(self): | |
3441 """Header added to revlog chunks that identifies this engine. | |
3442 | |
3443 If this engine can be used to compress revlogs, this method should | |
3444 return the bytes used to identify chunks compressed with this engine. | |
3445 Else, the method should return ``None`` to indicate it does not | |
3446 participate in revlog compression. | |
3447 """ | |
3448 return None | |
3449 | |
3450 def compressstream(self, it, opts=None): | |
3451 """Compress an iterator of chunks. | |
3452 | |
3453 The method receives an iterator (ideally a generator) of chunks of | |
3454 bytes to be compressed. It returns an iterator (ideally a generator) | |
3455 of bytes of chunks representing the compressed output. | |
3456 | |
3457 Optionally accepts an argument defining how to perform compression. | |
3458 Each engine treats this argument differently. | |
3459 """ | |
3460 raise NotImplementedError() | |
3461 | |
3462 def decompressorreader(self, fh): | |
3463 """Perform decompression on a file object. | |
3464 | |
3465 Argument is an object with a ``read(size)`` method that returns | |
3466 compressed data. Return value is an object with a ``read(size)`` that | |
3467 returns uncompressed data. | |
3468 """ | |
3469 raise NotImplementedError() | |
3470 | |
3471 def revlogcompressor(self, opts=None): | |
3472 """Obtain an object that can be used to compress revlog entries. | |
3473 | |
3474 The object has a ``compress(data)`` method that compresses binary | |
3475 data. This method returns compressed binary data or ``None`` if | |
3476 the data could not be compressed (too small, not compressible, etc). | |
3477 The returned data should have a header uniquely identifying this | |
3478 compression format so decompression can be routed to this engine. | |
3479 This header should be identified by the ``revlogheader()`` return | |
3480 value. | |
3481 | |
3482 The object has a ``decompress(data)`` method that decompresses | |
3483 data. The method will only be called if ``data`` begins with | |
3484 ``revlogheader()``. The method should return the raw, uncompressed | |
3485 data or raise a ``StorageError``. | |
3486 | |
3487 The object is reusable but is not thread safe. | |
3488 """ | |
3489 raise NotImplementedError() | |
3490 | |
3491 class _CompressedStreamReader(object): | |
3492 def __init__(self, fh): | |
3493 if safehasattr(fh, 'unbufferedread'): | |
3494 self._reader = fh.unbufferedread | |
3495 else: | |
3496 self._reader = fh.read | |
3497 self._pending = [] | |
3498 self._pos = 0 | |
3499 self._eof = False | |
3500 | |
3501 def _decompress(self, chunk): | |
3502 raise NotImplementedError() | |
3503 | |
3504 def read(self, l): | |
3505 buf = [] | |
3506 while True: | |
3507 while self._pending: | |
3508 if len(self._pending[0]) > l + self._pos: | |
3509 newbuf = self._pending[0] | |
3510 buf.append(newbuf[self._pos:self._pos + l]) | |
3511 self._pos += l | |
3512 return ''.join(buf) | |
3513 | |
3514 newbuf = self._pending.pop(0) | |
3515 if self._pos: | |
3516 buf.append(newbuf[self._pos:]) | |
3517 l -= len(newbuf) - self._pos | |
3518 else: | |
3519 buf.append(newbuf) | |
3520 l -= len(newbuf) | |
3521 self._pos = 0 | |
3522 | |
3523 if self._eof: | |
3524 return ''.join(buf) | |
3525 chunk = self._reader(65536) | |
3526 self._decompress(chunk) | |
3527 if not chunk and not self._pending and not self._eof: | |
3528 # No progress and no new data, bail out | |
3529 return ''.join(buf) | |
3530 | |
3531 class _GzipCompressedStreamReader(_CompressedStreamReader): | |
3532 def __init__(self, fh): | |
3533 super(_GzipCompressedStreamReader, self).__init__(fh) | |
3534 self._decompobj = zlib.decompressobj() | |
3535 def _decompress(self, chunk): | |
3536 newbuf = self._decompobj.decompress(chunk) | |
3537 if newbuf: | |
3538 self._pending.append(newbuf) | |
3539 d = self._decompobj.copy() | |
3540 try: | |
3541 d.decompress('x') | |
3542 d.flush() | |
3543 if d.unused_data == 'x': | |
3544 self._eof = True | |
3545 except zlib.error: | |
3546 pass | |
3547 | |
3548 class _BZ2CompressedStreamReader(_CompressedStreamReader): | |
3549 def __init__(self, fh): | |
3550 super(_BZ2CompressedStreamReader, self).__init__(fh) | |
3551 self._decompobj = bz2.BZ2Decompressor() | |
3552 def _decompress(self, chunk): | |
3553 newbuf = self._decompobj.decompress(chunk) | |
3554 if newbuf: | |
3555 self._pending.append(newbuf) | |
3556 try: | |
3557 while True: | |
3558 newbuf = self._decompobj.decompress('') | |
3559 if newbuf: | |
3560 self._pending.append(newbuf) | |
3561 else: | |
3562 break | |
3563 except EOFError: | |
3564 self._eof = True | |
3565 | |
3566 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader): | |
3567 def __init__(self, fh): | |
3568 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh) | |
3569 newbuf = self._decompobj.decompress('BZ') | |
3570 if newbuf: | |
3571 self._pending.append(newbuf) | |
3572 | |
3573 class _ZstdCompressedStreamReader(_CompressedStreamReader): | |
3574 def __init__(self, fh, zstd): | |
3575 super(_ZstdCompressedStreamReader, self).__init__(fh) | |
3576 self._zstd = zstd | |
3577 self._decompobj = zstd.ZstdDecompressor().decompressobj() | |
3578 def _decompress(self, chunk): | |
3579 newbuf = self._decompobj.decompress(chunk) | |
3580 if newbuf: | |
3581 self._pending.append(newbuf) | |
3582 try: | |
3583 while True: | |
3584 newbuf = self._decompobj.decompress('') | |
3585 if newbuf: | |
3586 self._pending.append(newbuf) | |
3587 else: | |
3588 break | |
3589 except self._zstd.ZstdError: | |
3590 self._eof = True | |
3591 | |
3592 class _zlibengine(compressionengine): | |
3593 def name(self): | |
3594 return 'zlib' | |
3595 | |
3596 def bundletype(self): | |
3597 """zlib compression using the DEFLATE algorithm. | |
3598 | |
3599 All Mercurial clients should support this format. The compression | |
3600 algorithm strikes a reasonable balance between compression ratio | |
3601 and size. | |
3602 """ | |
3603 return 'gzip', 'GZ' | |
3604 | |
3605 def wireprotosupport(self): | |
3606 return compewireprotosupport('zlib', 20, 20) | |
3607 | |
3608 def revlogheader(self): | |
3609 return 'x' | |
3610 | |
3611 def compressstream(self, it, opts=None): | |
3612 opts = opts or {} | |
3613 | |
3614 z = zlib.compressobj(opts.get('level', -1)) | |
3615 for chunk in it: | |
3616 data = z.compress(chunk) | |
3617 # Not all calls to compress emit data. It is cheaper to inspect | |
3618 # here than to feed empty chunks through generator. | |
3619 if data: | |
3620 yield data | |
3621 | |
3622 yield z.flush() | |
3623 | |
3624 def decompressorreader(self, fh): | |
3625 return _GzipCompressedStreamReader(fh) | |
3626 | |
3627 class zlibrevlogcompressor(object): | |
3628 def compress(self, data): | |
3629 insize = len(data) | |
3630 # Caller handles empty input case. | |
3631 assert insize > 0 | |
3632 | |
3633 if insize < 44: | |
3634 return None | |
3635 | |
3636 elif insize <= 1000000: | |
3637 compressed = zlib.compress(data) | |
3638 if len(compressed) < insize: | |
3639 return compressed | |
3640 return None | |
3641 | |
3642 # zlib makes an internal copy of the input buffer, doubling | |
3643 # memory usage for large inputs. So do streaming compression | |
3644 # on large inputs. | |
3645 else: | |
3646 z = zlib.compressobj() | |
3647 parts = [] | |
3648 pos = 0 | |
3649 while pos < insize: | |
3650 pos2 = pos + 2**20 | |
3651 parts.append(z.compress(data[pos:pos2])) | |
3652 pos = pos2 | |
3653 parts.append(z.flush()) | |
3654 | |
3655 if sum(map(len, parts)) < insize: | |
3656 return ''.join(parts) | |
3657 return None | |
3658 | |
3659 def decompress(self, data): | |
3660 try: | |
3661 return zlib.decompress(data) | |
3662 except zlib.error as e: | |
3663 raise error.StorageError(_('revlog decompress error: %s') % | |
3664 stringutil.forcebytestr(e)) | |
3665 | |
3666 def revlogcompressor(self, opts=None): | |
3667 return self.zlibrevlogcompressor() | |
3668 | |
3669 compengines.register(_zlibengine()) | |
3670 | |
3671 class _bz2engine(compressionengine): | |
3672 def name(self): | |
3673 return 'bz2' | |
3674 | |
3675 def bundletype(self): | |
3676 """An algorithm that produces smaller bundles than ``gzip``. | |
3677 | |
3678 All Mercurial clients should support this format. | |
3679 | |
3680 This engine will likely produce smaller bundles than ``gzip`` but | |
3681 will be significantly slower, both during compression and | |
3682 decompression. | |
3683 | |
3684 If available, the ``zstd`` engine can yield similar or better | |
3685 compression at much higher speeds. | |
3686 """ | |
3687 return 'bzip2', 'BZ' | |
3688 | |
3689 # We declare a protocol name but don't advertise by default because | |
3690 # it is slow. | |
3691 def wireprotosupport(self): | |
3692 return compewireprotosupport('bzip2', 0, 0) | |
3693 | |
3694 def compressstream(self, it, opts=None): | |
3695 opts = opts or {} | |
3696 z = bz2.BZ2Compressor(opts.get('level', 9)) | |
3697 for chunk in it: | |
3698 data = z.compress(chunk) | |
3699 if data: | |
3700 yield data | |
3701 | |
3702 yield z.flush() | |
3703 | |
3704 def decompressorreader(self, fh): | |
3705 return _BZ2CompressedStreamReader(fh) | |
3706 | |
3707 compengines.register(_bz2engine()) | |
3708 | |
3709 class _truncatedbz2engine(compressionengine): | |
3710 def name(self): | |
3711 return 'bz2truncated' | |
3712 | |
3713 def bundletype(self): | |
3714 return None, '_truncatedBZ' | |
3715 | |
3716 # We don't implement compressstream because it is hackily handled elsewhere. | |
3717 | |
3718 def decompressorreader(self, fh): | |
3719 return _TruncatedBZ2CompressedStreamReader(fh) | |
3720 | |
3721 compengines.register(_truncatedbz2engine()) | |
3722 | |
3723 class _noopengine(compressionengine): | |
3724 def name(self): | |
3725 return 'none' | |
3726 | |
3727 def bundletype(self): | |
3728 """No compression is performed. | |
3729 | |
3730 Use this compression engine to explicitly disable compression. | |
3731 """ | |
3732 return 'none', 'UN' | |
3733 | |
3734 # Clients always support uncompressed payloads. Servers don't because | |
3735 # unless you are on a fast network, uncompressed payloads can easily | |
3736 # saturate your network pipe. | |
3737 def wireprotosupport(self): | |
3738 return compewireprotosupport('none', 0, 10) | |
3739 | |
3740 # We don't implement revlogheader because it is handled specially | |
3741 # in the revlog class. | |
3742 | |
3743 def compressstream(self, it, opts=None): | |
3744 return it | |
3745 | |
3746 def decompressorreader(self, fh): | |
3747 return fh | |
3748 | |
3749 class nooprevlogcompressor(object): | |
3750 def compress(self, data): | |
3751 return None | |
3752 | |
3753 def revlogcompressor(self, opts=None): | |
3754 return self.nooprevlogcompressor() | |
3755 | |
3756 compengines.register(_noopengine()) | |
3757 | |
3758 class _zstdengine(compressionengine): | |
3759 def name(self): | |
3760 return 'zstd' | |
3761 | |
3762 @propertycache | |
3763 def _module(self): | |
3764 # Not all installs have the zstd module available. So defer importing | |
3765 # until first access. | |
3766 try: | |
3767 from . import zstd | |
3768 # Force delayed import. | |
3769 zstd.__version__ | |
3770 return zstd | |
3771 except ImportError: | |
3772 return None | |
3773 | |
3774 def available(self): | |
3775 return bool(self._module) | |
3776 | |
3777 def bundletype(self): | |
3778 """A modern compression algorithm that is fast and highly flexible. | |
3779 | |
3780 Only supported by Mercurial 4.1 and newer clients. | |
3781 | |
3782 With the default settings, zstd compression is both faster and yields | |
3783 better compression than ``gzip``. It also frequently yields better | |
3784 compression than ``bzip2`` while operating at much higher speeds. | |
3785 | |
3786 If this engine is available and backwards compatibility is not a | |
3787 concern, it is likely the best available engine. | |
3788 """ | |
3789 return 'zstd', 'ZS' | |
3790 | |
3791 def wireprotosupport(self): | |
3792 return compewireprotosupport('zstd', 50, 50) | |
3793 | |
3794 def revlogheader(self): | |
3795 return '\x28' | |
3796 | |
3797 def compressstream(self, it, opts=None): | |
3798 opts = opts or {} | |
3799 # zstd level 3 is almost always significantly faster than zlib | |
3800 # while providing no worse compression. It strikes a good balance | |
3801 # between speed and compression. | |
3802 level = opts.get('level', 3) | |
3803 | |
3804 zstd = self._module | |
3805 z = zstd.ZstdCompressor(level=level).compressobj() | |
3806 for chunk in it: | |
3807 data = z.compress(chunk) | |
3808 if data: | |
3809 yield data | |
3810 | |
3811 yield z.flush() | |
3812 | |
3813 def decompressorreader(self, fh): | |
3814 return _ZstdCompressedStreamReader(fh, self._module) | |
3815 | |
3816 class zstdrevlogcompressor(object): | |
3817 def __init__(self, zstd, level=3): | |
3818 # TODO consider omitting frame magic to save 4 bytes. | |
3819 # This writes content sizes into the frame header. That is | |
3820 # extra storage. But it allows a correct size memory allocation | |
3821 # to hold the result. | |
3822 self._cctx = zstd.ZstdCompressor(level=level) | |
3823 self._dctx = zstd.ZstdDecompressor() | |
3824 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE | |
3825 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
3826 | |
3827 def compress(self, data): | |
3828 insize = len(data) | |
3829 # Caller handles empty input case. | |
3830 assert insize > 0 | |
3831 | |
3832 if insize < 50: | |
3833 return None | |
3834 | |
3835 elif insize <= 1000000: | |
3836 compressed = self._cctx.compress(data) | |
3837 if len(compressed) < insize: | |
3838 return compressed | |
3839 return None | |
3840 else: | |
3841 z = self._cctx.compressobj() | |
3842 chunks = [] | |
3843 pos = 0 | |
3844 while pos < insize: | |
3845 pos2 = pos + self._compinsize | |
3846 chunk = z.compress(data[pos:pos2]) | |
3847 if chunk: | |
3848 chunks.append(chunk) | |
3849 pos = pos2 | |
3850 chunks.append(z.flush()) | |
3851 | |
3852 if sum(map(len, chunks)) < insize: | |
3853 return ''.join(chunks) | |
3854 return None | |
3855 | |
3856 def decompress(self, data): | |
3857 insize = len(data) | |
3858 | |
3859 try: | |
3860 # This was measured to be faster than other streaming | |
3861 # decompressors. | |
3862 dobj = self._dctx.decompressobj() | |
3863 chunks = [] | |
3864 pos = 0 | |
3865 while pos < insize: | |
3866 pos2 = pos + self._decompinsize | |
3867 chunk = dobj.decompress(data[pos:pos2]) | |
3868 if chunk: | |
3869 chunks.append(chunk) | |
3870 pos = pos2 | |
3871 # Frame should be exhausted, so no finish() API. | |
3872 | |
3873 return ''.join(chunks) | |
3874 except Exception as e: | |
3875 raise error.StorageError(_('revlog decompress error: %s') % | |
3876 stringutil.forcebytestr(e)) | |
3877 | |
3878 def revlogcompressor(self, opts=None): | |
3879 opts = opts or {} | |
3880 return self.zstdrevlogcompressor(self._module, | |
3881 level=opts.get('level', 3)) | |
3882 | |
3883 compengines.register(_zstdengine()) | |
3884 | |
3885 def bundlecompressiontopics(): | |
3886 """Obtains a list of available bundle compressions for use in help.""" | |
3887 # help.makeitemsdocs() expects a dict of names to items with a .__doc__. | |
3888 items = {} | |
3889 | |
3890 # We need to format the docstring. So use a dummy object/type to hold it | |
3891 # rather than mutating the original. | |
3892 class docobject(object): | |
3893 pass | |
3894 | |
3895 for name in compengines: | |
3896 engine = compengines[name] | |
3897 | |
3898 if not engine.available(): | |
3899 continue | |
3900 | |
3901 bt = engine.bundletype() | |
3902 if not bt or not bt[0]: | |
3903 continue | |
3904 | |
3905 doc = b'``%s``\n %s' % (bt[0], pycompat.getdoc(engine.bundletype)) | |
3906 | |
3907 value = docobject() | |
3908 value.__doc__ = pycompat.sysstr(doc) | |
3909 value._origdoc = engine.bundletype.__doc__ | |
3910 value._origfunc = engine.bundletype | |
3911 | |
3912 items[bt[0]] = value | |
3913 | |
3914 return items | |
3915 | |
3916 i18nfunctions = bundlecompressiontopics().values() | |
3917 | 3213 |
3918 # convenient shortcut | 3214 # convenient shortcut |
3919 dst = debugstacktrace | 3215 dst = debugstacktrace |
3920 | 3216 |
3921 def safename(f, tag, ctx, others=None): | 3217 def safename(f, tag, ctx, others=None): |