351 index_file, |
351 index_file, |
352 data_file, |
352 data_file, |
353 sidedata_file, |
353 sidedata_file, |
354 inline, |
354 inline, |
355 data_config, |
355 data_config, |
|
356 feature_config, |
356 chunk_cache, |
357 chunk_cache, |
|
358 default_compression_header, |
357 ): |
359 ): |
358 self.opener = opener |
360 self.opener = opener |
359 self.index = index |
361 self.index = index |
360 |
362 |
361 self.__index_file = index_file |
363 self.__index_file = index_file |
362 self.data_file = data_file |
364 self.data_file = data_file |
363 self.sidedata_file = sidedata_file |
365 self.sidedata_file = sidedata_file |
364 self.inline = inline |
366 self.inline = inline |
365 self.data_config = data_config |
367 self.data_config = data_config |
|
368 self.feature_config = feature_config |
|
369 |
|
370 self._default_compression_header = default_compression_header |
366 |
371 |
367 # index |
372 # index |
368 |
373 |
369 # 3-tuple of file handles being used for active writing. |
374 # 3-tuple of file handles being used for active writing. |
370 self._writinghandles = None |
375 self._writinghandles = None |
402 return self.index[rev][1] |
410 return self.index[rev][1] |
403 |
411 |
def end(self, rev):
    """the end of the data chunk for this revision"""
    # end offset = start offset + stored (compressed) length
    return self.start(rev) + self.length(rev)
|
415 |
|
@util.propertycache
def _compressor(self):
    """The compression engine object for this revlog (lazily built).

    Resolved from ``feature_config.compression_engine`` and configured
    with ``feature_config.compression_engine_options``.
    """
    engine = util.compengines[self.feature_config.compression_engine]
    return engine.revlogcompressor(
        self.feature_config.compression_engine_options
    )
|
422 |
|
@util.propertycache
def _decompressor(self):
    """the default decompressor

    Returns None when no default compression header is configured,
    otherwise the bound ``decompress`` method of the matching engine.
    """
    if self._default_compression_header is None:
        return None
    t = self._default_compression_header
    c = self._get_decompressor(t)
    return c.decompress
|
431 |
|
432 def _get_decompressor(self, t): |
|
433 try: |
|
434 compressor = self._decompressors[t] |
|
435 except KeyError: |
|
436 try: |
|
437 engine = util.compengines.forrevlogheader(t) |
|
438 compressor = engine.revlogcompressor( |
|
439 self.feature_config.compression_engine_options |
|
440 ) |
|
441 self._decompressors[t] = compressor |
|
442 except KeyError: |
|
443 raise error.RevlogError( |
|
444 _(b'unknown compression type %s') % binascii.hexlify(t) |
|
445 ) |
|
446 return compressor |
|
447 |
|
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a ``(header, payload)`` pair: ``(b'', ...)`` when the
    payload is self-describing (compressed, empty, or starting with the
    NUL "plain" marker) and ``(b'u', data)`` when the raw data must be
    stored with an explicit "uncompressed" header.
    """
    if not data:
        return b'', data

    compressed = self._compressor.compress(data)

    if compressed:
        # The revlog compressor added the header in the returned data.
        return b'', compressed

    # An empty result means "do not compress"; a leading NUL already
    # marks the chunk as plain, so no extra header byte is needed.
    if data[0:1] == b'\0':
        return b'', data
    return b'u', data
|
462 |
|
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely by at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    t = data[0:1]

    if t == b'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(
                _(b'revlog decompress error: %s')
                % stringutil.forcebytestr(e)
            )
    # '\0' is more common than 'u' so it goes first.
    elif t == b'\0':
        return data
    elif t == b'u':
        return util.buffer(data, 1)

    compressor = self._get_decompressor(t)

    return compressor.decompress(data)
407 |
512 |
408 @contextlib.contextmanager |
513 @contextlib.contextmanager |
409 def reading(self): |
514 def reading(self): |
410 """Context manager that keeps data and sidedata files open for reading""" |
515 """Context manager that keeps data and sidedata files open for reading""" |
411 if len(self.index) == 0: |
516 if len(self.index) == 0: |
1282 _(b"index %s is corrupted") % self.display_id |
1387 _(b"index %s is corrupted") % self.display_id |
1283 ) |
1388 ) |
1284 self.index = index |
1389 self.index = index |
1285 # revnum -> (chain-length, sum-delta-length) |
1390 # revnum -> (chain-length, sum-delta-length) |
1286 self._chaininfocache = util.lrucachedict(500) |
1391 self._chaininfocache = util.lrucachedict(500) |
1287 # revlog header -> revlog compressor |
|
1288 self._decompressors = {} |
|
1289 |
1392 |
1290 return chunkcache |
1393 return chunkcache |
1291 |
1394 |
def _load_inner(self, chunk_cache):
    """Instantiate the ``_InnerRevlog`` backing this revlog.

    The default compression header is taken from the docket when one
    exists; revlogs without a docket have no per-revlog default.
    """
    if self._docket is None:
        default_compression_header = None
    else:
        default_compression_header = self._docket.default_compression_header

    self._inner = _InnerRevlog(
        opener=self.opener,
        index=self.index,
        index_file=self._indexfile,
        data_file=self._datafile,
        sidedata_file=self._sidedatafile,
        inline=self._inline,
        data_config=self.data_config,
        feature_config=self.feature_config,
        chunk_cache=chunk_cache,
        default_compression_header=default_compression_header,
    )
1303 |
1413 |
def get_revlog(self):
    """simple function to mirror API of other not-really-revlog API"""
    return self
1316 # Reference the file without the "data/" prefix, so it is familiar |
1426 # Reference the file without the "data/" prefix, so it is familiar |
1317 # to the user. |
1427 # to the user. |
1318 return self.target[1] |
1428 return self.target[1] |
1319 else: |
1429 else: |
1320 return self.radix |
1430 return self.radix |
1321 |
|
1322 def _get_decompressor(self, t): |
|
1323 try: |
|
1324 compressor = self._decompressors[t] |
|
1325 except KeyError: |
|
1326 try: |
|
1327 engine = util.compengines.forrevlogheader(t) |
|
1328 compressor = engine.revlogcompressor( |
|
1329 self.feature_config.compression_engine_options |
|
1330 ) |
|
1331 self._decompressors[t] = compressor |
|
1332 except KeyError: |
|
1333 raise error.RevlogError( |
|
1334 _(b'unknown compression type %s') % binascii.hexlify(t) |
|
1335 ) |
|
1336 return compressor |
|
1337 |
|
@util.propertycache
def _compressor(self):
    """The compression engine object for this revlog (lazily built).

    NOTE(review): duplicates ``_InnerRevlog._compressor``; the diff this
    file derives from appears to relocate it — confirm which copy stays.
    """
    engine = util.compengines[self.feature_config.compression_engine]
    return engine.revlogcompressor(
        self.feature_config.compression_engine_options
    )
|
1344 |
|
@util.propertycache
def _decompressor(self):
    """the default decompressor

    Returns None when the revlog has no docket (and therefore no default
    compression header); otherwise the bound ``decompress`` method of
    the engine matching the docket's default compression header.
    """
    if self._docket is None:
        return None
    t = self._docket.default_compression_header
    c = self._get_decompressor(t)
    return c.decompress
|
1353 |
1431 |
1354 def _datafp(self, mode=b'r'): |
1432 def _datafp(self, mode=b'r'): |
1355 """file object for the revlog's data file""" |
1433 """file object for the revlog's data file""" |
1356 return self.opener(self._datafile, mode=mode) |
1434 return self.opener(self._datafile, mode=mode) |
1357 |
1435 |
2270 compression_mode = self.index[rev][10] |
2348 compression_mode = self.index[rev][10] |
2271 data = self._inner.get_segment_for_revs(rev, rev)[1] |
2349 data = self._inner.get_segment_for_revs(rev, rev)[1] |
2272 if compression_mode == COMP_MODE_PLAIN: |
2350 if compression_mode == COMP_MODE_PLAIN: |
2273 return data |
2351 return data |
2274 elif compression_mode == COMP_MODE_DEFAULT: |
2352 elif compression_mode == COMP_MODE_DEFAULT: |
2275 return self._decompressor(data) |
2353 return self._inner._decompressor(data) |
2276 elif compression_mode == COMP_MODE_INLINE: |
2354 elif compression_mode == COMP_MODE_INLINE: |
2277 return self.decompress(data) |
2355 return self._inner.decompress(data) |
2278 else: |
2356 else: |
2279 msg = b'unknown compression mode %d' |
2357 msg = b'unknown compression mode %d' |
2280 msg %= compression_mode |
2358 msg %= compression_mode |
2281 raise error.RevlogError(msg) |
2359 raise error.RevlogError(msg) |
2282 |
2360 |
2326 except OverflowError: |
2404 except OverflowError: |
2327 # issue4215 - we can't cache a run of chunks greater than |
2405 # issue4215 - we can't cache a run of chunks greater than |
2328 # 2G on Windows |
2406 # 2G on Windows |
2329 return [self._chunk(rev) for rev in revschunk] |
2407 return [self._chunk(rev) for rev in revschunk] |
2330 |
2408 |
2331 decomp = self.decompress |
2409 decomp = self._inner.decompress |
2332 # self._decompressor might be None, but will not be used in that case |
2410 # self._decompressor might be None, but will not be used in that case |
2333 def_decomp = self._decompressor |
2411 def_decomp = self._inner._decompressor |
2334 for rev in revschunk: |
2412 for rev in revschunk: |
2335 chunkstart = start(rev) |
2413 chunkstart = start(rev) |
2336 if inline: |
2414 if inline: |
2337 chunkstart += (rev + 1) * iosize |
2415 chunkstart += (rev + 1) * iosize |
2338 chunklength = length(rev) |
2416 chunklength = length(rev) |
2542 |
2620 |
2543 comp = self.index[rev][11] |
2621 comp = self.index[rev][11] |
2544 if comp == COMP_MODE_PLAIN: |
2622 if comp == COMP_MODE_PLAIN: |
2545 segment = comp_segment |
2623 segment = comp_segment |
2546 elif comp == COMP_MODE_DEFAULT: |
2624 elif comp == COMP_MODE_DEFAULT: |
2547 segment = self._decompressor(comp_segment) |
2625 segment = self._inner._decompressor(comp_segment) |
2548 elif comp == COMP_MODE_INLINE: |
2626 elif comp == COMP_MODE_INLINE: |
2549 segment = self.decompress(comp_segment) |
2627 segment = self._inner.decompress(comp_segment) |
2550 else: |
2628 else: |
2551 msg = b'unknown compression mode %d' |
2629 msg = b'unknown compression mode %d' |
2552 msg %= comp |
2630 msg %= comp |
2553 raise error.RevlogError(msg) |
2631 raise error.RevlogError(msg) |
2554 |
2632 |
2840 deltacomputer=deltacomputer, |
2918 deltacomputer=deltacomputer, |
2841 sidedata=sidedata, |
2919 sidedata=sidedata, |
2842 ) |
2920 ) |
2843 |
2921 |
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Thin delegation to the inner revlog, which owns the compression
    engine; see ``_InnerRevlog.compress`` for the header semantics.
    """
    return self._inner.compress(data)
2924 |
def decompress(self, data):
    """Decompress a revlog chunk.

    Thin delegation to the inner revlog, which routes the chunk by its
    leading header byte; see ``_InnerRevlog.decompress``.
    """
    return self._inner.decompress(data)
2927 |
2909 def _addrevision( |
2928 def _addrevision( |
2910 self, |
2929 self, |
2911 node, |
2930 node, |
2912 rawtext, |
2931 rawtext, |
3027 sidedata_compression_mode = COMP_MODE_INLINE |
3046 sidedata_compression_mode = COMP_MODE_INLINE |
3028 if sidedata and self.feature_config.has_side_data: |
3047 if sidedata and self.feature_config.has_side_data: |
3029 sidedata_compression_mode = COMP_MODE_PLAIN |
3048 sidedata_compression_mode = COMP_MODE_PLAIN |
3030 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
3049 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata) |
3031 sidedata_offset = self._docket.sidedata_end |
3050 sidedata_offset = self._docket.sidedata_end |
3032 h, comp_sidedata = self.compress(serialized_sidedata) |
3051 h, comp_sidedata = self._inner.compress(serialized_sidedata) |
3033 if ( |
3052 if ( |
3034 h != b'u' |
3053 h != b'u' |
3035 and comp_sidedata[0:1] != b'\0' |
3054 and comp_sidedata[0:1] != b'\0' |
3036 and len(comp_sidedata) < len(serialized_sidedata) |
3055 and len(comp_sidedata) < len(serialized_sidedata) |
3037 ): |
3056 ): |