comparison mercurial/utils/storageutil.py @ 49609:9cac281eb9c0

debug: add an option to display statistics about a bundling operation. This will help a lot in understanding how the bundling decisions might impact pull/unbundle on the other side.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 15 Nov 2022 16:25:23 +0100
parents d44e3c45f0e4
children 191f5057ec45
comparison
equal deleted inserted replaced
49608:78ba41878f2e 49609:9cac281eb9c0
303 flagsfn=None, 303 flagsfn=None,
304 deltamode=repository.CG_DELTAMODE_STD, 304 deltamode=repository.CG_DELTAMODE_STD,
305 revisiondata=False, 305 revisiondata=False,
306 assumehaveparentrevisions=False, 306 assumehaveparentrevisions=False,
307 sidedata_helpers=None, 307 sidedata_helpers=None,
308 debug_info=None,
308 ): 309 ):
309 """Generic implementation of ifiledata.emitrevisions(). 310 """Generic implementation of ifiledata.emitrevisions().
310 311
311 Emitting revision data is subtly complex. This function attempts to 312 Emitting revision data is subtly complex. This function attempts to
312 encapsulate all the logic for doing so in a backend-agnostic way. 313 encapsulate all the logic for doing so in a backend-agnostic way.
368 ``revisiondata`` 369 ``revisiondata``
369 ``assumehaveparentrevisions`` 370 ``assumehaveparentrevisions``
370 ``sidedata_helpers`` (optional) 371 ``sidedata_helpers`` (optional)
371 If not None, means that sidedata should be included. 372 If not None, means that sidedata should be included.
372 See `revlogutil.sidedata.get_sidedata_helpers`. 373 See `revlogutil.sidedata.get_sidedata_helpers`.
374
375 ``debug_info``
376 An optional dictionary to gather information about the bundling
377 process (if present, see config: debug.bundling.stats).
373 """ 378 """
374 379
375 fnode = store.node 380 fnode = store.node
376 frev = store.rev 381 frev = store.rev
377 382
393 398
394 for rev in revs: 399 for rev in revs:
395 if rev == nullrev: 400 if rev == nullrev:
396 continue 401 continue
397 402
403 debug_delta_source = None
404 if debug_info is not None:
405 debug_info['revision-total'] += 1
406
398 node = fnode(rev) 407 node = fnode(rev)
399 p1rev, p2rev = store.parentrevs(rev) 408 p1rev, p2rev = store.parentrevs(rev)
400 409
410 if debug_info is not None:
411 if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev:
412 debug_info['merge-total'] += 1
413
401 if deltaparentfn: 414 if deltaparentfn:
402 deltaparentrev = deltaparentfn(rev) 415 deltaparentrev = deltaparentfn(rev)
416 if debug_info is not None:
417 if deltaparentrev == nullrev:
418 debug_info['available-full'] += 1
419 else:
420 debug_info['available-delta'] += 1
421
403 else: 422 else:
404 deltaparentrev = nullrev 423 deltaparentrev = nullrev
405 424
406 # Forced delta against previous mode. 425 # Forced delta against previous mode.
407 if deltamode == repository.CG_DELTAMODE_PREV: 426 if deltamode == repository.CG_DELTAMODE_PREV:
427 if debug_info is not None:
428 debug_delta_source = "prev"
408 baserev = prevrev 429 baserev = prevrev
409 430
410 # We're instructed to send fulltext. Honor that. 431 # We're instructed to send fulltext. Honor that.
411 elif deltamode == repository.CG_DELTAMODE_FULL: 432 elif deltamode == repository.CG_DELTAMODE_FULL:
433 if debug_info is not None:
434 debug_delta_source = "full"
412 baserev = nullrev 435 baserev = nullrev
413 # We're instructed to use p1. Honor that 436 # We're instructed to use p1. Honor that
414 elif deltamode == repository.CG_DELTAMODE_P1: 437 elif deltamode == repository.CG_DELTAMODE_P1:
438 if debug_info is not None:
439 debug_delta_source = "p1"
415 baserev = p1rev 440 baserev = p1rev
416 441
417 # There is a delta in storage. We try to use that because it 442 # There is a delta in storage. We try to use that because it
418 # amounts to effectively copying data from storage and is 443 # amounts to effectively copying data from storage and is
419 # therefore the fastest. 444 # therefore the fastest.
420 elif deltaparentrev != nullrev: 445 elif deltaparentrev != nullrev:
421 # Base revision was already emitted in this group. We can 446 # Base revision was already emitted in this group. We can
422 # always safely use the delta. 447 # always safely use the delta.
423 if deltaparentrev in available: 448 if deltaparentrev in available:
449 if debug_info is not None:
450 debug_delta_source = "storage"
424 baserev = deltaparentrev 451 baserev = deltaparentrev
425 452
426 # Base revision is a parent that hasn't been emitted already. 453 # Base revision is a parent that hasn't been emitted already.
427 # Use it if we can assume the receiver has the parent revision. 454 # Use it if we can assume the receiver has the parent revision.
428 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev): 455 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
456 if debug_info is not None:
457 debug_delta_source = "storage"
429 baserev = deltaparentrev 458 baserev = deltaparentrev
430
431 # No guarantee the receiver has the delta parent. Send delta 459 # No guarantee the receiver has the delta parent. Send delta
432 # against last revision (if possible), which in the common case 460 # against last revision (if possible), which in the common case
433 # should be similar enough to this revision that the delta is 461 # should be similar enough to this revision that the delta is
434 # reasonable. 462 # reasonable.
435 elif prevrev is not None: 463 elif prevrev is not None:
464 if debug_info is not None:
465 debug_info['denied-base-not-available'] += 1
466 debug_delta_source = "prev"
436 baserev = prevrev 467 baserev = prevrev
437 else: 468 else:
469 if debug_info is not None:
470 debug_info['denied-base-not-available'] += 1
471 debug_delta_source = "full"
438 baserev = nullrev 472 baserev = nullrev
439 473
440 # Storage has a fulltext revision. 474 # Storage has a fulltext revision.
441 475
442 # Let's use the previous revision, which is as good a guess as any. 476 # Let's use the previous revision, which is as good a guess as any.
443 # There is definitely room to improve this logic. 477 # There is definitely room to improve this logic.
444 elif prevrev is not None: 478 elif prevrev is not None:
479 if debug_info is not None:
480 debug_delta_source = "prev"
445 baserev = prevrev 481 baserev = prevrev
446 else: 482 else:
483 if debug_info is not None:
484 debug_delta_source = "full"
447 baserev = nullrev 485 baserev = nullrev
448 486
449 # But we can't actually use our chosen delta base for whatever 487 # But we can't actually use our chosen delta base for whatever
450 # reason. Reset to fulltext. 488 # reason. Reset to fulltext.
451 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)): 489 if (
490 baserev != nullrev
491 and candeltafn is not None
492 and not candeltafn(baserev, rev)
493 ):
494 if debug_info is not None:
495 debug_delta_source = "full"
496 debug_info['denied-delta-candeltafn'] += 1
452 baserev = nullrev 497 baserev = nullrev
453 498
454 revision = None 499 revision = None
455 delta = None 500 delta = None
456 baserevisionsize = None 501 baserevisionsize = None
458 if revisiondata: 503 if revisiondata:
459 if store.iscensored(baserev) or store.iscensored(rev): 504 if store.iscensored(baserev) or store.iscensored(rev):
460 try: 505 try:
461 revision = store.rawdata(node) 506 revision = store.rawdata(node)
462 except error.CensoredNodeError as e: 507 except error.CensoredNodeError as e:
508 if debug_info is not None:
509 debug_delta_source = "full"
510 debug_info['denied-delta-not-available'] += 1
463 revision = e.tombstone 511 revision = e.tombstone
464 512
465 if baserev != nullrev: 513 if baserev != nullrev:
466 if rawsizefn: 514 if rawsizefn:
467 baserevisionsize = rawsizefn(baserev) 515 baserevisionsize = rawsizefn(baserev)
469 baserevisionsize = len(store.rawdata(baserev)) 517 baserevisionsize = len(store.rawdata(baserev))
470 518
471 elif ( 519 elif (
472 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV 520 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
473 ): 521 ):
522 if debug_info is not None:
523 debug_info['computed-delta'] += 1 # close enough
524 debug_info['delta-full'] += 1
474 revision = store.rawdata(node) 525 revision = store.rawdata(node)
475 available.add(rev) 526 available.add(rev)
476 else: 527 else:
477 if revdifffn: 528 if revdifffn:
529 if debug_info is not None:
530 if debug_delta_source == "full":
531 debug_info['computed-delta'] += 1
532 debug_info['delta-full'] += 1
533 elif debug_delta_source == "prev":
534 debug_info['computed-delta'] += 1
535 debug_info['delta-against-prev'] += 1
536 elif debug_delta_source == "p1":
537 debug_info['computed-delta'] += 1
538 debug_info['delta-against-p1'] += 1
539 elif debug_delta_source == "storage":
540 debug_info['reused-storage-delta'] += 1
541 else:
542 assert False, 'unreachable'
543
478 delta = revdifffn(baserev, rev) 544 delta = revdifffn(baserev, rev)
479 else: 545 else:
546 if debug_info is not None:
547 if debug_delta_source == "full":
548 debug_info['computed-delta'] += 1
549 debug_info['delta-full'] += 1
550 elif debug_delta_source == "prev":
551 debug_info['computed-delta'] += 1
552 debug_info['delta-against-prev'] += 1
553 elif debug_delta_source == "p1":
554 debug_info['computed-delta'] += 1
555 debug_info['delta-against-p1'] += 1
556 elif debug_delta_source == "storage":
557 # seems quite unlikely to happen
558 debug_info['computed-delta'] += 1
559 debug_info['reused-storage-delta'] += 1
560 else:
561 assert False, 'unreachable'
480 delta = mdiff.textdiff( 562 delta = mdiff.textdiff(
481 store.rawdata(baserev), store.rawdata(rev) 563 store.rawdata(baserev), store.rawdata(rev)
482 ) 564 )
483 565
484 available.add(rev) 566 available.add(rev)