changeset 39505:c6b8eab5db19

snapshot: also consider the snapshot chain of one unrelated revision

To maximize the chance of good delta chain reuse, we inject an unrelated delta chain into our search. To do so, we search for the highest revision unrelated to the parents of the current revision and use its snapshot chain too. Adding this extra snapshot into the mix can have a performance impact. We'll deal with the performance impact in a later series.
author Boris Feld <boris.feld@octobus.net>
date Fri, 07 Sep 2018 11:18:45 -0400
parents 05a165dc4f55
children b66ea3fc3a86
files mercurial/revlogutils/deltas.py tests/test-sparse-revlog.t
diffstat 2 files changed, 64 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlogutils/deltas.py	Fri Sep 07 11:17:37 2018 -0400
+++ b/mercurial/revlogutils/deltas.py	Fri Sep 07 11:18:45 2018 -0400
@@ -719,6 +719,36 @@
                 parents_snaps[idx].add(s)
         snapfloor = min(parents_snaps[0]) + 1
         _findsnapshots(revlog, snapshots, snapfloor)
+        # search for the highest "unrelated" revision
+        #
+        # Adding snapshots used by an "unrelated" revision increases the odds
+        # we reuse an independent, yet better, snapshot chain.
+        #
+        # XXX instead of building a set of revisions, we could lazily enumerate
+        # over the chains. That would be more efficient, however we stick to
+        # simple code for now.
+        all_revs = set()
+        for chain in candidate_chains:
+            all_revs.update(chain)
+        other = None
+        for r in revlog.revs(prev, snapfloor):
+            if r not in all_revs:
+                other = r
+                break
+        if other is not None:
+            # To avoid unfair competition, we won't use unrelated intermediate
+            # snapshots that are deeper than the ones from the parent delta
+            # chain.
+            max_depth = max(parents_snaps.keys())
+            chain = deltachain(other)
+            for idx, s in enumerate(chain):
+                if s < snapfloor:
+                    continue
+                if max_depth < idx:
+                    break
+                if not revlog.issnapshot(s):
+                    break
+                parents_snaps[idx].add(s)
         # Test them as possible intermediate snapshot base
         # We test them from highest to lowest level. High level one are more
         # likely to result in small delta
@@ -756,9 +786,10 @@
         # more and more snapshot as the repository grow.
         yield tuple(snapshots[nullrev])
 
-    # other approach failed try against prev to hopefully save us a
-    # fulltext.
-    yield (prev,)
+    if not sparse:
+        # other approach failed try against prev to hopefully save us a
+        # fulltext.
+        yield (prev,)
 
 class deltacomputer(object):
     def __init__(self, revlog):
--- a/tests/test-sparse-revlog.t	Fri Sep 07 11:17:37 2018 -0400
+++ b/tests/test-sparse-revlog.t	Fri Sep 07 11:18:45 2018 -0400
@@ -77,7 +77,7 @@
   
 
   $ f -s .hg/store/data/*.d
-  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=59302280
+  .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=59230936
   $ hg debugrevlog *
   format : 1
   flags  : generaldelta
@@ -89,45 +89,45 @@
       empty     :        0 ( 0.00%)
                      text  :        0 (100.00%)
                      delta :        0 (100.00%)
-      snapshot  :      168 ( 3.36%)
-        lvl-0   :              4 ( 0.08%)
-        lvl-1   :             18 ( 0.36%)
-        lvl-2   :             39 ( 0.78%)
-        lvl-3   :             54 ( 1.08%)
-        lvl-4   :             53 ( 1.06%)
-      deltas    :     4833 (96.64%)
-  revision size : 59302280
-      snapshot  :  5833942 ( 9.84%)
-        lvl-0   :         804068 ( 1.36%)
-        lvl-1   :        1378470 ( 2.32%)
-        lvl-2   :        1608138 ( 2.71%)
-        lvl-3   :        1222158 ( 2.06%)
-        lvl-4   :         821108 ( 1.38%)
-      deltas    : 53468338 (90.16%)
+      snapshot  :      176 ( 3.52%)
+        lvl-0   :              3 ( 0.06%)
+        lvl-1   :             17 ( 0.34%)
+        lvl-2   :             45 ( 0.90%)
+        lvl-3   :             56 ( 1.12%)
+        lvl-4   :             55 ( 1.10%)
+      deltas    :     4825 (96.48%)
+  revision size : 59230936
+      snapshot  :  5770371 ( 9.74%)
+        lvl-0   :         602962 ( 1.02%)
+        lvl-1   :        1534153 ( 2.59%)
+        lvl-2   :        1604445 ( 2.71%)
+        lvl-3   :        1218174 ( 2.06%)
+        lvl-4   :         810637 ( 1.37%)
+      deltas    : 53460565 (90.26%)
   
   chunks        :     5001
       0x78 (x)  :     5001 (100.00%)
-  chunks size   : 59302280
-      0x78 (x)  : 59302280 (100.00%)
+  chunks size   : 59230936
+      0x78 (x)  : 59230936 (100.00%)
   
   avg chain length  :       17
   max chain length  :       45
-  max chain reach   : 22744720
+  max chain reach   : 25326012
   compression ratio :       29
   
   uncompressed data size (min/max/avg) : 346468 / 346472 / 346471
-  full revision size (min/max/avg)     : 200985 / 201050 / 201017
-  inter-snapshot size (min/max/avg)    : 11598 / 163304 / 30669
-      level-1   (min/max/avg)          : 15616 / 163304 / 76581
-      level-2   (min/max/avg)          : 11602 / 86428 / 41234
-      level-3   (min/max/avg)          : 11598 / 42390 / 22632
-      level-4   (min/max/avg)          : 11603 / 19649 / 15492
-  delta size (min/max/avg)             : 10649 / 105465 / 11063
+  full revision size (min/max/avg)     : 200897 / 201050 / 200987
+  inter-snapshot size (min/max/avg)    : 11598 / 171990 / 29869
+      level-1   (min/max/avg)          : 14037 / 171990 / 90244
+      level-2   (min/max/avg)          : 11632 / 84456 / 35654
+      level-3   (min/max/avg)          : 11598 / 41486 / 21753
+      level-4   (min/max/avg)          : 11618 / 19913 / 14738
+  delta size (min/max/avg)             : 10649 / 105209 / 11079
   
-  deltas against prev  : 4167 (86.22%)
-      where prev = p1  : 4129     (99.09%)
+  deltas against prev  : 4156 (86.13%)
+      where prev = p1  : 4120     (99.13%)
       where prev = p2  :    0     ( 0.00%)
-      other            :   38     ( 0.91%)
-  deltas against p1    :  643 (13.30%)
+      other            :   36     ( 0.87%)
+  deltas against p1    :  646 (13.39%)
   deltas against p2    :   23 ( 0.48%)
   deltas against other :    0 ( 0.00%)