diff hgext/evolve.py @ 809:5d3ddede6ccf

debugobsstorestat: add information about clusters. We now display information about independent obsolescence marker clusters.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Mon, 24 Feb 2014 17:32:09 -0800
parents 81a3d9a24e6b
children acfa2b67cff6
line wrap: on
line diff
--- a/hgext/evolve.py	Mon Feb 24 19:00:04 2014 -0800
+++ b/hgext/evolve.py	Mon Feb 24 17:32:09 2014 -0800
@@ -860,6 +860,7 @@
      _('record the specified user in metadata'), _('USER')),
 ]
 
+
 @command('debugobsstorestat', [], '')
 def cmddebugobsstorestat(ui, repo):
     """print statistic about obsolescence markers in the repo"""
@@ -869,8 +870,14 @@
     ui.write('markers total:              %9i\n' % len(store._all))
     sucscount = [0, 0 , 0, 0]
     known = 0
+    parentsdata = 0
     metatotallenght = 0
     metakeys = {}
+    # node -> cluster mapping
+    #   a cluster is a (set(nodes), set(markers)) tuple
+    clustersmap = {}
+    # same data using parent information
+    pclustersmap= {}
     for mark in store:
         if mark[0] in nm:
             known += 1
@@ -881,8 +888,60 @@
         for key in meta:
             metakeys.setdefault(key, 0)
             metakeys[key] += 1
+        parents = [meta.get('p1'), meta.get('p2')]
+        parents = [node.bin(p) for p in parents if p is not None]
+        if parents:
+            parentsdata += 1
+        # cluster handling
+        nodes = set()
+        nodes.add(mark[0])
+        nodes.update(mark[1])
+        c = (set(nodes), set([mark]))
 
+        toproceed = set(nodes)
+        while toproceed:
+            n = toproceed.pop()
+            other = clustersmap.get(n)
+            if (other is not None
+                and other is not c):
+                other[0].update(c[0])
+                other[1].update(c[1])
+                for on in c[0]:
+                    if on in toproceed:
+                        continue
+                    clustersmap[on] = other
+                c = other
+            clustersmap[n] = c
+        # same with parent data
+        nodes.update(parents)
+        c = (set(nodes), set([mark]))
+        toproceed = set(nodes)
+        while toproceed:
+            n = toproceed.pop()
+            other = pclustersmap.get(n)
+            if (other is not None
+                and other is not c):
+                other[0].update(c[0])
+                other[1].update(c[1])
+                for on in c[0]:
+                    if on in toproceed:
+                        continue
+                    pclustersmap[on] = other
+                c = other
+            pclustersmap[n] = c
+
+    # freezing the result
+    for c in clustersmap.values():
+        fc = (frozenset(c[0]), frozenset(c[1]))
+        for n in fc[0]:
+            clustersmap[n] = fc
+    # same with parent data
+    for c in pclustersmap.values():
+        fc = (frozenset(c[0]), frozenset(c[1]))
+        for n in fc[0]:
+            pclustersmap[n] = fc
     ui.write('    for known precursors:   %9i\n' % known)
+    ui.write('    with parents data:      %9i\n' % parentsdata)
     # successors data
     ui.write('markers with no successors: %9i\n' % sucscount[0])
     ui.write('              1 successors: %9i\n' % sucscount[1])
@@ -895,6 +954,35 @@
     for key in sorted(metakeys):
         ui.write('    %15s:        %9i\n' % (key, metakeys[key]))
 
+    allclusters = list(set(clustersmap.values()))
+    allclusters.sort(key=lambda x: len(x[1]))
+    ui.write('disconnected clusters:      %9i\n' % len(allclusters))
+
+    ui.write('        any known node:     %9i\n'
+             % len([c for c in allclusters
+                    if [n for n in c[0] if nm.get(n) is not None]]))
+    if allclusters:
+        nbcluster = len(allclusters)
+        ui.write('        smallest length:    %9i\n' % len(allclusters[0][1]))
+        ui.write('        longer length:      %9i\n' % len(allclusters[-1][1]))
+        median = len(allclusters[nbcluster//2][1])
+        ui.write('        median length:      %9i\n' % median)
+        mean = sum(len(x[1]) for x in allclusters) // nbcluster
+        ui.write('        mean length:        %9i\n' % mean)
+    allpclusters = list(set(pclustersmap.values()))
+    allpclusters.sort(key=lambda x: len(x[1]))  # order by marker count
+    ui.write('    using parents data:     %9i\n' % len(allpclusters))
+    ui.write('        any known node:     %9i\n'
+             % len([c for c in allpclusters  # parent-aware clusters
+                    if [n for n in c[0] if nm.get(n) is not None]]))
+    if allpclusters:  # the statistics below need at least one cluster
+        nbcluster = len(allpclusters)
+        ui.write('        smallest length:    %9i\n' % len(allpclusters[0][1]))
+        ui.write('        longer length:      %9i\n' % len(allpclusters[-1][1]))
+        median = len(allpclusters[nbcluster//2][1])  # upper median
+        ui.write('        median length:      %9i\n' % median)
+        mean = sum(len(x[1]) for x in allpclusters) // nbcluster  # floored
+        ui.write('        mean length:        %9i\n' % mean)
 
 @command('^evolve|stabilize|solve',
     [('n', 'dry-run', False, 'do not perform actions, print what to be done'),