diff hgext/obsolete.py @ 334:fb83210bce32 stable

obsolete: move to official binary format
author Pierre-Yves David <pierre-yves.david@logilab.fr>
date Tue, 03 Jul 2012 12:58:55 +0200
parents 36e2016d6563
children c27a465daef2 6b92f8d5ae58
line wrap: on
line diff
--- a/hgext/obsolete.py	Wed Jun 27 17:21:24 2012 +0200
+++ b/hgext/obsolete.py	Tue Jul 03 12:58:55 2012 +0200
@@ -86,6 +86,12 @@
 import base64
 import json
 
+import struct
+from mercurial import util, base85
+
+_pack = struct.pack
+_unpack = struct.unpack
+
 from mercurial import util
 from mercurial import context
 from mercurial import revset
@@ -191,10 +197,10 @@
     """Precursor of a changeset"""
     cs = set()
     nm = repo.changelog.nodemap
-    markerbysubj = repo.obsoletestore.subjects
+    markerbysubj = repo.obsstore.successors
     for r in s:
         for p in markerbysubj.get(repo[r].node(), ()):
-            pr = nm.get(p['object'])
+            pr = nm.get(p[0])
             if pr is not None:
                 cs.add(pr)
     return cs
@@ -209,11 +215,11 @@
     """transitive precursors of a subset"""
     toproceed = [repo[r].node() for r in s]
     seen = set()
-    allsubjects = repo.obsoletestore.subjects
+    allsubjects = repo.obsstore.successors
     while toproceed:
         nc = toproceed.pop()
         for mark in allsubjects.get(nc, ()):
-            np = mark['object']
+            np = mark[0]
             if np not in seen:
                 seen.add(np)
                 toproceed.append(np)
@@ -235,10 +241,10 @@
     """Successors of a changeset"""
     cs = set()
     nm = repo.changelog.nodemap
-    markerbyobj = repo.obsoletestore.objects
+    markerbyobj = repo.obsstore.precursors
     for r in s:
         for p in markerbyobj.get(repo[r].node(), ()):
-            for sub in p['subjects']:
+            for sub in p[1]:
                 sr = nm.get(sub)
                 if sr is not None:
                     cs.add(sr)
@@ -254,11 +260,11 @@
     """transitive successors of a subset"""
     toproceed = [repo[r].node() for r in s]
     seen = set()
-    allobjects = repo.obsoletestore.objects
+    allobjects = repo.obsstore.precursors
     while toproceed:
         nc = toproceed.pop()
         for mark in allobjects.get(nc, ()):
-            for sub in mark['subjects']:
+            for sub in mark[1]:
                 if sub not in seen:
                     seen.add(sub)
                     toproceed.append(sub)
@@ -407,29 +413,30 @@
 # Pushkey mechanism for mutable
 #########################################
 
-def pushobsolete(repo, key, old, raw):
-    """push obsolete relation through pushkey"""
-    assert key == "markers"
-    l = repo.lock()
+def listmarkers(repo):
+    """List markers over pushkey"""
+    if not repo.obsstore:
+        return {}
+    data = repo.obsstore._writemarkers()
+    return {'dump': base85.b85encode(data)}
+
+def pushmarker(repo, key, old, new):
+    """Push markers over pushkey"""
+    if key != 'dump':
+        repo.ui.warn(_('unknown key: %r') % key)
+        return 0
+    if old:
+        repo.ui.warn(_('unexpected old value: %r') % key)
+        return 0
+    data = base85.b85decode(new)
+    lock = repo.lock()
     try:
-        tmp = StringIO()
-        tmp.write(raw)
-        tmp.seek(0)
-        repo.obsoletestore.load(tmp)
-        repo.obsoletestore._dirty = True # XXX meh
+        repo.obsstore.mergemarkers(data)
         return 1
     finally:
-        l.release()
-
-def listobsolete(repo):
-    """dump all obsolete relation in
+        lock.release()
 
-    XXX this have be improved"""
-    tmp = StringIO()
-    repo.obsoletestore.save(tmp)
-    return {'markers': base64.b64encode(tmp.getvalue())}
-
-pushkey.register('obsolete', pushobsolete, listobsolete)
+pushkey.register('obsolete', pushmarker, listmarkers)
 
 ### Discovery wrapping
 #############################
@@ -499,10 +506,10 @@
                     new = set()
                     while nodes:
                         n = nodes.pop()
-                        if n in repo.obsoletestore.objects:
-                            markers = repo.obsoletestore.objects[n]
+                        if n in repo.obsstore.precursors:
+                            markers = repo.obsstore.precursors[n]
                             for mark in markers:
-                                for newernode in mark['subjects']:
+                                for newernode in mark[1]:
                                     if newernode is not None:
                                         nodes.append(newernode)
                         else:
@@ -563,35 +570,51 @@
     """import markers from an .hg/obsolete-relations file"""
     cnt = 0
     l = repo.lock()
+    some = False
     try:
         repo._importoldobsolete = True
-        store = repo.obsoletestore
+        store = repo.obsstore
+        ### very first format
         try:
             f = repo.opener('obsolete-relations')
             try:
+                some = True
                 for line in f:
                     subhex, objhex = line.split()
-                    sub = bin(subhex)
-                    obj = bin(objhex)
-                    newmarker = {
-                        'subjects': (sub==nullid) and [] or [sub],
-                        'object': obj,
-                        'date':  util.makedate(),
+                    suc = bin(subhex)
+                    prec = bin(objhex)
+                    sucs = (suc==nullid) and [] or [suc]
+                    meta = {
+                        'date':  '%i %i' % util.makedate(),
                         'user': ui.username(),
-                        'reason': 'import from older format.',
                         }
-                    store.new(newmarker)
-                    store._dirty = True
+                    store.create(prec, sucs, 0, meta)
                     cnt += 1
             finally:
                 f.close()
             util.unlink(repo.join('obsolete-relations'))
         except IOError:
-            ui.warn('nothing to do\n')
             pass
+        ### second (json) format
+        data = repo.sopener.tryread('obsoletemarkers')
+        if data:
+            some = True
+            for oldmark in json.loads(data):
+                del oldmark['id'] # dropped for now
+                del oldmark['reason'] # unused until then
+                oldmark['subjects'] = [bin(n) for n in oldmark['subjects']]
+                oldmark['object'] = bin(oldmark['object'])
+                oldmark['date'] = '%i %i' % tuple(oldmark['date'])
+                store.create(oldmark.pop('object'),
+                             oldmark.pop('subjects'),
+                             0, oldmark)
+                cnt += 1
+            util.unlink(repo.sjoin('obsoletemarkers'))
     finally:
         del repo._importoldobsolete
         l.release()
+    if not some:
+        ui.warn('nothing to do\n')
     ui.status('%i obsolete marker converted\n' % cnt)
 
 @command('debugsuccessors', [], '')
@@ -603,9 +626,9 @@
     """
     lock = repo.lock()
     try:
-        allsuccessors = repo.obsoletestore.objects
+        allsuccessors = repo.obsstore.precursors
         for old in sorted(allsuccessors):
-            successors = [sorted(m['subjects']) for m in allsuccessors[old]]
+            successors = [sorted(m[1]) for m in allsuccessors[old]]
             for i, group in enumerate(sorted(successors)):
                 ui.write('%s' % short(old))
                 for new in group:
@@ -633,14 +656,14 @@
     if new != oldnode:
         lock = repo.lock()
         try:
-            newmarker = {
+            meta = {
                 'subjects':  [new],
                 'object': oldnode,
                 'date':  util.makedate(),
                 'user': ui.username(),
                 'reason': 'commit --amend',
                 }
-            repo.obsoletestore.new(newmarker)
+            repo.obsstore.create(oldnode, [new], 0, meta)
             repo._clearobsoletecache()
             repo._turn_extinct_secret()
         finally:
@@ -701,7 +724,7 @@
     toproceed = set([(obs,)])
     # XXX known optimization available
     newer = set()
-    objectrels = repo.obsoletestore.objects
+    objectrels = repo.obsstore.precursors
     while toproceed:
         current = toproceed.pop()
         assert len(current) <= 1, 'splitting not handled yet. %r' % current
@@ -710,7 +733,7 @@
             if n in objectrels:
                 markers = objectrels[n]
                 for mark in markers:
-                    toproceed.add(tuple(mark['subjects']))
+                    toproceed.add(tuple(mark[1]))
             else:
                 newer.add(tuple(current))
         else:
@@ -740,74 +763,158 @@
     a.update('\0')
     return a.digest()
 
-class obsoletestore(object):
-    """Store obsolete relations
+# mercurial backport
+
+def encodemeta(meta):
+    """Return encoded metadata string to string mapping.
+
+    Assume no ':' in key and no '\0' in both key and value."""
+    for key, value in meta.iteritems():
+        if ':' in key or '\0' in key:
+            raise ValueError("':' and '\0' are forbidden in metadata key")
+        if '\0' in value:
+            raise ValueError("'\0' is forbidden in metadata value")
+    return '\0'.join(['%s:%s' % (k, meta[k]) for k in sorted(meta)])
 
-    Relation are stored in three mapping. All mapping have "obsolete markers"
-    as values::
+def decodemeta(data):
+    """Return string to string dictionary from encoded version."""
+    d = {}
+    for l in data.split('\0'):
+        if l:
+            key, value = l.split(':')
+            d[key] = value
+    return d
+
+# data used for parsing and writing
+_fmversion = 0
+_fmfixed   = '>BIB20s'
+_fmnode = '20s'
+_fmfsize = struct.calcsize(_fmfixed)
+_fnodesize = struct.calcsize(_fmnode)
 
-        {'id': "unique id of the obsolete marker"
-         'subjects': "0-N newer version of changeset in "object" (as ordered list)
-         'object': "old and obsolete version"
-         'date': "When was this marker created ?"
-         'user': "Who did that ?"
-         'reason': "Why was it done"
-        }
+def _readmarkers(data):
+    """Read and enumerate markers from raw data"""
+    off = 0
+    diskversion = _unpack('>B', data[off:off + 1])[0]
+    off += 1
+    if diskversion != _fmversion:
+        raise util.Abort(_('parsing obsolete marker: unknown version %r')
+                         % diskversion)
 
-    Three keys exists
-
-    :self._markers: "id" -> marker
+    # Loop on markers
+    l = len(data)
+    while off + _fmfsize <= l:
+        # read fixed part
+        cur = data[off:off + _fmfsize]
+        off += _fmfsize
+        nbsuc, mdsize, flags, pre = _unpack(_fmfixed, cur)
+        # read replacement
+        sucs = ()
+        if nbsuc:
+            s = (_fnodesize * nbsuc)
+            cur = data[off:off + s]
+            sucs = _unpack(_fmnode * nbsuc, cur)
+            off += s
+        # read metadata
+        # (metadata will be decoded on demand)
+        metadata = data[off:off + mdsize]
+        if len(metadata) != mdsize:
+            raise util.Abort(_('parsing obsolete marker: metadata is too '
+                               'short, %d bytes expected, got %d')
+                             % (mdsize, len(metadata)))
+        off += mdsize
+        yield (pre, sucs, flags, metadata)
 
-    :self.subjects: "subject" -> set(marker)
+class obsstore(object):
+    """Store obsolete markers
 
-    :self.objects: "object" -> set(marker)
+    Markers can be accessed with two mappings:
+    - precursors: old -> set(new)
+    - successors: new -> set(old)
     """
 
     def __init__(self):
-        self._markers = {}
-        self.subjects = {}
-        self.objects = {}
-        self._dirty = False # should be on repo
+        self._all = []
+        # new markers to serialize
+        self._new = []
+        self.precursors = {}
+        self.successors = {}
+
+    def __iter__(self):
+        return iter(self._all)
+
+    def __nonzero__(self):
+        return bool(self._all)
+
+    def create(self, prec, succs=(), flag=0, metadata=None):
+        """obsolete: add a new obsolete marker
 
-    def new(self, marker):
-        """Add a *new* marker to the store. computing it's ID"""
-        mid = marker['id'] = markerid(marker)
-        self._insert(marker)
-        self._dirty = True
-        return mid
+        * ensuring it is hashable
+        * check mandatory metadata
+        * encode metadata
+        """
+        if metadata is None:
+            metadata = {}
+        if len(prec) != 20:
+            raise ValueError(prec)
+        for succ in succs:
+            if len(succ) != 20:
+                raise ValueError(succ)
+        marker = (str(prec), tuple(succs), int(flag), encodemeta(metadata))
+        self.add(marker)
 
-    def _insert(self, marker):
-        if marker['id'] not in self._markers:
-            self._markers[marker['id']] = marker
-            add2set(self.objects, marker['object'], marker)
-            for subj in marker['subjects']:
-                add2set(self.subjects, subj, marker)
+    def add(self, marker):
+        """Add a new marker to the store
+
+        This marker still needs to be written to disk"""
+        self._new.append(marker)
+        self._load(marker)
+
+    def loadmarkers(self, data):
+        """Load all markers in data, mark them as known."""
+        for marker in _readmarkers(data):
+            self._load(marker)
 
-    def save(self, stream):
-        markers = []
-        for mark in self._markers.itervalues():
-            jmark = mark.copy()
-            jmark['id'] = hex(jmark['id'])
-            jmark['subjects'] = [hex(n) for n in jmark['subjects']]
-            jmark['object'] = hex(jmark['object'])
-            markers.append(jmark)
-        json.dump(markers, stream, indent=4)
+    def mergemarkers(self, data):
+        other = set(_readmarkers(data))
+        local = set(self._all)
+        new = other - local
+        for marker in new:
+            self.add(marker)
+
+    def flushmarkers(self, stream):
+        """Write all markers to a stream
+
+        After this operation, "new" markers are considered "known"."""
+        self._writemarkers(stream)
+        self._new[:] = []
 
-    def load(self, stream):
-        for mark in json.load(stream):
-            mark['id'] = bin(mark['id'])
-            mark['subjects'] = [bin(n) for n in mark['subjects']]
-            mark['object'] = bin(mark['object'])
-            self._insert(mark)
+    def _load(self, marker):
+        self._all.append(marker)
+        pre, sucs = marker[:2]
+        self.precursors.setdefault(pre, set()).add(marker)
+        for suc in sucs:
+            self.successors.setdefault(suc, set()).add(marker)
 
-def writeobsolete(repo):
-    """wire obsolete data on disk"""
-    f = repo.sopener('obsoletemarkers', 'w', atomictemp=True)
-    try:
-        repo.obsoletestore.save(f)
-        repo._dirty = False
-    finally:
-        f.close()
+    def _writemarkers(self, stream=None):
+        # Kept separate from flushmarkers(), it will be reused for
+        # markers exchange.
+        if stream is None:
+            final = []
+            w = final.append
+        else:
+            w = stream.write
+        w(_pack('>B', _fmversion))
+        for marker in self._all:
+            pre, sucs, flags, metadata = marker
+            nbsuc = len(sucs)
+            format = _fmfixed + (_fmnode * nbsuc)
+            data = [nbsuc, len(metadata), flags, pre]
+            data.extend(sucs)
+            w(_pack(format, *data))
+            w(metadata)
+        if stream is None:
+            return ''.join(final)
 
 
 ### repo subclassing
@@ -835,30 +942,27 @@
         def obsoletedby(self, node):
             """return the set of node that make <node> obsolete (obj)"""
             others = set()
-            for marker in self.obsoletestore.objects.get(node, []):
-                others.update(marker['subjects'])
+            for marker in self.obsstore.precursors.get(node, []):
+                others.update(marker[1])
             return others
 
         def obsolete(self, node):
             """return the set of node that <node> make obsolete (sub)"""
-            return set(marker['object'] for marker in self.obsoletestore.subjects.get(node, []))
+            return set(marker[0] for marker in self.obsstore.successors.get(node, []))
 
-        @util.propertycache
-        def obsoletestore(self):
+        @storecache('obsstore')
+        def obsstore(self):
             if not getattr(self, '_importoldobsolete', False):
-                try:
-                    f = self.opener('obsolete-relations')
-                    f.close()
+                data = repo.opener.tryread('obsolete-relations')
+                if not data:
+                    data = repo.sopener.tryread('obsoletemarkers')
+                if data:
                     raise util.Abort('old format of obsolete marker detected!\n'
                                      'run `hg debugconvertobsolete` once.')
-                except IOError:
-                    pass
-            store = obsoletestore()
-            try:
-                f = self.sopener('obsoletemarkers')
-                store.load(f)
-            except IOError:
-                pass
+            store = obsstore()
+            data = self.sopener.tryread('obsstore')
+            if data:
+                store.loadmarkers(data)
             return store
 
         @util.propertycache
@@ -866,7 +970,7 @@
             """the set of obsolete revision"""
             obs = set()
             nm = self.changelog.nodemap
-            for obj in self.obsoletestore.objects:
+            for obj in self.obsstore.precursors:
                 try: # /!\api change in Hg 2.2 (e8d37b78acfb22ae2c1fb126c2)/!\
                     rev = nm.get(obj)
                 except TypeError:  #XXX to remove while breaking Hg 2.1 support
@@ -929,14 +1033,13 @@
                         % {'sub': short(sub), 'obj': short(obj)})
             lock = self.lock()
             try:
-                newmarker = {
-                    'subjects': (sub==nullid) and [] or [sub],
-                    'object': obj,
+                meta = {
                     'date':  util.makedate(),
                     'user': ui.username(),
                     'reason': 'unknown',
                     }
-                mid = self.obsoletestore.new(newmarker)
+                subs = (sub == nullid) and [] or [sub]
+                mid = self.obsstore.create(obj, subs, 0, meta)
                 self._clearobsoletecache()
                 self._turn_extinct_secret()
                 return mid
@@ -968,25 +1071,19 @@
             if not getattr(l.releasefn, 'obspatched', False):
                 oreleasefn = l.releasefn
                 def releasefn(*args, **kwargs):
-                    if self.obsoletestore._dirty:
-                        writeobsolete(self)
+                    if 'obsstore' in vars(self) and self.obsstore._new:
+                        f = self.sopener('obsstore', 'wb', atomictemp=True)
+                        try:
+                            self.obsstore.flushmarkers(f)
+                            f.close()
+                        except: # re-raises
+                            f.discard()
+                            raise
                     oreleasefn(*args, **kwargs)
                 releasefn.obspatched = True
                 l.releasefn = releasefn
             return l
 
-        def _readobsrels(self):
-            """Read obsolete relation on disk"""
-            # XXX handle lock
-            try:
-                f = self.opener('obsolete-relations')
-                try:
-                    return _obsdeserialise(f)
-                finally:
-                    f.close()
-            except IOError:
-                return {}
-
 
         ### pull // push support
 
@@ -995,16 +1092,13 @@
             l = repo.lock()
             try:
                 result = opull(remote, *args, **kwargs)
-                if 'obsolete' in remote.listkeys('namespaces'):
-                    tmp = StringIO()
-                    rels = remote.listkeys('obsolete')['markers']
-                    tmp.write(base64.b64decode(rels))
-                    tmp.seek(0)
-                    repo.obsoletestore.load(tmp)
-                    repo.obsoletestore._dirty = True # XXX meh
+                remoteobs = remote.listkeys('obsolete')
+                if 'dump' in remoteobs:
+                    data = base85.b85decode(remoteobs['dump'])
+                    self.obsstore.mergemarkers(data)
                     self._clearobsoletecache()
                 self._turn_extinct_secret()
                 return result
             finally:
                 l.release()
 
@@ -1012,10 +1106,12 @@
             """wrapper around pull that pull obsolete relation"""
             self._turn_extinct_secret()
             result = opush(remote, *args, **opts)
-            if 'obsolete' in remote.listkeys('namespaces'):
-                tmp = StringIO()
-                self.obsoletestore.save(tmp)
-                remote.pushkey('obsolete', 'markers', '', tmp.getvalue())
+            if 'obsolete' in self.listkeys('namespaces') and self.obsstore:
+                data = self.obsstore._writemarkers()
+                r = remote.pushkey('obsolete', 'dump', '',
+                                   base85.b85encode(data))
+                if not r:
+                    self.ui.warn(_('failed to push obsolete markers!\n'))
             self._turn_extinct_secret()
 
             return result
@@ -1026,12 +1122,12 @@
         # /!\ api change in  Hg 2.2 (97efd26eb9576f39590812ea9) /!\
         if util.safehasattr(repo, '_journalfiles'): # Hg 2.2
             def _journalfiles(self):
-                return o_journalfiles() + (self.sjoin('journal.obsoletemarkers'),) 
+                return o_journalfiles() + (self.sjoin('journal.obsstore'),) 
 
             def _writejournal(self, desc):
                 """wrapped version of _writejournal that save obsolete data"""
                 o_writejournal(desc)
-                filename = 'obsoletemarkers'
+                filename = 'obsstore'
                 filepath = self.sjoin(filename)
                 if os.path.exists(filepath):
                     journalname = 'journal.' + filename
@@ -1042,7 +1138,7 @@
             def _writejournal(self, desc):
                 """wrapped version of _writejournal that save obsolete data"""
                 entries = list(o_writejournal(desc))
-                filename = 'obsoletemarkers'
+                filename = 'obsstore'
                 filepath = self.sjoin(filename)
                 if  os.path.exists(filepath):
                     journalname = 'journal.' + filename
@@ -1055,15 +1151,15 @@
             """wrapped version of _rollback that restore obsolete data"""
             ret = o_rollback(dryrun, force)
             if not (ret or dryrun): #rollback did not failed
-                src = self.sjoin('undo.obsoletemarkers')
-                dst = self.sjoin('obsoletemarkers')
+                src = self.sjoin('undo.obsstore')
+                dst = self.sjoin('obsstore')
                 if os.path.exists(src):
                     util.rename(src, dst)
                 elif os.path.exists(dst):
                     # If no state was saved because the file did not existed before.
                     os.unlink(dst)
                 # invalidate cache
-                self.__dict__.pop('obsoletestore', None)
+                self.__dict__.pop('obsstore', None)
             return ret
 
         @storecache('00changelog.i')