--- a/mercurial/branchmap.py Fri Jan 09 22:53:38 2015 +0800
+++ b/mercurial/branchmap.py Thu Jan 08 00:01:03 2015 +0100
@@ -9,6 +9,8 @@
import encoding
import util
import time
+from array import array
+from struct import calcsize, pack, unpack
def _filename(repo):
"""name of a branchcache file for a given repo or repoview"""
@@ -285,3 +287,150 @@
duration = time.time() - starttime
repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
repo.filtername, duration)
+
+# Revision branch info cache
+
+_rbcversion = '-v1'
+_rbcnames = 'cache/rbc-names' + _rbcversion
+_rbcrevs = 'cache/rbc-revs' + _rbcversion
+# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
+_rbcrecfmt = '>4sI'
+_rbcrecsize = calcsize(_rbcrecfmt)
+_rbcnodelen = 4
+_rbcbranchidxmask = 0x7fffffff
+_rbccloseflag = 0x80000000
+
+class revbranchcache(object):
+ """Persistent cache, mapping from revision number to branch name and close.
+ This is a low level cache, independent of filtering.
+
+ Branch names are stored in rbc-names in internal encoding separated by 0.
+ rbc-names is append-only, and each branch name is only stored once and will
+ thus have a unique index.
+
+ The branch info for each revision is stored in rbc-revs as constant size
+ records. The whole file is read into memory, but it is only 'parsed' on
+ demand. The file is usually append-only but will be truncated if repo
+ modification is detected.
+ The record for each revision contains the first 4 bytes of the
+ corresponding node hash, and the record is only used if it still matches.
+ Even a completely trashed rbc-revs fill thus still give the right result
+ while converging towards full recovery ... assuming no incorrectly matching
+ node hashes.
+ The record also contains 4 bytes where 31 bits contains the index of the
+ branch and the last bit indicate that it is a branch close commit.
+ The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
+ and will grow with it but be 1/8th of its size.
+ """
+
+ def __init__(self, repo):
+ assert repo.filtername is None
+ self._names = [] # branch names in local encoding with static index
+ self._rbcrevs = array('c') # structs of type _rbcrecfmt
+ self._rbcsnameslen = 0
+ try:
+ bndata = repo.vfs.read(_rbcnames)
+ self._rbcsnameslen = len(bndata) # for verification before writing
+ self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
+ except (IOError, OSError), inst:
+ repo.ui.debug("couldn't read revision branch cache names: %s\n" %
+ inst)
+ if self._names:
+ try:
+ data = repo.vfs.read(_rbcrevs)
+ self._rbcrevs.fromstring(data)
+ except (IOError, OSError), inst:
+ repo.ui.debug("couldn't read revision branch cache: %s\n" %
+ inst)
+ # remember number of good records on disk
+ self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
+ len(repo.changelog))
+ if self._rbcrevslen == 0:
+ self._names = []
+ self._rbcnamescount = len(self._names) # number of good names on disk
+ self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
+
+ def branchinfo(self, changelog, rev):
+ """Return branch name and close flag for rev, using and updating
+ persistent cache."""
+ rbcrevidx = rev * _rbcrecsize
+
+ # if requested rev is missing, add and populate all missing revs
+ if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
+ first = len(self._rbcrevs) // _rbcrecsize
+ self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
+ len(self._rbcrevs)))
+ for r in xrange(first, len(changelog)):
+ self._branchinfo(r)
+
+ # fast path: extract data from cache, use it if node is matching
+ reponode = changelog.node(rev)[:_rbcnodelen]
+ cachenode, branchidx = unpack(
+ _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
+ close = bool(branchidx & _rbccloseflag)
+ if close:
+ branchidx &= _rbcbranchidxmask
+ if cachenode == reponode:
+ return self._names[branchidx], close
+ # fall back to slow path and make sure it will be written to disk
+ self._rbcrevslen = min(self._rbcrevslen, rev)
+ return self._branchinfo(rev)
+
+ def _branchinfo(self, changelog, rev):
+ """Retrieve branch info from changelog and update _rbcrevs"""
+ b, close = changelog.branchinfo(rev)
+ if b in self._namesreverse:
+ branchidx = self._namesreverse[b]
+ else:
+ branchidx = len(self._names)
+ self._names.append(b)
+ self._namesreverse[b] = branchidx
+ reponode = changelog.node(rev)
+ if close:
+ branchidx |= _rbccloseflag
+ rbcrevidx = rev * _rbcrecsize
+ rec = array('c')
+ rec.fromstring(pack(_rbcrecfmt, reponode, branchidx))
+ self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
+ return b, close
+
+ def write(self, repo):
+ """Save branch cache if it is dirty."""
+ if self._rbcnamescount < len(self._names):
+ try:
+ if self._rbcnamescount != 0:
+ f = repo.vfs.open(_rbcnames, 'ab')
+ if f.tell() == self._rbcsnameslen:
+ f.write('\0')
+ else:
+ f.close()
+ self._rbcnamescount = 0
+ self._rbcrevslen = 0
+ if self._rbcnamescount == 0:
+ f = repo.vfs.open(_rbcnames, 'wb')
+ f.write('\0'.join(encoding.fromlocal(b)
+ for b in self._names[self._rbcnamescount:]))
+ self._rbcsnameslen = f.tell()
+ f.close()
+ except (IOError, OSError, util.Abort), inst:
+ repo.ui.debug("couldn't write revision branch cache names: "
+ "%s\n" % inst)
+ return
+ self._rbcnamescount = len(self._names)
+
+ start = self._rbcrevslen * _rbcrecsize
+ if start != len(self._rbcrevs):
+ self._rbcrevslen = min(len(repo.changelog),
+ len(self._rbcrevs) // _rbcrecsize)
+ try:
+ f = repo.vfs.open(_rbcrevs, 'ab')
+ if f.tell() != start:
+ f.seek(start)
+ f.truncate()
+ end = self._rbcrevslen * _rbcrecsize
+ f.write(self._rbcrevs[start:end])
+ f.close()
+ except (IOError, OSError, util.Abort), inst:
+ repo.ui.debug("couldn't write revision branch cache: %s\n" %
+ inst)
+ return