changelog: rework the delayupdate mechanism
The current way we use the 'delayupdate' mechanism is wrong. We call
'delayupdate' right after the transaction retrieval, then we call 'finalize'
right before calling 'tr.close()'. The 'finalize' call will -always- result in a
flush to disk, making the data available to all readers. But the 'tr.close()' may
be a no-op if the transaction is nested. This would result in data:
1) exposed to readers too early,
2) rolled back by another part of the transaction after such exposure.
So we need to end up in a situation where we call 'finalize' a single time when
the transaction actually closes. For this purpose we need to be able to call
'delayupdate' and '_writepending' multiple times and 'finalize' once. This was
not possible with the previous state of the code.
This changeset refactors the code to make this possible. We buffer data in memory
as much as possible and fall back to writing to a ".a" file after the first call
to '_writepending'.
--- a/mercurial/changelog.py Wed Nov 05 12:41:12 2014 -0600
+++ b/mercurial/changelog.py Sat Oct 18 01:12:18 2014 -0700
@@ -108,15 +108,21 @@
self.data.append(str(s))
self.offset += len(s)
-def delayopener(opener, target, divert, buf):
- def o(name, mode='r'):
+def _divertopener(opener, target):
+ """build an opener that writes in 'target.a' instead of 'target'"""
+ def _divert(name, mode='r'):
if name != target:
return opener(name, mode)
- if divert:
- return opener(name + ".a", mode.replace('a', 'w'))
- # otherwise, divert to memory
+ return opener(name + ".a", mode)
+ return _divert
+
+def _delayopener(opener, target, buf):
+ """build an opener that stores chunks in 'buf' instead of 'target'"""
+ def _delay(name, mode='r'):
+ if name != target:
+ return opener(name, mode)
return appender(opener, name, mode, buf)
- return o
+ return _delay
class changelog(revlog.revlog):
def __init__(self, opener):
@@ -127,7 +133,7 @@
self._generaldelta = False
self._realopener = opener
self._delayed = False
- self._delaybuf = []
+ self._delaybuf = None
self._divert = False
self.filteredrevs = frozenset()
@@ -220,11 +226,18 @@
def delayupdate(self):
"delay visibility of index updates to other readers"
+
+ if not self._delayed:
+ if len(self) == 0:
+ self._divert = True
+ if self._realopener.exists(self.indexfile + '.a'):
+ self._realopener.unlink(self.indexfile + '.a')
+ self.opener = _divertopener(self._realopener, self.indexfile)
+ else:
+ self._delaybuf = []
+ self.opener = _delayopener(self._realopener, self.indexfile,
+ self._delaybuf)
self._delayed = True
- self._divert = (len(self) == 0)
- self._delaybuf = []
- self.opener = delayopener(self._realopener, self.indexfile,
- self._divert, self._delaybuf)
def finalize(self, tr):
"finalize index updates"
@@ -232,6 +245,7 @@
self.opener = self._realopener
# move redirected index data back into place
if self._divert:
+ assert not self._delaybuf
tmpname = self.indexfile + ".a"
nfile = self.opener.open(tmpname)
nfile.close()
@@ -240,7 +254,8 @@
fp = self.opener(self.indexfile, 'a')
fp.write("".join(self._delaybuf))
fp.close()
- self._delaybuf = []
+ self._delaybuf = None
+ self._divert = False
# split when we're done
self.checkinlinesize(tr)
@@ -262,8 +277,9 @@
fp2.write("".join(self._delaybuf))
fp2.close()
# switch modes so finalize can simply rename
- self._delaybuf = []
+ self._delaybuf = None
self._divert = True
+ self.opener = _divertopener(self._realopener, self.indexfile)
if self._divert:
return True