# HG changeset patch # User Pierre-Yves David # Date 1413619938 25200 # Node ID 7e97bf6ee2d6de7ddc01fcf0e38881038e8971b1 # Parent 48a2eefd3d2048c85fc22bf06e32ba06a1698fff changelog: rework the delayupdate mechanism The current way we use the 'delayupdate' mechanism is wrong. We call 'delayupdate' right after the transaction retrieval, then we call 'finalize' right before calling 'tr.close()'. The 'finalize' call will -always- result in a flush to disk, making the data available to all readers. But the 'tr.close()' may be a no-op if the transaction is nested. This would result in data: 1) exposed to readers too early, 2) rolled back by another part of the transaction after such exposure. So we need to end up in a situation where we call 'finalize' a single time when the transaction actually closes. For this purpose we need to be able to call 'delayupdate' and '_writepending' multiple times and 'finalize' once. This was not possible with the previous state of the code. This changeset refactors the code to make this possible. We buffer data in memory as much as possible and fall back to writing to a ".a" file after the first call to '_writepending'. 
diff -r 48a2eefd3d20 -r 7e97bf6ee2d6 mercurial/changelog.py --- a/mercurial/changelog.py Wed Nov 05 12:41:12 2014 -0600 +++ b/mercurial/changelog.py Sat Oct 18 01:12:18 2014 -0700 @@ -108,15 +108,21 @@ self.data.append(str(s)) self.offset += len(s) -def delayopener(opener, target, divert, buf): - def o(name, mode='r'): +def _divertopener(opener, target): + """build an opener that writes in 'target.a' instead of 'target'""" + def _divert(name, mode='r'): if name != target: return opener(name, mode) - if divert: - return opener(name + ".a", mode.replace('a', 'w')) - # otherwise, divert to memory + return opener(name + ".a", mode) + return _divert + +def _delayopener(opener, target, buf): + """build an opener that stores chunks in 'buf' instead of 'target'""" + def _delay(name, mode='r'): + if name != target: + return opener(name, mode) return appender(opener, name, mode, buf) - return o + return _delay class changelog(revlog.revlog): def __init__(self, opener): @@ -127,7 +133,7 @@ self._generaldelta = False self._realopener = opener self._delayed = False - self._delaybuf = [] + self._delaybuf = None self._divert = False self.filteredrevs = frozenset() @@ -220,11 +226,18 @@ def delayupdate(self): "delay visibility of index updates to other readers" + + if not self._delayed: + if len(self) == 0: + self._divert = True + if self._realopener.exists(self.indexfile + '.a'): + self._realopener.unlink(self.indexfile + '.a') + self.opener = _divertopener(self._realopener, self.indexfile) + else: + self._delaybuf = [] + self.opener = _delayopener(self._realopener, self.indexfile, + self._delaybuf) self._delayed = True - self._divert = (len(self) == 0) - self._delaybuf = [] - self.opener = delayopener(self._realopener, self.indexfile, - self._divert, self._delaybuf) def finalize(self, tr): "finalize index updates" @@ -232,6 +245,7 @@ self.opener = self._realopener # move redirected index data back into place if self._divert: + assert not self._delaybuf tmpname = self.indexfile + 
".a" nfile = self.opener.open(tmpname) nfile.close() @@ -240,7 +254,8 @@ fp = self.opener(self.indexfile, 'a') fp.write("".join(self._delaybuf)) fp.close() - self._delaybuf = [] + self._delaybuf = None + self._divert = False # split when we're done self.checkinlinesize(tr) @@ -262,8 +277,9 @@ fp2.write("".join(self._delaybuf)) fp2.close() # switch modes so finalize can simply rename - self._delaybuf = [] + self._delaybuf = None self._divert = True + self.opener = _divertopener(self._realopener, self.indexfile) if self._divert: return True