Mercurial > hg
changeset 23201:7e97bf6ee2d6
changelog: rework the delayupdate mechanism
The current way we use the 'delayupdate' mechanism is wrong. We call
'delayupdate' right after the transaction retrieval, then we call 'finalize'
right before calling 'tr.close()'. The 'finalize' call will -always- result in a
flush to disk, making the data available to all readers. But the 'tr.close()' may
be a no-op if the transaction is nested. This would result in data being:
1) exposed to readers too early,
2) rolled back by another part of the transaction after such exposure.
So we need to end up in a situation where we call 'finalize' a single time when
the transaction actually closes. For this purpose we need to be able to call
'delayupdate' and '_writepending' multiple times and 'finalize' once. This was
not possible with the previous state of the code.
This changeset refactors the code to make this possible. We buffer data in memory
as much as possible and fall back to writing to a ".a" file after the first call
to '_writepending'.
author | Pierre-Yves David <pierre-yves.david@fb.com> |
---|---|
date | Sat, 18 Oct 2014 01:12:18 -0700 |
parents | 48a2eefd3d20 |
children | ea5af863fbff |
files | mercurial/changelog.py |
diffstat | 1 files changed, 29 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/changelog.py Wed Nov 05 12:41:12 2014 -0600 +++ b/mercurial/changelog.py Sat Oct 18 01:12:18 2014 -0700 @@ -108,15 +108,21 @@ self.data.append(str(s)) self.offset += len(s) -def delayopener(opener, target, divert, buf): - def o(name, mode='r'): +def _divertopener(opener, target): + """build an opener that writes in 'target.a' instead of 'target'""" + def _divert(name, mode='r'): if name != target: return opener(name, mode) - if divert: - return opener(name + ".a", mode.replace('a', 'w')) - # otherwise, divert to memory + return opener(name + ".a", mode) + return _divert + +def _delayopener(opener, target, buf): + """build an opener that stores chunks in 'buf' instead of 'target'""" + def _delay(name, mode='r'): + if name != target: + return opener(name, mode) return appender(opener, name, mode, buf) - return o + return _delay class changelog(revlog.revlog): def __init__(self, opener): @@ -127,7 +133,7 @@ self._generaldelta = False self._realopener = opener self._delayed = False - self._delaybuf = [] + self._delaybuf = None self._divert = False self.filteredrevs = frozenset() @@ -220,11 +226,18 @@ def delayupdate(self): "delay visibility of index updates to other readers" + + if not self._delayed: + if len(self) == 0: + self._divert = True + if self._realopener.exists(self.indexfile + '.a'): + self._realopener.unlink(self.indexfile + '.a') + self.opener = _divertopener(self._realopener, self.indexfile) + else: + self._delaybuf = [] + self.opener = _delayopener(self._realopener, self.indexfile, + self._delaybuf) self._delayed = True - self._divert = (len(self) == 0) - self._delaybuf = [] - self.opener = delayopener(self._realopener, self.indexfile, - self._divert, self._delaybuf) def finalize(self, tr): "finalize index updates" @@ -232,6 +245,7 @@ self.opener = self._realopener # move redirected index data back into place if self._divert: + assert not self._delaybuf tmpname = self.indexfile + ".a" nfile = self.opener.open(tmpname) nfile.close() @@ 
-240,7 +254,8 @@ fp = self.opener(self.indexfile, 'a') fp.write("".join(self._delaybuf)) fp.close() - self._delaybuf = [] + self._delaybuf = None + self._divert = False # split when we're done self.checkinlinesize(tr) @@ -262,8 +277,9 @@ fp2.write("".join(self._delaybuf)) fp2.close() # switch modes so finalize can simply rename - self._delaybuf = [] + self._delaybuf = None self._divert = True + self.opener = _divertopener(self._realopener, self.indexfile) if self._divert: return True