changeset 23201:7e97bf6ee2d6

changelog: rework the delayupdate mechanism The current way we use the 'delayupdate' mechanism is wrong. We call 'delayupdate' right after the transaction retrieval, then we call 'finalize' right before calling 'tr.close()'. The 'finalize' call will -always- result in a flush to disk, making the data available to all readers. But the 'tr.close()' may be a no-op if the transaction is nested. This would result in data: 1) exposed to readers too early, 2) rolled back by another part of the transaction after such exposure. So we need to end up in a situation where we call 'finalize' a single time when the transaction actually closes. For this purpose we need to be able to call 'delayupdate' and '_writepending' multiple times and 'finalize' once. This was not possible with the previous state of the code. This changeset refactors the code to make this possible. We buffer data in memory as much as possible and fall back to writing to a ".a" file after the first call to '_writepending'.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Sat, 18 Oct 2014 01:12:18 -0700
parents 48a2eefd3d20
children ea5af863fbff
files mercurial/changelog.py
diffstat 1 files changed, 29 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/changelog.py	Wed Nov 05 12:41:12 2014 -0600
+++ b/mercurial/changelog.py	Sat Oct 18 01:12:18 2014 -0700
@@ -108,15 +108,21 @@
         self.data.append(str(s))
         self.offset += len(s)
 
-def delayopener(opener, target, divert, buf):
-    def o(name, mode='r'):
+def _divertopener(opener, target):
+    """build an opener that writes in 'target.a' instead of 'target'"""
+    def _divert(name, mode='r'):
         if name != target:
             return opener(name, mode)
-        if divert:
-            return opener(name + ".a", mode.replace('a', 'w'))
-        # otherwise, divert to memory
+        return opener(name + ".a", mode)
+    return _divert
+
+def _delayopener(opener, target, buf):
+    """build an opener that stores chunks in 'buf' instead of 'target'"""
+    def _delay(name, mode='r'):
+        if name != target:
+            return opener(name, mode)
         return appender(opener, name, mode, buf)
-    return o
+    return _delay
 
 class changelog(revlog.revlog):
     def __init__(self, opener):
@@ -127,7 +133,7 @@
             self._generaldelta = False
         self._realopener = opener
         self._delayed = False
-        self._delaybuf = []
+        self._delaybuf = None
         self._divert = False
         self.filteredrevs = frozenset()
 
@@ -220,11 +226,18 @@
 
     def delayupdate(self):
         "delay visibility of index updates to other readers"
+
+        if not self._delayed:
+            if len(self) == 0:
+                self._divert = True
+                if self._realopener.exists(self.indexfile + '.a'):
+                    self._realopener.unlink(self.indexfile + '.a')
+                self.opener = _divertopener(self._realopener, self.indexfile)
+            else:
+                self._delaybuf = []
+                self.opener = _delayopener(self._realopener, self.indexfile,
+                                           self._delaybuf)
         self._delayed = True
-        self._divert = (len(self) == 0)
-        self._delaybuf = []
-        self.opener = delayopener(self._realopener, self.indexfile,
-                                  self._divert, self._delaybuf)
 
     def finalize(self, tr):
         "finalize index updates"
@@ -232,6 +245,7 @@
         self.opener = self._realopener
         # move redirected index data back into place
         if self._divert:
+            assert not self._delaybuf
             tmpname = self.indexfile + ".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
@@ -240,7 +254,8 @@
             fp = self.opener(self.indexfile, 'a')
             fp.write("".join(self._delaybuf))
             fp.close()
-            self._delaybuf = []
+            self._delaybuf = None
+        self._divert = False
         # split when we're done
         self.checkinlinesize(tr)
 
@@ -262,8 +277,9 @@
             fp2.write("".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
-            self._delaybuf = []
+            self._delaybuf = None
             self._divert = True
+            self.opener = _divertopener(self._realopener, self.indexfile)
 
         if self._divert:
             return True