restructure changelog file appending
author Matt Mackall <mpm@selenic.com>
Thu, 22 Mar 2007 23:37:44 -0500
changeset 4261 cd7b36b7869c
parent 4260 bdbfc2193524
child 4265 94bb953b43e5
restructure changelog file appending

- make appending code a proper part of changelog with delayupdate/finalize
- use a simplified appender that tracks pending data in memory
- eliminate the old appendfile and helper classes
- update addchangegroup to use the new interface and reuse the existing changelog
mercurial/appendfile.py
mercurial/changelog.py
mercurial/localrepo.py
--- a/mercurial/appendfile.py	Thu Mar 22 20:10:46 2007 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-# appendfile.py - special classes to make repo updates atomic
-#
-# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
-#
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
-
-import cStringIO, changelog, errno, manifest, os, tempfile, util
-
-# writes to metadata files are ordered.  reads: changelog, manifest,
-# normal files.  writes: normal files, manifest, changelog.
-
-# manifest contains pointers to offsets in normal files.  changelog
-# contains pointers to offsets in manifest.  if reader reads old
-# changelog while manifest or normal files are written, it has no
-# pointers into new parts of those files that are maybe not consistent
-# yet, so will not read them.
-
-# localrepo.addchangegroup thinks it writes changelog first, then
-# manifest, then normal files (this is order they are available, and
-# needed for computing linkrev fields), but uses appendfile to hide
-# updates from readers.  data not written to manifest or changelog
-# until all normal files updated.  write manifest first, then
-# changelog.
-
-# with this write ordering, readers cannot see inconsistent view of
-# repo during update.
-
-class appendfile(object):
-    '''implement enough of file protocol to append to revlog file.
-    appended data is written to temp file.  reads and seeks span real
-    file and temp file.  readers cannot see appended data until
-    writedata called.'''
-
-    def __init__(self, fp, tmpname):
-        if tmpname:
-            self.tmpname = tmpname
-            self.tmpfp = util.posixfile(self.tmpname, 'ab+')
-        else:
-            fd, self.tmpname = tempfile.mkstemp(prefix="hg-appendfile-")
-            os.close(fd)
-            self.tmpfp = util.posixfile(self.tmpname, 'ab+')
-        self.realfp = fp
-        self.offset = fp.tell()
-        # real file is not written by anyone else. cache its size so
-        # seek and read can be fast.
-        self.realsize = util.fstat(fp).st_size
-        self.name = fp.name
-
-    def end(self):
-        self.tmpfp.flush() # make sure the stat is correct
-        return self.realsize + util.fstat(self.tmpfp).st_size
-
-    def tell(self):
-        return self.offset
-
-    def flush(self):
-        self.tmpfp.flush()
-
-    def close(self):
-        self.realfp.close()
-        self.tmpfp.close()
-
-    def seek(self, offset, whence=0):
-        '''virtual file offset spans real file and temp file.'''
-        if whence == 0:
-            self.offset = offset
-        elif whence == 1:
-            self.offset += offset
-        elif whence == 2:
-            self.offset = self.end() + offset
-
-        if self.offset < self.realsize:
-            self.realfp.seek(self.offset)
-        else:
-            self.tmpfp.seek(self.offset - self.realsize)
-
-    def read(self, count=-1):
-        '''only trick here is reads that span real file and temp file.'''
-        fp = cStringIO.StringIO()
-        old_offset = self.offset
-        if self.offset < self.realsize:
-            s = self.realfp.read(count)
-            fp.write(s)
-            self.offset += len(s)
-            if count > 0:
-                count -= len(s)
-        if count != 0:
-            if old_offset != self.offset:
-                self.tmpfp.seek(self.offset - self.realsize)
-            s = self.tmpfp.read(count)
-            fp.write(s)
-            self.offset += len(s)
-        return fp.getvalue()
-
-    def write(self, s):
-        '''append to temp file.'''
-        self.tmpfp.seek(0, 2)
-        self.tmpfp.write(s)
-        # all writes are appends, so offset must go to end of file.
-        self.offset = self.realsize + self.tmpfp.tell()
-
-class appendopener(object):
-    '''special opener for files that only read or append.'''
-
-    def __init__(self, opener):
-        self.realopener = opener
-        self.tmpname = None
-
-    def __call__(self, name, mode='r'):
-        '''open file.'''
-        # only handle .i file
-        if not name.endswith("."):
-            return self.realopener(name, mode)
-        assert mode in 'ra+'
-        try:
-            realfp = self.realopener(name, 'r')
-        except IOError, err:
-            if err.errno != errno.ENOENT: raise
-            self.realfp = self.realopener(name, 'w+')
-        fp = appendfile(realfp, self.tmpname)
-        if tmpname is None:
-            self.tmpname = fp.tmpname
-            self.name = name
-        return fp
-
-    def writedata(self):
-        '''copy data from temp files to real files.'''
-        if not self.tmpname:
-            return
-        ifp = open(self.tmpname, 'rb')
-        ofp = self.realopener(self.name, 'a')
-        for chunk in util.filechunkiter(ifp):
-            ofp.write(chunk)
-        ifp.close()
-        os.unlink(self.tmpname)
-        ofp.close()
-
-    def cleanup(self):
-        '''delete temp files (this discards unwritten data!)'''
-        if self.tmpname:
-            os.unlink(self.tmpname)
-
-# files for changelog and manifest are in different appendopeners, so
-# not mixed up together.
-
-class appendchangelog(changelog.changelog, appendopener):
-    def __init__(self, opener):
-        appendopener.__init__(self, opener)
-        changelog.changelog.__init__(self, self)
-    def checkinlinesize(self, fp, tr):
-        return
--- a/mercurial/changelog.py	Thu Mar 22 20:10:46 2007 -0500
+++ b/mercurial/changelog.py	Thu Mar 22 23:37:44 2007 -0500
@@ -26,10 +26,89 @@
 def _string_unescape(text):
     return text.decode('string_escape')
 
+class appender:
+    '''the changelog index must be updated last on disk, so we use this class
+    to delay writes to it'''
+    def __init__(self, fp, buf):
+        self.data = buf  # pending chunks; shared list, mutated in place
+        self.fp = fp
+        self.offset = fp.tell()  # virtual position across file + data
+        self.size = util.fstat(fp).st_size  # real file size at construction
+
+    def end(self):
+        return self.size + len("".join(self.data))
+    def tell(self):
+        return self.offset
+    def flush(self):
+        pass  # pending data lives only in memory; nothing to flush
+    def close(self):
+        self.fp.close()
+
+    def seek(self, offset, whence=0):
+        '''virtual file offset spans real file and data'''
+        if whence == 0:
+            self.offset = offset
+        elif whence == 1:
+            self.offset += offset
+        elif whence == 2:
+            self.offset = self.end() + offset
+        if self.offset < self.size:
+            self.fp.seek(self.offset)
+
+    def read(self, count=-1):
+        '''only trick here is reads that span real file and data'''
+        ret = ""
+        if self.offset < self.size:
+            ret = self.fp.read(count)
+            self.offset += len(ret)
+            if count > 0:
+                count -= len(ret)
+        if count != 0:
+            doff = self.offset - self.size
+            self.data[:] = ["".join(self.data)]  # collapse chunks for slicing
+            if count < 0:  # read to the end; doff+count would clip the tail
+                s = self.data[0][doff:]
+            else:
+                s = self.data[0][doff:doff+count]
+            self.offset += len(s)
+            ret += s
+        return ret
+
+    def write(self, s):
+        self.data.append(s)
+        self.offset += len(s)
+
 class changelog(revlog):
     def __init__(self, opener):
         revlog.__init__(self, opener, "00changelog.i")
 
+    def delayupdate(self):
+        "buffer index writes in memory so other readers cannot see them yet"
+        self._delaybuf = []  # pending index data, written out by finalize()
+        self._realopener = self.opener  # remembered so it can be restored
+        self.opener = self._appendopener
+
+    def finalize(self, tr):
+        "restore the real opener and append the delayed index data to disk"
+        self.opener = self._realopener
+        if self._delaybuf:
+            fp = self.opener(self.indexfile, 'a')
+            fp.write("".join(self._delaybuf))
+            fp.close()
+            del self._delaybuf
+        self.checkinlinesize(tr)  # no longer skipped: real opener is back
+
+    def _appendopener(self, name, mode='r'):
+        realfp = self._realopener(name, mode)
+        if name == self.indexfile:  # only the index file gets delayed writes
+            return appender(realfp, self._delaybuf)
+        return realfp
+
+    def checkinlinesize(self, tr, fp=None):
+        # while index updates are delayed, skip the check and return None
+        if not (self.opener == self._appendopener):
+            return revlog.checkinlinesize(self, tr, fp)
+
     def decode_extra(self, text):
         extra = {}
         for l in text.split('\0'):
--- a/mercurial/localrepo.py	Thu Mar 22 20:10:46 2007 -0500
+++ b/mercurial/localrepo.py	Thu Mar 22 23:37:44 2007 -0500
@@ -7,7 +7,7 @@
 
 from node import *
 from i18n import _
-import repo, appendfile, changegroup
+import repo, changegroup
 import changelog, dirstate, filelog, manifest, context
 import re, lock, transaction, tempfile, stat, mdiff, errno, ui
 import os, revlog, time, util
@@ -1782,52 +1782,45 @@
 
         # write changelog data to temp files so concurrent readers will not see
         # inconsistent view
-        cl = None
-        try:
-            cl = appendfile.appendchangelog(self.sopener)
-            oldheads = len(cl.heads())
+        cl = self.changelog
+        cl.delayupdate()
+        oldheads = len(cl.heads())
+
+        # pull off the changeset group
+        self.ui.status(_("adding changesets\n"))
+        cor = cl.count() - 1
+        chunkiter = changegroup.chunkiter(source)
+        if cl.addgroup(chunkiter, csmap, tr, 1) is None:
+            raise util.Abort(_("received changelog group is empty"))
+        cnr = cl.count() - 1
+        changesets = cnr - cor
 
-            # pull off the changeset group
-            self.ui.status(_("adding changesets\n"))
-            cor = cl.count() - 1
-            chunkiter = changegroup.chunkiter(source)
-            if cl.addgroup(chunkiter, csmap, tr, 1) is None:
-                raise util.Abort(_("received changelog group is empty"))
-            cnr = cl.count() - 1
-            changesets = cnr - cor
+        # pull off the manifest group
+        self.ui.status(_("adding manifests\n"))
+        chunkiter = changegroup.chunkiter(source)
+        # no need to check for empty manifest group here:
+        # if the result of the merge of 1 and 2 is the same in 3 and 4,
+        # no new manifest will be created and the manifest group will
+        # be empty during the pull
+        self.manifest.addgroup(chunkiter, revmap, tr)
 
-            # pull off the manifest group
-            self.ui.status(_("adding manifests\n"))
+        # process the files
+        self.ui.status(_("adding file changes\n"))
+        while 1:
+            f = changegroup.getchunk(source)
+            if not f:
+                break
+            self.ui.debug(_("adding %s revisions\n") % f)
+            fl = self.file(f)
+            o = fl.count()
             chunkiter = changegroup.chunkiter(source)
-            # no need to check for empty manifest group here:
-            # if the result of the merge of 1 and 2 is the same in 3 and 4,
-            # no new manifest will be created and the manifest group will
-            # be empty during the pull
-            self.manifest.addgroup(chunkiter, revmap, tr)
-
-            # process the files
-            self.ui.status(_("adding file changes\n"))
-            while 1:
-                f = changegroup.getchunk(source)
-                if not f:
-                    break
-                self.ui.debug(_("adding %s revisions\n") % f)
-                fl = self.file(f)
-                o = fl.count()
-                chunkiter = changegroup.chunkiter(source)
-                if fl.addgroup(chunkiter, revmap, tr) is None:
-                    raise util.Abort(_("received file revlog group is empty"))
-                revisions += fl.count() - o
-                files += 1
-
-            cl.writedata()
-        finally:
-            if cl:
-                cl.cleanup()
+            if fl.addgroup(chunkiter, revmap, tr) is None:
+                raise util.Abort(_("received file revlog group is empty"))
+            revisions += fl.count() - o
+            files += 1
 
         # make changelog see real files again
-        self.changelog = changelog.changelog(self.sopener)
-        self.changelog.checkinlinesize(tr)
+        cl.finalize(tr)
 
         newheads = len(self.changelog.heads())
         heads = ""