restructure changelog file appending
- make the appending code a proper part of changelog, via delayupdate/finalize
- use simplified appender that tracks pending data in memory
- eliminate old appendfile and helper classes
- update addchangegroup to use new interface and reuse the existing changelog
--- a/mercurial/appendfile.py Thu Mar 22 20:10:46 2007 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,152 +0,0 @@
-# appendfile.py - special classes to make repo updates atomic
-#
-# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
-#
-# This software may be used and distributed according to the terms
-# of the GNU General Public License, incorporated herein by reference.
-
-import cStringIO, changelog, errno, manifest, os, tempfile, util
-
-# writes to metadata files are ordered. reads: changelog, manifest,
-# normal files. writes: normal files, manifest, changelog.
-
-# manifest contains pointers to offsets in normal files. changelog
-# contains pointers to offsets in manifest. if reader reads old
-# changelog while manifest or normal files are written, it has no
-# pointers into new parts of those files that are maybe not consistent
-# yet, so will not read them.
-
-# localrepo.addchangegroup thinks it writes changelog first, then
-# manifest, then normal files (this is order they are available, and
-# needed for computing linkrev fields), but uses appendfile to hide
-# updates from readers. data not written to manifest or changelog
-# until all normal files updated. write manifest first, then
-# changelog.
-
-# with this write ordering, readers cannot see inconsistent view of
-# repo during update.
-
-class appendfile(object):
- '''implement enough of file protocol to append to revlog file.
- appended data is written to temp file. reads and seeks span real
- file and temp file. readers cannot see appended data until
- writedata called.'''
-
- def __init__(self, fp, tmpname):
- if tmpname:
- self.tmpname = tmpname
- self.tmpfp = util.posixfile(self.tmpname, 'ab+')
- else:
- fd, self.tmpname = tempfile.mkstemp(prefix="hg-appendfile-")
- os.close(fd)
- self.tmpfp = util.posixfile(self.tmpname, 'ab+')
- self.realfp = fp
- self.offset = fp.tell()
- # real file is not written by anyone else. cache its size so
- # seek and read can be fast.
- self.realsize = util.fstat(fp).st_size
- self.name = fp.name
-
- def end(self):
- self.tmpfp.flush() # make sure the stat is correct
- return self.realsize + util.fstat(self.tmpfp).st_size
-
- def tell(self):
- return self.offset
-
- def flush(self):
- self.tmpfp.flush()
-
- def close(self):
- self.realfp.close()
- self.tmpfp.close()
-
- def seek(self, offset, whence=0):
- '''virtual file offset spans real file and temp file.'''
- if whence == 0:
- self.offset = offset
- elif whence == 1:
- self.offset += offset
- elif whence == 2:
- self.offset = self.end() + offset
-
- if self.offset < self.realsize:
- self.realfp.seek(self.offset)
- else:
- self.tmpfp.seek(self.offset - self.realsize)
-
- def read(self, count=-1):
- '''only trick here is reads that span real file and temp file.'''
- fp = cStringIO.StringIO()
- old_offset = self.offset
- if self.offset < self.realsize:
- s = self.realfp.read(count)
- fp.write(s)
- self.offset += len(s)
- if count > 0:
- count -= len(s)
- if count != 0:
- if old_offset != self.offset:
- self.tmpfp.seek(self.offset - self.realsize)
- s = self.tmpfp.read(count)
- fp.write(s)
- self.offset += len(s)
- return fp.getvalue()
-
- def write(self, s):
- '''append to temp file.'''
- self.tmpfp.seek(0, 2)
- self.tmpfp.write(s)
- # all writes are appends, so offset must go to end of file.
- self.offset = self.realsize + self.tmpfp.tell()
-
-class appendopener(object):
- '''special opener for files that only read or append.'''
-
- def __init__(self, opener):
- self.realopener = opener
- self.tmpname = None
-
- def __call__(self, name, mode='r'):
- '''open file.'''
- # only handle .i file
- if not name.endswith("."):
- return self.realopener(name, mode)
- assert mode in 'ra+'
- try:
- realfp = self.realopener(name, 'r')
- except IOError, err:
- if err.errno != errno.ENOENT: raise
- self.realfp = self.realopener(name, 'w+')
- fp = appendfile(realfp, self.tmpname)
- if tmpname is None:
- self.tmpname = fp.tmpname
- self.name = name
- return fp
-
- def writedata(self):
- '''copy data from temp files to real files.'''
- if not self.tmpname:
- return
- ifp = open(self.tmpname, 'rb')
- ofp = self.realopener(self.name, 'a')
- for chunk in util.filechunkiter(ifp):
- ofp.write(chunk)
- ifp.close()
- os.unlink(self.tmpname)
- ofp.close()
-
- def cleanup(self):
- '''delete temp files (this discards unwritten data!)'''
- if self.tmpname:
- os.unlink(self.tmpname)
-
-# files for changelog and manifest are in different appendopeners, so
-# not mixed up together.
-
-class appendchangelog(changelog.changelog, appendopener):
- def __init__(self, opener):
- appendopener.__init__(self, opener)
- changelog.changelog.__init__(self, self)
- def checkinlinesize(self, fp, tr):
- return
--- a/mercurial/changelog.py Thu Mar 22 20:10:46 2007 -0500
+++ b/mercurial/changelog.py Thu Mar 22 23:37:44 2007 -0500
@@ -26,10 +26,89 @@
def _string_unescape(text):
return text.decode('string_escape')
+class appender:
+    '''file-like wrapper: reads span the real file plus pending data, writes
+    only go to the in-memory buf, so the on-disk index can be updated last'''
+    def __init__(self, fp, buf):
+        self.data = buf   # list of pending chunks; not copied, caller sees appends
+        self.fp = fp
+        self.offset = fp.tell()   # virtual position across file + pending data
+        self.size = util.fstat(fp).st_size   # bytes already in the real file
+
+    def end(self):   # virtual end of file: on-disk size plus pending bytes
+        return self.size + len("".join(self.data))
+    def tell(self):   # current virtual offset
+        return self.offset
+    def flush(self):   # no-op: pending data stays in memory
+        pass
+    def close(self):   # closes the real file only; pending data is untouched
+        self.fp.close()
+
+    def seek(self, offset, whence=0):
+        '''virtual file offset spans real file and data'''
+        if whence == 0:
+            self.offset = offset
+        elif whence == 1:
+            self.offset += offset
+        elif whence == 2:
+            self.offset = self.end() + offset
+        if self.offset < self.size:   # fp only moves while inside the real file
+            self.fp.seek(self.offset)
+
+    def read(self, count=-1):
+        '''read from the real file first, then continue into the pending data'''
+        ret = ""
+        if self.offset < self.size:
+            s = self.fp.read(count)
+            ret = s
+            self.offset += len(s)
+            if count > 0:
+                count -= len(s)
+        if count != 0:
+            doff = self.offset - self.size
+            self.data.insert(0, "".join(self.data))   # coalesce pending chunks
+            del self.data[1:]
+            s = self.data[0][doff:]
+            if count > 0: s = s[:count]   # count < 0 means read to the end
+            self.offset += len(s)
+            ret += s
+        return ret
+
+    def write(self, s):   # append to the pending buffer, never to fp
+        self.data.append(s)
+        self.offset += len(s)
+
class changelog(revlog):
def __init__(self, opener):
revlog.__init__(self, opener, "00changelog.i")
+    def delayupdate(self):
+        "route index writes through an in-memory buffer, hidden from readers"
+        self._delaybuf = []
+        self._realopener = self.opener
+        self.opener = self._appendopener
+
+    def finalize(self, tr):
+        "append the delayed index data for real, then check the inline size"
+        realopener = self.opener = self._realopener
+        if self._delaybuf:
+            out = realopener(self.indexfile, 'a')
+            out.write("".join(self._delaybuf))
+            out.close()
+            del self._delaybuf
+        self.checkinlinesize(tr)
+
+    def _appendopener(self, name, mode='r'):   # wraps only the index file
+        realfp = self._realopener(name, mode)
+        if name == self.indexfile:
+            return appender(realfp, self._delaybuf)
+        return realfp
+
+    def checkinlinesize(self, tr, fp=None):   # skipped while updates are delayed
+        delayed = self.opener == self._appendopener
+        if not delayed:
+            return revlog.checkinlinesize(self, tr, fp)
+
def decode_extra(self, text):
extra = {}
for l in text.split('\0'):
--- a/mercurial/localrepo.py Thu Mar 22 20:10:46 2007 -0500
+++ b/mercurial/localrepo.py Thu Mar 22 23:37:44 2007 -0500
@@ -7,7 +7,7 @@
from node import *
from i18n import _
-import repo, appendfile, changegroup
+import repo, changegroup
import changelog, dirstate, filelog, manifest, context
import re, lock, transaction, tempfile, stat, mdiff, errno, ui
import os, revlog, time, util
@@ -1782,52 +1782,45 @@
# write changelog data to temp files so concurrent readers will not see
# inconsistent view
- cl = None
- try:
- cl = appendfile.appendchangelog(self.sopener)
- oldheads = len(cl.heads())
+ cl = self.changelog
+ cl.delayupdate()
+ oldheads = len(cl.heads())
+
+ # pull off the changeset group
+ self.ui.status(_("adding changesets\n"))
+ cor = cl.count() - 1
+ chunkiter = changegroup.chunkiter(source)
+ if cl.addgroup(chunkiter, csmap, tr, 1) is None:
+ raise util.Abort(_("received changelog group is empty"))
+ cnr = cl.count() - 1
+ changesets = cnr - cor
- # pull off the changeset group
- self.ui.status(_("adding changesets\n"))
- cor = cl.count() - 1
- chunkiter = changegroup.chunkiter(source)
- if cl.addgroup(chunkiter, csmap, tr, 1) is None:
- raise util.Abort(_("received changelog group is empty"))
- cnr = cl.count() - 1
- changesets = cnr - cor
+ # pull off the manifest group
+ self.ui.status(_("adding manifests\n"))
+ chunkiter = changegroup.chunkiter(source)
+ # no need to check for empty manifest group here:
+ # if the result of the merge of 1 and 2 is the same in 3 and 4,
+ # no new manifest will be created and the manifest group will
+ # be empty during the pull
+ self.manifest.addgroup(chunkiter, revmap, tr)
- # pull off the manifest group
- self.ui.status(_("adding manifests\n"))
+ # process the files
+ self.ui.status(_("adding file changes\n"))
+ while 1:
+ f = changegroup.getchunk(source)
+ if not f:
+ break
+ self.ui.debug(_("adding %s revisions\n") % f)
+ fl = self.file(f)
+ o = fl.count()
chunkiter = changegroup.chunkiter(source)
- # no need to check for empty manifest group here:
- # if the result of the merge of 1 and 2 is the same in 3 and 4,
- # no new manifest will be created and the manifest group will
- # be empty during the pull
- self.manifest.addgroup(chunkiter, revmap, tr)
-
- # process the files
- self.ui.status(_("adding file changes\n"))
- while 1:
- f = changegroup.getchunk(source)
- if not f:
- break
- self.ui.debug(_("adding %s revisions\n") % f)
- fl = self.file(f)
- o = fl.count()
- chunkiter = changegroup.chunkiter(source)
- if fl.addgroup(chunkiter, revmap, tr) is None:
- raise util.Abort(_("received file revlog group is empty"))
- revisions += fl.count() - o
- files += 1
-
- cl.writedata()
- finally:
- if cl:
- cl.cleanup()
+ if fl.addgroup(chunkiter, revmap, tr) is None:
+ raise util.Abort(_("received file revlog group is empty"))
+ revisions += fl.count() - o
+ files += 1
# make changelog see real files again
- self.changelog = changelog.changelog(self.sopener)
- self.changelog.checkinlinesize(tr)
+ cl.finalize(tr)
newheads = len(self.changelog.heads())
heads = ""