scmutil: introduce filecache
The idea is to be able to associate a file with a property, and watch
that file's stat info for modifications when we decide it's important for it
to be up-to-date. Once it changes, we recreate the object.
On filesystems that can't uniquely identify a file, we always recreate.
As a consequence, localrepo.invalidate() will become much less expensive in the
case where nothing changed on-disk.
--- a/mercurial/scmutil.py Mon Jul 25 15:03:02 2011 +0300
+++ b/mercurial/scmutil.py Sat Jul 09 19:06:59 2011 +0300
@@ -709,3 +709,95 @@
raise error.RequirementError(_("unknown repository format: "
"requires features '%s' (upgrade Mercurial)") % "', '".join(missings))
return requirements
+
+class filecacheentry(object):
+    '''Stat snapshot of a single path, used to detect on-disk changes.'''
+    def __init__(self, path):
+        self.path = path
+        self.cachestat = filecacheentry.stat(path)
+        # None means we don't know yet
+        self._cacheable = None
+        if self.cachestat:
+            self._cacheable = self.cachestat.cacheable()
+
+    def refresh(self):
+        '''Re-stat the path, unless it is known to be uncacheable.'''
+        if not self.cacheable():
+            return
+        self.cachestat = filecacheentry.stat(self.path)
+
+    def cacheable(self):
+        '''Return the known cacheability, or True while it is unknown.'''
+        if self._cacheable is None:
+            # we don't know yet, assume it is for now
+            return True
+        return self._cacheable
+
+    def changed(self):
+        '''Return True (recording the new stat info) if the path changed.'''
+        # uncacheable paths always count as changed; nothing is recorded
+        if not self.cacheable():
+            return True
+
+        current = filecacheentry.stat(self.path)
+
+        # cacheability may still be unknown; ask the fresh stat if so
+        if current and self._cacheable is None:
+            self._cacheable = current.cacheable()
+            if not self._cacheable:
+                return True
+
+        if self.cachestat != current:
+            self.cachestat = current
+            return True
+        return False
+
+    @staticmethod
+    def stat(path):
+        '''Return util.cachestat(path), or None if path doesn't exist.'''
+        try:
+            return util.cachestat(path)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                raise
+
+class filecache(object):
+    '''A property like decorator that tracks a file under .hg/ for updates.
+
+    path is resolved with obj.sjoin() if instore is set, obj.join() otherwise.
+    The first access records the file's stat info in obj._filecache and
+    stores the result on obj; once that attribute is deleted, the next
+    access re-stats the file and recreates the object only if it changed.
+
+    Mercurial either atomically renames or appends to files under .hg,
+    so to ensure the cache is reliable we need the filesystem to be able
+    to tell us if a file has been replaced. If it can't, we fall back to
+    recreating the object on every call (essentially the same behaviour as
+    propertycache).'''
+    def __init__(self, path, instore=False):
+        self.path = path
+        self.instore = instore
+
+    def __call__(self, func):
+        self.func = func
+        self.name = func.__name__
+        return self
+
+    def __get__(self, obj, type=None):
+        # class-level access has no instance to cache on: return ourselves
+        if obj is None:
+            return self
+
+        entry = obj._filecache.get(self.name)
+        if not entry:
+            path = self.instore and obj.sjoin(self.path) or obj.join(self.path)
+            # We stat -before- creating the object so our cache doesn't lie if
+            # a writer modified between the time we read and stat
+            entry = filecacheentry(path)
+            entry.obj = self.func(obj)
+            obj._filecache[self.name] = entry
+        elif entry.changed():
+            entry.obj = self.func(obj)
+
+        setattr(obj, self.name, entry.obj)
+        return entry.obj
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-filecache.py Sat Jul 09 19:06:59 2011 +0300
@@ -0,0 +1,95 @@
+import sys, os, subprocess
+
+hghave = '%s/hghave' % os.environ['TESTDIR']
+if subprocess.call([hghave, 'cacheable']) != 0:
+    # hghave exited non-zero for 'cacheable': stop here with status 80
+    sys.exit(80)
+
+from mercurial import util, scmutil, extensions
+
+filecache = scmutil.filecache  # short alias, used as a decorator below
+
+class fakerepo(object):
+    '''Minimal stand-in for the repo attributes scmutil.filecache uses.'''
+    def __init__(self):
+        self._filecache = {}
+
+    def join(self, p):
+        return p
+
+    def sjoin(self, p):
+        return p
+
+    @filecache('x')
+    def cached(self):
+        # each line printed here marks one real call in the test output
+        print 'creating'
+
+    def invalidate(self):
+        # drop the cached attributes so the next access hits filecache again
+        for name in self._filecache:
+            self.__dict__.pop(name, None)
+
+def basic(repo):
+    '''Drive repo.cached through a series of on-disk changes to file x.'''
+    def recheck():
+        # invalidate, then touch the property again
+        repo.invalidate()
+        repo.cached
+
+    # no file yet: the first access calls the function
+    repo.cached
+
+    # still no file: the cached object is reused
+    recheck()
+
+    # creating an empty file should recreate the object
+    fp = open('x', 'w')
+    fp.close()
+    recheck()
+
+    # and so should writing to it
+    fp = open('x', 'w')
+    fp.write('a')
+    fp.close()
+    recheck()
+
+    # nothing changed since the last stat: the object is reused
+    recheck()
+
+    # atomically replace the file with one of the same size; hopefully
+    # st_mtime stays the same as well, so that only the inode change
+    # can keep the cache from being used
+    fp = scmutil.opener('.')('x', 'w', atomictemp=True)
+    fp.write('b')
+    fp.rename()
+
+    recheck()
+
+def fakeuncacheable():
+    '''Run basic() with util.cachestat's __init__ and cacheable() stubbed.'''
+    def wrapcacheable(orig, *args, **kwargs):
+        return False
+
+    def wrapinit(orig, *args, **kwargs):
+        pass
+
+    originit = extensions.wrapfunction(util.cachestat, '__init__', wrapinit)
+    origcacheable = extensions.wrapfunction(util.cachestat, 'cacheable', wrapcacheable)
+    try:
+        try:
+            os.remove('x')
+        except OSError:
+            pass  # best effort: x may not be there to remove
+        basic(fakerepo())
+    finally:
+        util.cachestat.cacheable = origcacheable
+        util.cachestat.__init__ = originit
+
+print 'basic:'
+print
+basic(fakerepo())  # util.cachestat left untouched
+print
+print 'fakeuncacheable:'
+print
+fakeuncacheable()  # same steps with util.cachestat stubbed out
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-filecache.py.out Sat Jul 09 19:06:59 2011 +0300
@@ -0,0 +1,15 @@
+basic:
+
+creating
+creating
+creating
+creating
+
+fakeuncacheable:
+
+creating
+creating
+creating
+creating
+creating
+creating