changeset 40493:7caf632e30c3

filecache: unimplement __set__() and __delete__() (API) Implementing __set__() implies that the descriptor can't be overridden by obj.__dict__, which means any property access involves slow function call. "Data descriptors with __set__() and __get__() defined always override a redefinition in an instance dictionary. In contrast, non-data descriptors can be overridden by instances." https://docs.python.org/2.7/reference/datamodel.html#invoking-descriptors This patch basically backs out 236bb604dc39, "scmutil: update cached copy when filecached attribute is assigned (issue3263)." The problem described in issue3263 (which is #3264 in Bugzilla) should no longer happen since repo._bookmarkcurrent has been moved to repo._bookmarks.active. We still have a risk of introducing similar bugs, but I think that's the cost we have to pay. $ hg perfrevset 'branch(tip)' -R mercurial (orig) wall 0.139511 comb 0.140000 user 0.140000 sys 0.000000 (best of 66) (prev) wall 0.114195 comb 0.110000 user 0.110000 sys 0.000000 (best of 81) (this) wall 0.099038 comb 0.110000 user 0.100000 sys 0.010000 (best of 93)
author Yuya Nishihara <yuya@tcha.org>
date Sat, 20 Oct 2018 17:56:00 +0900
parents 597bb5a6867f
children 5e5c8f2a1eb5
files mercurial/dirstate.py mercurial/localrepo.py mercurial/scmutil.py tests/test-filecache.py
diffstat 4 files changed, 20 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirstate.py	Sat Oct 20 19:13:05 2018 +0900
+++ b/mercurial/dirstate.py	Sat Oct 20 17:56:00 2018 +0900
@@ -317,7 +317,7 @@
         return copies
 
     def setbranch(self, branch):
-        self._branch = encoding.fromlocal(branch)
+        self.__class__._branch.set(self, encoding.fromlocal(branch))
         f = self._opener('branch', 'w', atomictemp=True, checkambig=True)
         try:
             f.write(self._branch + '\n')
--- a/mercurial/localrepo.py	Sat Oct 20 19:13:05 2018 +0900
+++ b/mercurial/localrepo.py	Sat Oct 20 17:56:00 2018 +0900
@@ -91,17 +91,16 @@
     def __get__(self, repo, type=None):
         if repo is None:
             return self
-        # inlined the fast path as the cost of function call matters
+        # proxy to unfiltered __dict__ since filtered repo has no entry
         unfi = repo.unfiltered()
         try:
             return unfi.__dict__[self.sname]
         except KeyError:
             pass
         return super(_basefilecache, self).__get__(unfi, type)
-    def __set__(self, repo, value):
-        return super(_basefilecache, self).__set__(repo.unfiltered(), value)
-    def __delete__(self, repo):
-        return super(_basefilecache, self).__delete__(repo.unfiltered())
+
+    def set(self, repo, value):
+        return super(_basefilecache, self).set(repo.unfiltered(), value)
 
 class repofilecache(_basefilecache):
     """filecache for files in .hg but outside of .hg/store"""
--- a/mercurial/scmutil.py	Sat Oct 20 19:13:05 2018 +0900
+++ b/mercurial/scmutil.py	Sat Oct 20 17:56:00 2018 +0900
@@ -1249,16 +1249,15 @@
     results cached. The decorated function is called. The results are stashed
     away in a ``_filecache`` dict on the object whose method is decorated.
 
-    On subsequent access, the cached result is returned.
-
-    On external property set operations, stat() calls are performed and the new
-    value is cached.
+    On subsequent access, the cached result is used as it is set to the
+    instance dictionary.
 
-    On property delete operations, cached data is removed.
+    On external property set/delete operations, the caller must update the
+    corresponding _filecache entry appropriately. Use __class__.<attr>.set()
+    instead of directly setting <attr>.
 
-    When using the property API, cached data is always returned, if available:
-    no stat() is performed to check if the file has changed and if the function
-    needs to be called to reflect file changes.
+    When using the property API, the cached data is always used if available.
+    No stat() is performed to check if the file has changed.
 
     Others can muck about with the state of the ``_filecache`` dict. e.g. they
     can populate an entry before the property's getter is called. In this case,
@@ -1291,11 +1290,8 @@
         # if accessed on the class, return the descriptor itself.
         if obj is None:
             return self
-        # do we need to check if the file changed?
-        try:
-            return obj.__dict__[self.sname]
-        except KeyError:
-            pass
+
+        assert self.sname not in obj.__dict__
 
         entry = obj._filecache.get(self.name)
 
@@ -1315,7 +1311,10 @@
         obj.__dict__[self.sname] = entry.obj
         return entry.obj
 
-    def __set__(self, obj, value):
+    # don't implement __set__(), which would make __dict__ lookup as slow as
+    # function call.
+
+    def set(self, obj, value):
         if self.name not in obj._filecache:
             # we add an entry for the missing value because X in __dict__
             # implies X in _filecache
@@ -1328,12 +1327,6 @@
         ce.obj = value # update cached copy
         obj.__dict__[self.sname] = value # update copy returned by obj.x
 
-    def __delete__(self, obj):
-        try:
-            del obj.__dict__[self.sname]
-        except KeyError:
-            raise AttributeError(self.sname)
-
 def extdatasource(repo, source):
     """Gather a map of rev -> value dict from the specified source
 
--- a/tests/test-filecache.py	Sat Oct 20 19:13:05 2018 +0900
+++ b/tests/test-filecache.py	Sat Oct 20 17:56:00 2018 +0900
@@ -177,7 +177,7 @@
 def setbeforeget(repo):
     os.remove('x')
     os.remove('y')
-    repo.cached = 'string set externally'
+    repo.__class__.cached.set(repo, 'string set externally')
     repo.invalidate()
     print("* neither file exists")
     print(repo.cached)
@@ -188,7 +188,7 @@
     print("* file x created")
     print(repo.cached)
 
-    repo.cached = 'string 2 set externally'
+    repo.__class__.cached.set(repo, 'string 2 set externally')
     repo.invalidate()
     print("* string set externally again")
     print(repo.cached)