changeset 2332:77c184c80e3e
merge with stable
author | Pierre-Yves David <pierre-yves.david@octobus.net>
---|---
date | Thu, 04 May 2017 21:24:02 +0200
parents | d72c8c1f09e2 (diff), d49f376598f8 (current diff)
children | adf114c767ab
files | hgext3rd/evolve/obscache.py
diffstat | 1 file changed, 92 insertions(+), 60 deletions(-)
--- a/hgext3rd/evolve/obscache.py	Thu May 04 21:21:59 2017 +0200
+++ b/hgext3rd/evolve/obscache.py	Thu May 04 21:24:02 2017 +0200
@@ -13,6 +13,7 @@
 import errno
 
 from mercurial import (
+    error,
     localrepo,
     obsolete,
     phases,
@@ -20,6 +21,8 @@
     util,
 )
 
+from mercurial.i18n import _
+
 from . import (
     exthelper,
 )
@@ -160,6 +163,31 @@
 
     return True, startrev, startidx
 
+
+# XXX copied as-is from Mercurial 4.2, with an "offset" parameter added
+@util.nogc
+def _readmarkers(data, offset=None):
+    """Read and enumerate markers from raw data"""
+    off = 0
+    diskversion = struct.unpack('>B', data[off:off + 1])[0]
+    if offset is None:
+        off += 1
+    else:
+        assert 1 <= offset
+        off = offset
+    if diskversion not in obsolete.formats:
+        raise error.Abort(_('parsing obsolete marker: unknown version %r')
+                          % diskversion)
+    return diskversion, obsolete.formats[diskversion][0](data, off)
+
+def markersfrom(obsstore, byteoffset, firstmarker):
+    if '_all' in vars(obsstore):
+        # if the data are in memory, just use that
+        return obsstore._all[firstmarker:]
+    else:
+        obsdata = obsstore.svfs.tryread('obsstore')
+        return _readmarkers(obsdata, byteoffset)[1]
+
 class obscache(object):
     """cache the "does a rev" is the precursors of some obsmarkers data
 
@@ -239,66 +267,70 @@
         if startrev is None and startidx is None:
             return
 
+        # check that we never run 'update' without a lock
+        #
+        # There is a potential race condition otherwise, since the repo
+        # "might" have changed since the cache update above. However, this
+        # code only runs under a lock, so we ignore the issue for now.
+        #
+        # To lift this limitation, 'upgradeneeded' should return a bounded
+        # amount of changesets and markers to read with their associated
+        # cachekey. See 'upgradeneeded' for details.
+        assert repo._currentlock(repo._lockref) is not None
+
         # process the new changesets
         cl = repo.changelog
         if startrev is not None:
-            node = cl.node
-            # Note:
-            #
-            #  Newly added changeset might be affected by obsolescence markers
-            #  we already have locally. So we needs to have soem global
-            #  knowledge about the markers to handle that question. Right this
-            #  requires parsing all markers in the obsstore. However, we could
-            #  imagine using various optimisation (eg: bloom filter, other on
-            #  disk cache) to remove this full parsing.
-            #
-            #  For now we stick to the simpler approach or paying the
-            #  performance cost on new changesets.
-            succs = repo.obsstore.successors
-            for r in cl.revs(startrev):
-                if node(r) in succs:
-                    val = 1
-                else:
-                    val = 0
-                self._data.append(val)
+            self._updaterevs(repo, cl.revs(startrev))
         assert len(self._data) == len(cl), (len(self._data), len(cl))
 
         # process the new obsmarkers
         if startidx is not None:
-            rev = cl.nodemap.get
-            markers = repo.obsstore._all
-            # Note:
-            #
-            #  There are no actually needs to load the full obsstore here,
-            #  since we only read the latest ones. We do it for simplicity in
-            #  the first implementation. Loading the full obsstore has a
-            #  performance cost and should go away in this case too. We have
-            #  two simples options for that:
-            #
-            #  1) provide and API to start reading markers from a byte offset
-            #     (we have that data in the cache key)
-            #
-            #  2) directly update the cache at a lower level, in the code
-            #     responsible for adding a markers.
-            #
-            #  Option 2 is probably a bit more invasive, but more solid on the long run
+            if startidx == 0:  # all markers
+                markers = repo.obsstore._all
+            else:
+                markers = markersfrom(repo.obsstore, self._cachekey[3], startidx)
+            self._updatemarkers(repo, markers)
+
+        self._cachekey = getcachekey(repo)
+
+    def _updaterevs(self, repo, revs):
+        """update the cache with new revisions
+
+        Newly added changesets might be affected by obsolescence markers
+        we already have locally. So we need to have some global
+        knowledge about the markers to handle that question.
+
+        Right now this requires parsing all markers in the obsstore. We could
+        imagine using various optimisations (e.g. another cache, network
+        exchange, etc).
 
-            for i in xrange(startidx, len(repo.obsstore)):
-                r = rev(markers[i][0])
-                # If markers affect a newly added nodes, it would have been
-                # caught in the previous loop, (so we skip < startrev)
-                if r is not None and (startrev is None or r < startrev):
-                    self._data[r] = 1
+        A possible approach to this is to build a set of all nodes used as
+        precursors in `obsstore._obscandidate`. If markers are not loaded yet,
+        we could initialize it by doing a quick scan through the obsstore data
+        and filling a (pre-sized) set. Doing so would be much faster than
+        parsing all the obsmarkers since we would access less data, not create
+        any objects besides the nodes and not have to decode any complex data.
 
-        assert repo._currentlock(repo._lockref) is not None
-        # XXX note that there are a potential race condition here, since the
-        # repo "might" have changed side the cache update above. However, this
-        # code will only be running in a lock so we ignore the issue for now.
-        #
-        # To work around this, 'upgradeneeded' should return a bounded amount
-        # of changeset and markers to read with their associated cachekey. see
-        # 'upgradeneeded' for detail.
-        self._cachekey = getcachekey(repo)
+        For now we stick to the simpler approach of paying the
+        performance cost on new changesets.
+        """
+        node = repo.changelog.node
+        succs = repo.obsstore.successors
+        for r in revs:
+            if node(r) in succs:
+                val = 1
+            else:
+                val = 0
+            self._data.append(val)
+
+    def _updatemarkers(self, repo, obsmarkers):
+        """update the cache with new markers"""
+        rev = repo.changelog.nodemap.get
+        for m in obsmarkers:
+            r = rev(m[0])
+            if r is not None:
+                self._data[r] = 1
 
     def save(self, repo):
         """save the data to disk"""
@@ -339,7 +371,7 @@
     if notpublic:
         obscache = repo.obsstore.obscache
        # Since we warm the cache at the end of every transaction, the cache
-        # should be up to date. However a non-enabled client might have touced
+        # should be up to date. However a non-enabled client might have touched
         # the repository.
         #
         # Updating the cache without a lock is sloppy, so we fallback to the
@@ -348,7 +380,7 @@
         #
         # With the current implementation updating the cache will requires to
         # load the obsstore anyway. Once loaded, hitting the obsstore directly
-        # will be about as fast..
+        # will be about as fast...
         if not obscache.uptodate(repo):
             if repo.currenttransaction() is None:
                 repo.ui.log('evoext-obscache',
@@ -357,8 +389,9 @@
                 repo.ui.debug('obscache is out of date')
                 return orig(repo)
             else:
-                # If a transaction is open, it is worthwhile to update and use the
-                # cache as it will be written on disk when the transaction close.
+                # If a transaction is open, it is worthwhile to update and use
+                # the cache; the lock prevents races and it will be written to
+                # disk when the transaction closes.
                 obscache.update(repo)
                 isobs = obscache.get
     for r in notpublic:
@@ -391,11 +424,10 @@
         if repo is None:
            return
         repo = repo.unfiltered()
-        # As pointed in 'obscache.update', we could have the
-        # changelog and the obsstore in charge of updating the
-        # cache when new items goes it. The tranaction logic would
-        # then only be involved for the 'pending' and final saving
-        # logic.
+        # As pointed out in 'obscache.update', we could have the changelog
+        # and the obsstore in charge of updating the cache when new
+        # items go in. The transaction logic would then only be
+        # involved for the 'pending' and final writing on disk.
        self.obsstore.obscache.update(repo)
         self.obsstore.obscache.save(repo)
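The new `markersfrom` helper implements "option 1" from the removed comment: when the markers are not already in memory, resume parsing the raw obsstore at the byte offset recorded in the cache key instead of decoding everything from the start. Below is a minimal sketch of that resume-at-offset pattern; the on-disk format, `read_markers_from`, and `KNOWN_VERSIONS` are invented for illustration and are not the real Mercurial obsstore format or API.

```python
import struct

KNOWN_VERSIONS = {0, 1}   # hypothetical format versions

def read_markers_from(data, offset=None):
    """Yield raw marker payloads, optionally resuming at a byte offset."""
    version = struct.unpack('>B', data[0:1])[0]
    if version not in KNOWN_VERSIONS:
        raise ValueError('unknown obsstore version %r' % version)
    # None means "start right after the 1-byte version header"; a caller
    # holding a byte offset from an earlier read can skip straight to it,
    # which is the ability the patched _readmarkers adds.
    off = 1 if offset is None else offset
    while off < len(data):
        (size,) = struct.unpack('>I', data[off:off + 4])
        off += 4
        yield data[off:off + size]
        off += size

# Build a tiny two-marker store, then read it fully and incrementally.
blob = struct.pack('>B', 1)
for payload in (b'marker-one', b'marker-two'):
    blob += struct.pack('>I', len(payload)) + payload

resume_at = 1 + 4 + len(b'marker-one')   # the "byteoffset" a cache key keeps
assert list(read_markers_from(blob)) == [b'marker-one', b'marker-two']
assert list(read_markers_from(blob, resume_at)) == [b'marker-two']
```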
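The refactored `update` splits cache maintenance in two: `_updaterevs` appends one entry per new revision (1 when its node is a known precursor), and `_updatemarkers` flips the entry of any existing revision targeted by a new marker. Here is a toy model of that invariant; `TinyObsCache` is a made-up class, and markers are reduced to `(precursor, successor)` pairs rather than real obsmarker tuples.

```python
class TinyObsCache(object):
    def __init__(self):
        self._data = bytearray()   # self._data[rev] is 0 or 1

    def update(self, nodes, markers, startrev, startidx):
        # `nodes` maps rev -> node; `startrev`/`startidx` mark where the
        # unprocessed suffix of each sequence begins.
        precursors = set(p for p, s in markers)
        # new changesets (cf. _updaterevs): may already be rewritten
        for rev in range(startrev, len(nodes)):
            self._data.append(1 if nodes[rev] in precursors else 0)
        # new markers (cf. _updatemarkers): may rewrite known changesets
        rev_of = {node: rev for rev, node in enumerate(nodes)}
        for precursor, _succ in markers[startidx:]:
            rev = rev_of.get(precursor)
            if rev is not None:
                self._data[rev] = 1

cache = TinyObsCache()
cache.update(['n0', 'n1'], [('n0', 'n1')], startrev=0, startidx=0)
assert bytes(cache._data) == b'\x01\x00'
# a later transaction adds one revision and one marker rewriting 'n1'
cache.update(['n0', 'n1', 'n2'], [('n0', 'n1'), ('n1', 'n2')],
             startrev=2, startidx=1)
assert bytes(cache._data) == b'\x01\x01\x00'
```

Note that, like the real `_updaterevs` (which consults the full `successors` mapping), this sketch still derives `precursors` from all markers; the patch's docstring discusses cheaper alternatives to that full parse.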
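The last hunk runs `update` and `save` from the transaction logic, which is what lets the new `assert repo._currentlock(repo._lockref) is not None` hold: the cache is only rebuilt while the transaction's lock is held, then written out as the transaction closes. A hedged sketch of that callback pattern, assuming a toy `TinyTransaction` stand-in (Mercurial's real transaction object offers a similar `addfinalize(category, callback)` hook, but the wiring here is simplified):

```python
class TinyTransaction(object):
    """Toy stand-in for a transaction object (illustration only)."""

    def __init__(self):
        self._finalizers = []

    def addfinalize(self, category, callback):
        # loosely mirrors Mercurial's transaction.addfinalize()
        self._finalizers.append((category, callback))

    def close(self):
        # finalizers run while the lock is still held, so a cache updated
        # here can safely assert that the process owns the lock
        for category, callback in self._finalizers:
            callback(self)

events = []
tr = TinyTransaction()
tr.addfinalize('obscache', lambda tr: events.append('update + save obscache'))
# ... transaction body: new changesets and obsmarkers get written ...
tr.close()
assert events == ['update + save obscache']
```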