# HG changeset patch # User Pierre-Yves David # Date 1579099679 -3600 # Node ID 563dfdfd01a4e050b5ea01796843cf2fd2d33ef9 # Parent 2b72c4ff8ed11cad198bcd60c46a05d94292ce0b nodemap: delete older raw data file when creating a new one When we write a new full file, it replaces an older one with a different name. We add the associated cleanup for the older file to be removed after the transaction. We delete all files matching the expected pattern to give us an extra chance to delete orphan files we might have failed to delete earlier. Note: eventually we won't rewrite all data for each transaction. This is coming in later changesets. Differential Revision: https://phab.mercurial-scm.org/D7839 diff -r 2b72c4ff8ed1 -r 563dfdfd01a4 mercurial/revlogutils/nodemap.py --- a/mercurial/revlogutils/nodemap.py Wed Jan 15 15:47:50 2020 +0100 +++ b/mercurial/revlogutils/nodemap.py Wed Jan 15 15:47:59 2020 +0100 @@ -9,6 +9,7 @@ from __future__ import absolute_import import os +import re import struct from .. import ( @@ -71,6 +72,16 @@ data = persistent_data(revlog.index) uid = _make_uid() datafile = _rawdata_filepath(revlog, uid) + olds = _other_rawdata_filepath(revlog, uid) + if olds: + realvfs = getattr(revlog, '_realopener', revlog.opener) + + def cleanup(tr): + for oldfile in olds: + realvfs.tryunlink(oldfile) + + callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file + tr.addpostclose(callback_id, cleanup) # EXP-TODO: if this is a cache, this should use a cache vfs, not a # store vfs with revlog.opener(datafile, b'w') as fd: @@ -136,6 +147,19 @@ return b"%s-%s.nd" % (prefix, uid) +def _other_rawdata_filepath(revlog, uid): + prefix = revlog.nodemap_file[:-2] + pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix) + new_file_path = _rawdata_filepath(revlog, uid) + new_file_name = revlog.opener.basename(new_file_path) + dirpath = revlog.opener.dirname(new_file_path) + others = [] + for f in revlog.opener.listdir(dirpath): + if pattern.match(f) and f != new_file_name: + 
others.append(f) + return others + + ### Nodemap Trie # # This is a simple reference implementation to compute and persist a nodemap diff -r 2b72c4ff8ed1 -r 563dfdfd01a4 tests/test-persistent-nodemap.t --- a/tests/test-persistent-nodemap.t Wed Jan 15 15:47:50 2020 +0100 +++ b/tests/test-persistent-nodemap.t Wed Jan 15 15:47:59 2020 +0100 @@ -12,6 +12,8 @@ $ hg debugbuilddag .+5000 $ f --size .hg/store/00changelog.n .hg/store/00changelog.n: size=18 + $ f --sha256 .hg/store/00changelog-*.nd + .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob) $ hg debugnodemap --dump-new | f --sha256 --size size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size @@ -32,3 +34,15 @@ 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................| + +add a new commit + + $ hg up + 0 files updated, 0 files merged, 0 files removed, 0 files unresolved + $ echo foo > foo + $ hg add foo + $ hg ci -m 'foo' + $ f --size .hg/store/00changelog.n + .hg/store/00changelog.n: size=18 + $ f --sha256 .hg/store/00changelog-*.nd --size + .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)