changeset 44355:563dfdfd01a4

nodemap: delete older raw data file when creating a new one When we write a new full file, it replaces an older one with a different name. We add the associated cleanup so that the older file is removed after the transaction. We delete all files matching the expected pattern to give us an extra chance to delete orphan files we might have failed to delete earlier. Note: eventually we won't rewrite all data for each transaction. This is coming in later changesets. Differential Revision: https://phab.mercurial-scm.org/D7839
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Wed, 15 Jan 2020 15:47:59 +0100
parents 2b72c4ff8ed1
children 6f9e8e142cea
files mercurial/revlogutils/nodemap.py tests/test-persistent-nodemap.t
diffstat 2 files changed, 38 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/revlogutils/nodemap.py	Wed Jan 15 15:47:50 2020 +0100
+++ b/mercurial/revlogutils/nodemap.py	Wed Jan 15 15:47:59 2020 +0100
@@ -9,6 +9,7 @@
 from __future__ import absolute_import
 
 import os
+import re
 import struct
 
 from .. import (
@@ -71,6 +72,16 @@
     data = persistent_data(revlog.index)
     uid = _make_uid()
     datafile = _rawdata_filepath(revlog, uid)
+    olds = _other_rawdata_filepath(revlog, uid)
+    if olds:
+        realvfs = getattr(revlog, '_realopener', revlog.opener)
+
+        def cleanup(tr):
+            for oldfile in olds:
+                realvfs.tryunlink(oldfile)
+
+        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
+        tr.addpostclose(callback_id, cleanup)
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
     with revlog.opener(datafile, b'w') as fd:
@@ -136,6 +147,19 @@
     return b"%s-%s.nd" % (prefix, uid)
 
 
+def _other_rawdata_filepath(revlog, uid):
+    prefix = revlog.nodemap_file[:-2]
+    pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
+    new_file_path = _rawdata_filepath(revlog, uid)
+    new_file_name = revlog.opener.basename(new_file_path)
+    dirpath = revlog.opener.dirname(new_file_path)
+    others = []
+    for f in revlog.opener.listdir(dirpath):
+        if pattern.match(f) and f != new_file_name:
+            others.append(f)
+    return others
+
+
 ### Nodemap Trie
 #
 # This is a simple reference implementation to compute and persist a nodemap
--- a/tests/test-persistent-nodemap.t	Wed Jan 15 15:47:50 2020 +0100
+++ b/tests/test-persistent-nodemap.t	Wed Jan 15 15:47:59 2020 +0100
@@ -12,6 +12,8 @@
   $ hg debugbuilddag .+5000
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+  $ f --sha256 .hg/store/00changelog-*.nd
+  .hg/store/00changelog-????????????????.nd: sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7 (glob)
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
@@ -32,3 +34,15 @@
   00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
   00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+
+add a new commit
+
+  $ hg up
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ echo foo > foo
+  $ hg add foo
+  $ hg ci -m 'foo'
+  $ f --size .hg/store/00changelog.n
+  .hg/store/00changelog.n: size=18
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)