contrib/hg-relink
changeset 4249 7663780b55a7
child 4270 29eb88bd5c8d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/hg-relink	Mon Mar 19 09:36:06 2007 -0700
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import os, sys
+
+class ConfigError(Exception): pass  # raised by Config when the command line is unusable
+
+def usage():  # show the command synopsis on stdout
+    print ("relink <source> <destination>\n"
+           "    Recreate hard links between source and destination repositories")
+
+class Config:
+    """Absolute, validated paths of the source and destination repositories."""
+    def __init__(self, args):
+        if len(args) != 3:
+            raise ConfigError("wrong number of arguments")
+        self.src, self.dst = [os.path.abspath(a) for a in args[1:]]
+        for repo in (self.src, self.dst):
+            if not os.path.exists(os.path.join(repo, '.hg')):
+                raise ConfigError("%s: not a mercurial repository" % repo)
+
+try:
+    cfg = Config(sys.argv)  # checks the argument count and both repository paths
+except ConfigError, inst:
+    print str(inst)
+    usage()
+    sys.exit(1)  # bad invocation: message and usage were printed, exit status 1
+
+def collect(src):
+    """Return (relative path, stat result) pairs for every .i/.d file below src."""
+    prefixlen = len(src) + len(os.path.sep)
+    found = []
+    for root, _dirs, names in os.walk(src):
+        rel = root[prefixlen:]  # directory relative to src ('' for src itself)
+        for name in names:
+            # only files ending in '.i' or '.d' are candidates
+            if name.endswith('.i') or name.endswith('.d'):
+                full = os.path.join(root, name)
+                found.append((os.path.join(rel, name), os.stat(full)))
+    return found
+
+def prune(candidates, dst):
+    """Return (path, size) for candidates in dst that are unlinked and same-sized."""
+    targets = []
+    for fn, st in candidates:
+        tgt = os.path.join(dst, fn)
+        try:
+            ts = os.stat(tgt)
+        except OSError:
+            # Destination doesn't have this file?
+            continue
+        # Inode numbers are only unique per device, so check the device first.
+        if st.st_dev != ts.st_dev:
+            raise Exception('Source and destination are on different devices')
+        if st.st_ino == ts.st_ino or st.st_size != ts.st_size:
+            continue  # already the same file, or sizes differ
+        targets.append((fn, ts.st_size))
+
+    return targets
+
+def relink(src, dst, files):
+    """Hard-link src/f over dst/f for each (f, size) in files whose bytes match."""
+    CHUNKLEN = 65536
+    relinked = savedbytes = 0
+    for f, sz in files:
+        source = os.path.join(src, f)
+        tgt = os.path.join(dst, f)
+        # binary mode: compare raw bytes; handles are closed before the rename
+        sfp = open(source, 'rb')
+        dfp = open(tgt, 'rb')
+        try:
+            sin = sfp.read(CHUNKLEN)
+            while sin and sin == dfp.read(CHUNKLEN):
+                sin = sfp.read(CHUNKLEN)
+        finally:
+            sfp.close()
+            dfp.close()
+        if sin:
+            continue  # stopped on a mismatching chunk: contents differ
+        try:
+            os.rename(tgt, tgt + '.bak')
+            try:
+                os.link(source, tgt)
+            except OSError:
+                os.rename(tgt + '.bak', tgt)  # link failed: restore original
+                raise
+            print 'Relinked %s' % f
+            relinked += 1
+            savedbytes += sz
+            os.remove(tgt + '.bak')
+        except OSError, inst:
+            print '%s: %s' % (tgt, str(inst))
+    print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
+
+src = os.path.join(cfg.src, '.hg')  # work inside each repository's .hg directory
+dst = os.path.join(cfg.dst, '.hg')
+candidates = collect(src)  # every .i/.d file under the source .hg, with its stat
+targets = prune(candidates, dst)  # drop missing, already-linked, different-size files
+relink(src, dst, targets)  # hard-link the byte-identical ones and print a summary