Issue919: add a standard extension to recreate hardlinks between repositories.
Having to run a standalone Python script from the contrib dir is a nuisance.
Also makes various improvements to locking, file discovery, etc.
The wiki page should also be updated to match: http://www.selenic.com/mercurial/wiki/index.cgi/RecreateHardlinksBetweenRepositories
--- a/contrib/hg-relink Fri Nov 06 08:28:45 2009 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
-#
-# This software may be used and distributed according to the terms of the
-# GNU General Public License version 2, incorporated herein by reference.
-
-import os, sys
-
-class ConfigError(Exception): pass
-
-def usage():
- print """relink <source> <destination>
- Recreate hard links between source and destination repositories"""
-
-class Config:
- def __init__(self, args):
- if len(args) != 3:
- raise ConfigError("wrong number of arguments")
- self.src = os.path.abspath(args[1])
- self.dst = os.path.abspath(args[2])
- for d in (self.src, self.dst):
- if not os.path.exists(os.path.join(d, '.hg')):
- raise ConfigError("%s: not a mercurial repository" % d)
-
-def collect(src):
- seplen = len(os.path.sep)
- candidates = []
- for dirpath, dirnames, filenames in os.walk(src):
- relpath = dirpath[len(src) + seplen:]
- for filename in filenames:
- if not filename.endswith('.i'):
- continue
- st = os.stat(os.path.join(dirpath, filename))
- candidates.append((os.path.join(relpath, filename), st))
-
- return candidates
-
-def prune(candidates, dst):
- def getdatafile(path):
- if not path.endswith('.i'):
- return None, None
- df = path[:-1] + 'd'
- try:
- st = os.stat(df)
- except OSError:
- return None, None
- return df, st
-
- def linkfilter(dst, st):
- try:
- ts = os.stat(dst)
- except OSError:
- # Destination doesn't have this file?
- return False
- if st.st_ino == ts.st_ino:
- return False
- if st.st_dev != ts.st_dev:
- # No point in continuing
- raise Exception('Source and destination are on different devices')
- if st.st_size != ts.st_size:
- # TODO: compare revlog heads
- return False
- return st
-
- targets = []
- for fn, st in candidates:
- tgt = os.path.join(dst, fn)
- ts = linkfilter(tgt, st)
- if not ts:
- continue
- targets.append((fn, ts.st_size))
- df, ts = getdatafile(tgt)
- if df:
- targets.append((fn[:-1] + 'd', ts.st_size))
-
- return targets
-
-def relink(src, dst, files):
- def relinkfile(src, dst):
- bak = dst + '.bak'
- os.rename(dst, bak)
- try:
- os.link(src, dst)
- except OSError:
- os.rename(bak, dst)
- raise
- os.remove(bak)
-
- CHUNKLEN = 65536
- relinked = 0
- savedbytes = 0
-
- for f, sz in files:
- source = os.path.join(src, f)
- tgt = os.path.join(dst, f)
- sfp = file(source)
- dfp = file(tgt)
- sin = sfp.read(CHUNKLEN)
- while sin:
- din = dfp.read(CHUNKLEN)
- if sin != din:
- break
- sin = sfp.read(CHUNKLEN)
- if sin:
- continue
- try:
- relinkfile(source, tgt)
- print 'Relinked %s' % f
- relinked += 1
- savedbytes += sz
- except OSError, inst:
- print '%s: %s' % (tgt, str(inst))
-
- print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
-
-try:
- cfg = Config(sys.argv)
-except ConfigError, inst:
- print str(inst)
- usage()
- sys.exit(1)
-
-src = os.path.join(cfg.src, '.hg')
-dst = os.path.join(cfg.dst, '.hg')
-candidates = collect(src)
-targets = prune(candidates, dst)
-relink(src, dst, targets)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/relink.py Thu Nov 05 17:38:03 2009 -0500
@@ -0,0 +1,149 @@
+# Mercurial extension to provide 'hg relink' command
+#
+# Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+"""recreates hardlinks between repository clones"""
+
+from mercurial import cmdutil, hg, util
+from mercurial.i18n import _
+import os, stat
+
+def relink(ui, repo, origin=None, **opts):
+    """recreate hardlinks between two repositories
+
+    When repositories are cloned locally, their data files will be hardlinked
+    so that they only use the space of a single repository.
+
+    Unfortunately, subsequent pulls into either repository will break hardlinks
+    for any files touched by the new changesets, even if both repositories end
+    up pulling the same changes.
+
+    Similarly, passing --rev to "hg clone" will fail to use
+    any hardlinks, falling back to a complete copy of the source repository.
+
+    This command lets you recreate those hardlinks and reclaim that wasted
+    space.
+
+    This repository will be relinked to share space with ORIGIN, which must be
+    on the same local disk. If ORIGIN is omitted, looks for "default-relink",
+    then "default", in [paths].
+
+    Do not attempt any read operations on this repository while the command is
+    running. (Both repositories will be locked against writes.)
+    """
+    # An explicit ORIGIN is passed as both the name and the fallback;
+    # otherwise "default-relink" is tried first, then "default".
+    src = hg.repository(
+        cmdutil.remoteui(repo, opts),
+        ui.expandpath(origin or 'default-relink', origin or 'default'))
+    if not src.local():
+        # Wrapped in _() like every other user-facing message in this file.
+        raise util.Abort(_('must specify local origin repository'))
+
+    srcstore = src.store.path
+    dststore = repo.store.path
+    # Nothing to do when ORIGIN is this very repository: every store file
+    # would be compared against itself, and both lock() calls below would
+    # target the same repository.
+    if os.path.realpath(srcstore) == os.path.realpath(dststore):
+        ui.status(_('there is nothing to relink\n'))
+        return
+
+    ui.status(_('relinking %s to %s\n') % (srcstore, dststore))
+    # Hold both locks across the whole collect/prune/relink sequence and
+    # release them in reverse order of acquisition, even on error.
+    locallock = repo.lock()
+    try:
+        remotelock = src.lock()
+        try:
+            candidates = collect(srcstore, ui)
+            targets = prune(candidates, dststore, ui)
+            do_relink(srcstore, dststore, targets, ui)
+        finally:
+            remotelock.release()
+    finally:
+        locallock.release()
+
+def collect(src, ui):
+    """Gather the files under ``src`` that may be worth hardlinking.
+
+    Walks ``src`` recursively and keeps every regular file whose name ends
+    in '.d' or '.i'. Returns a list of (path relative to ``src``, stat
+    result) pairs and reports how many were found through ``ui.status``.
+    """
+    # Length of "<src><separator>", stripped from each walked directory to
+    # obtain a path relative to src.
+    prefixlen = len(src) + len(os.path.sep)
+    found = []
+    for root, subdirs, names in os.walk(src):
+        reldir = root[prefixlen:]
+        for name in names:
+            if name[-2:] not in ('.d', '.i'):
+                continue
+            info = os.stat(os.path.join(root, name))
+            # Only regular files are candidates.
+            if stat.S_ISREG(info.st_mode):
+                found.append((os.path.join(reldir, name), info))
+
+    ui.status(_('collected %d candidate storage files\n') % len(found))
+    return found
+
+def prune(candidates, dst, ui):
+    """Narrow ``candidates`` down to the files worth comparing byte-for-byte.
+
+    ``candidates`` is a list of (relative path, source stat result) pairs
+    and ``dst`` is the destination directory those paths are resolved
+    against. A candidate is kept only if the destination has a file at the
+    same relative path, on the same device, with the same size, that is not
+    already the same inode as the source.
+
+    Returns a list of (relative path, size) pairs. Raises util.Abort as soon
+    as an existing destination file turns out to live on a different device
+    than its source.
+    """
+    def linkfilter(dst, st):
+        # Return the source stat result if dst is a relink candidate for
+        # it, False otherwise.
+        try:
+            ts = os.stat(dst)
+        except OSError:
+            # Destination doesn't have this file?
+            return False
+        # Check the device before the inode: inode numbers are only
+        # meaningful within one device, so an equal st_ino across devices
+        # must not be mistaken for "already linked".
+        if st.st_dev != ts.st_dev:
+            # No point in continuing
+            raise util.Abort(
+                _('source and destination are on different devices'))
+        if st.st_ino == ts.st_ino:
+            # Same device and same inode: already hardlinked together.
+            return False
+        if st.st_size != ts.st_size:
+            return False
+        return st
+
+    targets = []
+    for fn, st in candidates:
+        tgt = os.path.join(dst, fn)
+        if not linkfilter(tgt, st):
+            ui.debug(_('not linkable: %s\n') % fn)
+            continue
+        targets.append((fn, st.st_size))
+
+    ui.status(_('pruned down to %d probably relinkable files\n')
+              % len(targets))
+    return targets
+
+def do_relink(src, dst, files, ui):
+    """Replace identical files under ``dst`` with hardlinks to ``src``.
+
+    ``files`` is a list of (relative path, size) pairs. Each file is
+    relinked only if its contents are byte-for-byte identical under ``src``
+    and ``dst``; files that differ are skipped with a debug message. An
+    OSError raised while relinking one file is reported as a warning and
+    does not stop the run. A summary of the number of files relinked and
+    the bytes reclaimed is printed at the end.
+    """
+    CHUNKLEN = 65536
+
+    def samecontent(a, b):
+        # True if files a and b hold exactly the same bytes. The files are
+        # read in binary mode because they are compared byte-for-byte, and
+        # both handles are closed before returning so that nothing is still
+        # open when the caller renames the target.
+        afp = open(a, 'rb')
+        try:
+            bfp = open(b, 'rb')
+            try:
+                while True:
+                    chunk = afp.read(CHUNKLEN)
+                    # Also catches one file ending before the other: an
+                    # empty chunk never equals a non-empty one.
+                    if chunk != bfp.read(CHUNKLEN):
+                        return False
+                    if not chunk:
+                        return True
+            finally:
+                bfp.close()
+        finally:
+            afp.close()
+
+    def relinkfile(src, dst):
+        # Move the existing file aside first so that it can be put back if
+        # creating the hardlink fails.
+        bak = dst + '.bak'
+        os.rename(dst, bak)
+        try:
+            os.link(src, dst)
+        except OSError:
+            os.rename(bak, dst)
+            raise
+        os.remove(bak)
+
+    relinked = 0
+    savedbytes = 0
+
+    pos = 0
+    total = len(files)
+    for f, sz in files:
+        pos += 1
+        source = os.path.join(src, f)
+        tgt = os.path.join(dst, f)
+        if not samecontent(source, tgt):
+            ui.debug(_('not linkable: %s\n') % f)
+            continue
+        try:
+            relinkfile(source, tgt)
+            ui.progress(_('relink'), pos, f, _(' files'), total)
+            relinked += 1
+            savedbytes += sz
+        except OSError, inst:
+            ui.warn(_('%s: %s\n') % (tgt, str(inst)))
+
+    ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
+              (relinked, savedbytes))
+
+# Maps the command name to (function, option list, synopsis).
+# NOTE(review): presumably the layout Mercurial expects from an extension's
+# command table -- confirm against the extension API.
+cmdtable = {
+    'relink': (relink, [], _('[ORIGIN]')),
+}