comparison hgext/relink.py @ 9729:aa9ccab5af37

Issue919: add a standard extension to recreate hardlinks between repositories. Having to run a standalone Python script from the contrib dir is a nuisance. Also makes various improvements to locking, file discovery, etc. Should also update: http://www.selenic.com/mercurial/wiki/index.cgi/RecreateHardlinksBetweenRepositories
author Jesse Glick <jesse.glick@sun.com>
date Thu, 05 Nov 2009 17:38:03 -0500
parents contrib/hg-relink@46293a0c7e9f
children 819e6c7085fc
comparison
equal deleted inserted replaced
9728:acb1c59b4514 9729:aa9ccab5af37
1 # Mercurial extension to provide 'hg relink' command
2 #
3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
7
8 """recreates hardlinks between repository clones"""
9
10 from mercurial import cmdutil, hg, util
11 from mercurial.i18n import _
12 import os, stat
13
def relink(ui, repo, origin=None, **opts):
    """recreate hardlinks between two repositories

    When repositories are cloned locally, their data files will be hardlinked
    so that they only use the space of a single repository.

    Unfortunately, subsequent pulls into either repository will break hardlinks
    for any files touched by the new changesets, even if both repositories end
    up pulling the same changes.

    Similarly, passing --rev to "hg clone" will fail to use
    any hardlinks, falling back to a complete copy of the source repository.

    This command lets you recreate those hardlinks and reclaim that wasted
    space.

    This repository will be relinked to share space with ORIGIN, which must be
    on the same local disk. If ORIGIN is omitted, looks for "default-relink",
    then "default", in [paths].

    Do not attempt any read operations on this repository while the command is
    running. (Both repositories will be locked against writes.)
    """
    # Resolve ORIGIN (or the "default-relink" / "default" path aliases) and
    # open it as a repository object.
    src = hg.repository(
        cmdutil.remoteui(repo, opts),
        ui.expandpath(origin or 'default-relink', origin or 'default'))
    if not src.local():
        # Wrapped in _() so the message is translatable, like the other
        # abort message raised from prune().
        raise util.Abort(_('must specify local origin repository'))
    ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))
    if src.store.path == repo.store.path:
        # ORIGIN is this very store: every file already shares its inode
        # with itself, so nothing can be reclaimed. Return before taking
        # a lock on the same store twice.
        ui.status(_('there is nothing to relink\n'))
        return

    # Lock this repository first, then the origin; release in reverse order
    # even if any of the three phases raises.
    locallock = repo.lock()
    try:
        remotelock = src.lock()
        try:
            candidates = collect(src.store.path, ui)
            targets = prune(candidates, repo.store.path, ui)
            do_relink(src.store.path, repo.store.path, targets, ui)
        finally:
            remotelock.release()
    finally:
        locallock.release()
54
def collect(src, ui):
    """Gather the storage files under ``src`` that are relink candidates.

    Walks the directory tree rooted at ``src`` and keeps every regular file
    whose name ends in '.d' or '.i'.

    Returns a list of ``(path relative to src, os.stat() result)`` tuples and
    reports how many were found through ``ui.status()``.
    """
    # Number of leading characters to drop from each walked directory so that
    # the remainder is relative to src (the root itself becomes '').
    striplen = len(src) + len(os.path.sep)
    found = []
    for root, _subdirs, names in os.walk(src):
        reldir = root[striplen:]
        for name in names:
            if name[-2:] not in ('.d', '.i'):
                continue
            info = os.stat(os.path.join(root, name))
            # os.stat() follows symlinks; only plain files are kept.
            if stat.S_ISREG(info.st_mode):
                found.append((os.path.join(reldir, name), info))

    ui.status(_('collected %d candidate storage files\n') % len(found))
    return found
70
def prune(candidates, dst, ui):
    """Narrow ``candidates`` down to files that look worth relinking.

    ``candidates`` is a list of ``(relative path, source stat result)`` pairs
    and ``dst`` is the destination directory the relative paths are joined to.
    A candidate is kept when the destination file exists, is not already the
    same inode as the source, and has the same size as the source.

    Returns a list of ``(relative path, size in bytes)`` tuples and reports
    the count through ``ui.status()``; rejected files are logged through
    ``ui.debug()``.

    Raises util.Abort when a destination file is on a different device than
    its source, since hardlinks cannot span devices.
    """
    def linkable(tgt, st):
        # True when tgt exists and is a distinct, same-sized file on the
        # same device as the source described by st.
        try:
            ts = os.stat(tgt)
        except OSError:
            # Destination doesn't have this file?
            return False
        # Compare devices before inodes: inode numbers are only unique
        # within one device, so an equal st_ino across devices must not be
        # mistaken for "already hardlinked".
        if st.st_dev != ts.st_dev:
            # No point in continuing
            raise util.Abort(
                _('source and destination are on different devices'))
        if st.st_ino == ts.st_ino:
            # Already the same file; nothing to reclaim.
            return False
        return st.st_size == ts.st_size

    targets = []
    for fn, st in candidates:
        if not linkable(os.path.join(dst, fn), st):
            ui.debug(_('not linkable: %s\n') % fn)
            continue
        targets.append((fn, st.st_size))

    ui.status(_('pruned down to %d probably relinkable files\n') % len(targets))
    return targets
99
def do_relink(src, dst, files, ui):
    """Replace identical files under ``dst`` with hardlinks to ``src``.

    ``files`` is a list of ``(relative path, size in bytes)`` tuples. Each
    pair of files is compared byte for byte; only when the contents match is
    the destination swapped for a hardlink to the source.

    Files that differ are logged through ``ui.debug()``. An OSError while
    relinking one file is reported through ``ui.warn()`` and the loop moves
    on. A final summary (files relinked, bytes reclaimed) goes to
    ``ui.status()``. Returns None.
    """
    CHUNKLEN = 65536

    def samecontent(a, b):
        # Compare both files chunk by chunk until both reach EOF.
        # Binary mode so read() returns the raw bytes on every platform,
        # and both handles are closed before any rename is attempted.
        with open(a, 'rb') as afp:
            with open(b, 'rb') as bfp:
                while True:
                    achunk = afp.read(CHUNKLEN)
                    bchunk = bfp.read(CHUNKLEN)
                    if achunk != bchunk:
                        # Also covers one file ending before the other.
                        return False
                    if not achunk:
                        return True

    def relinkfile(src, dst):
        # Move dst aside first so it can be restored if linking fails.
        bak = dst + '.bak'
        os.rename(dst, bak)
        try:
            os.link(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    relinked = 0
    savedbytes = 0
    total = len(files)
    for pos, (f, sz) in enumerate(files, 1):
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        if not samecontent(source, tgt):
            ui.debug(_('not linkable: %s\n') % f)
            continue
        try:
            relinkfile(source, tgt)
            ui.progress(_('relink'), pos, f, _(' files'), total)
            relinked += 1
            savedbytes += sz
        except OSError as inst:
            ui.warn(_('%s: %s\n') % (tgt, str(inst)))

    ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
              (relinked, savedbytes))
142
# Command table for this extension: maps the command name to a tuple of
# (implementing function, list of options -- none here, synopsis string).
# NOTE(review): presumably picked up by Mercurial's extension loader via the
# module-level name "cmdtable" -- confirm against the extension API docs.
cmdtable = {
    'relink': (relink, [], _('[ORIGIN]')),
}