Mercurial > hg
comparison hgext/relink.py @ 9729:aa9ccab5af37
Issue919: add a standard extension to recreate hardlinks between repositories.
Having to run a standalone Python script from the contrib dir is a nuisance.
Also makes various improvements to locking, file discovery, etc.
Should also update: http://www.selenic.com/mercurial/wiki/index.cgi/RecreateHardlinksBetweenRepositories
author | Jesse Glick <jesse.glick@sun.com> |
---|---|
date | Thu, 05 Nov 2009 17:38:03 -0500 |
parents | contrib/hg-relink@46293a0c7e9f |
children | 819e6c7085fc |
comparison
equal
deleted
inserted
replaced
9728:acb1c59b4514 | 9729:aa9ccab5af37 |
---|---|
1 # Mercurial extension to provide 'hg relink' command | |
2 # | |
3 # Copyright (C) 2007 Brendan Cully <brendan@kublai.com> | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2, incorporated herein by reference. | |
7 | |
8 """recreates hardlinks between repository clones""" | |
9 | |
10 from mercurial import cmdutil, hg, util | |
11 from mercurial.i18n import _ | |
12 import os, stat | |
13 | |
def relink(ui, repo, origin=None, **opts):
    """recreate hardlinks between two repositories

    When repositories are cloned locally, their data files will be hardlinked
    so that they only use the space of a single repository.

    Unfortunately, subsequent pulls into either repository will break hardlinks
    for any files touched by the new changesets, even if both repositories end
    up pulling the same changes.

    Similarly, passing --rev to "hg clone" will fail to use
    any hardlinks, falling back to a complete copy of the source repository.

    This command lets you recreate those hardlinks and reclaim that wasted
    space.

    This repository will be relinked to share space with ORIGIN, which must be
    on the same local disk. If ORIGIN is omitted, looks for "default-relink",
    then "default", in [paths].

    Do not attempt any read operations on this repository while the command is
    running. (Both repositories will be locked against writes.)
    """
    # With an explicit ORIGIN both arguments to expandpath are that value;
    # without one, 'default-relink' and 'default' are passed instead.
    src = hg.repository(
        cmdutil.remoteui(repo, opts),
        ui.expandpath(origin or 'default-relink', origin or 'default'))
    if not src.local():
        # Wrapped in _() like every other user-facing message in this file.
        raise util.Abort(_('must specify local origin repository'))
    ui.status(_('relinking %s to %s\n') % (src.store.path, repo.store.path))

    # Lock this repository first, then the origin; the nested try/finally
    # blocks release them in the reverse order even if relinking fails.
    locallock = repo.lock()
    try:
        remotelock = src.lock()
        try:
            candidates = collect(src.store.path, ui)
            targets = prune(candidates, repo.store.path, ui)
            do_relink(src.store.path, repo.store.path, targets, ui)
        finally:
            remotelock.release()
    finally:
        locallock.release()
54 | |
def collect(src, ui):
    """Walk src and list the regular files whose names end in '.d' or '.i'.

    Returns a list of (path relative to src, os.stat result) pairs and
    reports how many were found through ui.status.
    """
    # Number of leading characters to strip from each walked directory:
    # the root itself plus one path separator.
    prefixlen = len(src) + len(os.path.sep)
    found = []
    for root, subdirs, names in os.walk(src):
        reldir = root[prefixlen:]
        for name in names:
            if name[-2:] not in ('.d', '.i'):
                continue
            info = os.stat(os.path.join(root, name))
            if stat.S_ISREG(info.st_mode):
                found.append((os.path.join(reldir, name), info))

    ui.status(_('collected %d candidate storage files\n') % len(found))
    return found
70 | |
def prune(candidates, dst, ui):
    """Keep only the candidates that look worth relinking under dst.

    candidates is an iterable of (relative path, source stat result) pairs.
    A candidate is kept when the same relative path can be stat'ed under
    dst, has a different inode number from the source, and has the same
    size.  Returns a list of (relative path, size) pairs; every rejected
    path is reported through ui.debug.

    Raises util.Abort as soon as a source/destination pair with differing
    inode numbers turns out to live on different devices.
    """
    def worthlinking(path, srcstat):
        try:
            dststat = os.stat(path)
        except OSError:
            # Destination doesn't have this file?
            return False
        if srcstat.st_ino == dststat.st_ino:
            # Same inode number: nothing to do for this file.
            return False
        if srcstat.st_dev != dststat.st_dev:
            # No point in continuing
            raise util.Abort(
                _('source and destination are on different devices'))
        return srcstat.st_size == dststat.st_size

    kept = []
    for relname, srcstat in candidates:
        if worthlinking(os.path.join(dst, relname), srcstat):
            kept.append((relname, srcstat.st_size))
        else:
            ui.debug(_('not linkable: %s\n') % relname)

    ui.status(_('pruned down to %d probably relinkable files\n') % len(kept))
    return kept
99 | |
def do_relink(src, dst, files, ui):
    """Hardlink files under dst to their byte-identical twins under src.

    src and dst are directory paths; files is a sequence of
    (relative path, size in bytes) pairs, the shape returned by prune().
    Each pair of files is compared chunk by chunk and, only when the
    contents match, the copy under dst is replaced by a hardlink to the
    one under src.

    An OSError raised while relinking one file is reported through ui.warn
    and does not stop the loop.  A summary with the number of relinked
    files and the sum of their sizes is printed through ui.status at the
    end.
    """
    # Imported locally so this function does not rely on a module-level
    # import; used to fetch the active exception in a way that parses on
    # every Python version.
    import sys

    CHUNKLEN = 65536

    def relinkfile(src, dst):
        # Move the existing file aside first so it can be put back if the
        # link cannot be created.
        bak = dst + '.bak'
        os.rename(dst, bak)
        try:
            os.link(src, dst)
        except OSError:
            os.rename(bak, dst)
            raise
        os.remove(bak)

    def samecontent(a, b):
        # Binary mode so read() returns the raw bytes on every platform.
        afp = open(a, 'rb')
        try:
            bfp = open(b, 'rb')
            try:
                chunk = afp.read(CHUNKLEN)
                while chunk:
                    if chunk != bfp.read(CHUNKLEN):
                        return False
                    chunk = afp.read(CHUNKLEN)
                # The first file is exhausted; the second must be too,
                # otherwise it merely starts with the same bytes.
                return not bfp.read(1)
            finally:
                bfp.close()
        finally:
            afp.close()

    relinked = 0
    savedbytes = 0
    total = len(files)
    for idx, (f, sz) in enumerate(files):
        pos = idx + 1
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        if not samecontent(source, tgt):
            ui.debug(_('not linkable: %s\n') % f)
            continue
        try:
            relinkfile(source, tgt)
        except OSError:
            inst = sys.exc_info()[1]
            ui.warn(_('%s: %s\n') % (tgt, str(inst)))
            continue
        ui.progress(_('relink'), pos, f, _(' files'), total)
        relinked += 1
        savedbytes += sz

    ui.status(_('relinked %d files (%d bytes reclaimed)\n') %
              (relinked, savedbytes))
142 | |
# Maps the command name 'relink' to a tuple of the implementing function,
# an empty list (presumably the command's option table - confirm against
# the Mercurial extension API) and the translated usage string.
cmdtable = {
    'relink': (relink, [], _('[ORIGIN]')),
}