comparison contrib/shrink-revlog.py @ 10542:989b2a5eaaba

shrink: handle all combinations of inline/non-inline revlogs
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Wed, 24 Feb 2010 18:22:45 +0100
parents 3e7e789d9494
children 1ee14abe07b4 d3ebb1a0bc49
comparison
equal deleted inserted replaced
10532:e4a8ae4659d3 10542:989b2a5eaaba
17 17
18 # XXX would be nice to have a way to verify the repository after shrinking, 18 # XXX would be nice to have a way to verify the repository after shrinking,
19 # e.g. by comparing "before" and "after" states of random changesets 19 # e.g. by comparing "before" and "after" states of random changesets
20 # (maybe: export before, shrink, export after, diff). 20 # (maybe: export before, shrink, export after, diff).
21 21
22 import os, tempfile 22 import os, tempfile, errno
23 from mercurial import revlog, transaction, node, util 23 from mercurial import revlog, transaction, node, util
24 from mercurial import changegroup 24 from mercurial import changegroup
25 from mercurial.i18n import _ 25 from mercurial.i18n import _
26 26
27 def toposort(ui, rl): 27 def toposort(ui, rl):
89 chunkiter = changegroup.chunkiter(group) 89 chunkiter = changegroup.chunkiter(group)
90 r2.addgroup(chunkiter, unlookup, tr) 90 r2.addgroup(chunkiter, unlookup, tr)
91 finally: 91 finally:
92 ui.progress(_('writing'), None, len(order)) 92 ui.progress(_('writing'), None, len(order))
93 93
94 def report(ui, olddatafn, newdatafn): 94 def report(ui, r1, r2):
95 oldsize = float(os.stat(olddatafn).st_size) 95 def getsize(r):
96 newsize = float(os.stat(newdatafn).st_size) 96 s = 0
97 for fn in (r.indexfile, r.datafile):
98 try:
99 s += os.stat(fn).st_size
100 except OSError, inst:
101 if inst.errno != errno.ENOENT:
102 raise
103 return s
104
105 oldsize = float(getsize(r1))
106 newsize = float(getsize(r2))
97 107
98 # argh: have to pass an int to %d, because a float >= 2^32 108 # argh: have to pass an int to %d, because a float >= 2^32
99 # blows up under Python 2.5 or earlier 109 # blows up under Python 2.5 or earlier
100 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n') 110 ui.write(_('old file size: %12d bytes (%6.1f MiB)\n')
101 % (int(oldsize), oldsize / 1024 / 1024)) 111 % (int(oldsize), oldsize / 1024 / 1024))
127 store = repo.sjoin('') 137 store = repo.sjoin('')
128 if not indexfn.startswith(store): 138 if not indexfn.startswith(store):
129 raise util.Abort(_('--revlog option must specify a revlog in %s, ' 139 raise util.Abort(_('--revlog option must specify a revlog in %s, '
130 'not %s') % (store, indexfn)) 140 'not %s') % (store, indexfn))
131 141
132 datafn = indexfn[:-2] + '.d'
133 if not os.path.exists(indexfn): 142 if not os.path.exists(indexfn):
134 raise util.Abort(_('no such file: %s') % indexfn) 143 raise util.Abort(_('no such file: %s') % indexfn)
135 if '00changelog' in indexfn: 144 if '00changelog' in indexfn:
136 raise util.Abort(_('shrinking the changelog ' 145 raise util.Abort(_('shrinking the changelog '
137 'will corrupt your repository')) 146 'will corrupt your repository'))
138 if not os.path.exists(datafn): 147
139 # This is just a lazy shortcut because I can't be bothered to 148 ui.write(_('shrinking %s\n') % indexfn)
140 # handle all the special cases that entail from no .d file. 149 prefix = os.path.basename(indexfn)[:-1]
141 raise util.Abort(_('%s does not exist: revlog not big enough ' 150 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
142 'to be worth shrinking') % datafn) 151 prefix=prefix,
152 suffix='.i')
153 os.close(tmpfd)
154
155 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
156 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
157
158 datafn, tmpdatafn = r1.datafile, r2.datafile
143 159
144 oldindexfn = indexfn + '.old' 160 oldindexfn = indexfn + '.old'
145 olddatafn = datafn + '.old' 161 olddatafn = datafn + '.old'
146 if os.path.exists(oldindexfn) or os.path.exists(olddatafn): 162 if os.path.exists(oldindexfn) or os.path.exists(olddatafn):
147 raise util.Abort(_('one or both of\n' 163 raise util.Abort(_('one or both of\n'
148 ' %s\n' 164 ' %s\n'
149 ' %s\n' 165 ' %s\n'
150 'exists from a previous run; please clean up ' 166 'exists from a previous run; please clean up '
151 'before running again') % (oldindexfn, olddatafn)) 167 'before running again') % (oldindexfn, olddatafn))
152 168
153 ui.write(_('shrinking %s\n') % indexfn)
154 prefix = os.path.basename(indexfn)[:-1]
155 (tmpfd, tmpindexfn) = tempfile.mkstemp(dir=os.path.dirname(indexfn),
156 prefix=prefix,
157 suffix='.i')
158 tmpdatafn = tmpindexfn[:-2] + '.d'
159 os.close(tmpfd)
160
161 r1 = revlog.revlog(util.opener(os.getcwd(), audit=False), indexfn)
162 r2 = revlog.revlog(util.opener(os.getcwd(), audit=False), tmpindexfn)
163
164 # Don't use repo.transaction(), because then things get hairy with 169 # Don't use repo.transaction(), because then things get hairy with
165 # paths: some need to be relative to .hg, and some need to be 170 # paths: some need to be relative to .hg, and some need to be
166 # absolute. Doing it this way keeps things simple: everything is an 171 # absolute. Doing it this way keeps things simple: everything is an
167 # absolute path. 172 # absolute path.
168 lock = repo.lock(wait=False) 173 lock = repo.lock(wait=False)
169 tr = transaction.transaction(ui.warn, 174 tr = transaction.transaction(ui.warn,
170 open, 175 open,
171 repo.sjoin('journal')) 176 repo.sjoin('journal'))
172 177
178 def ignoremissing(func):
179 def f(*args, **kw):
180 try:
181 return func(*args, **kw)
182 except OSError, inst:
183 if inst.errno != errno.ENOENT:
184 raise
185 return f
186
173 try: 187 try:
174 try: 188 try:
175 order = toposort(ui, r1) 189 order = toposort(ui, r1)
176 writerevs(ui, r1, r2, order, tr) 190 writerevs(ui, r1, r2, order, tr)
177 report(ui, datafn, tmpdatafn) 191 report(ui, r1, r2)
178 tr.close() 192 tr.close()
179 except: 193 except:
180 # Abort transaction first, so we truncate the files before 194 # Abort transaction first, so we truncate the files before
181 # deleting them. 195 # deleting them.
182 tr.abort() 196 tr.abort()
183 if os.path.exists(tmpindexfn): 197 for fn in (tmpindexfn, tmpdatafn):
184 os.unlink(tmpindexfn) 198 ignoremissing(os.unlink)(fn)
185 if os.path.exists(tmpdatafn):
186 os.unlink(tmpdatafn)
187 raise 199 raise
188 if not opts.get('dry_run'): 200 if not opts.get('dry_run'):
189 # Racy since both files cannot be renamed atomically 201 # racy, both files cannot be renamed atomically
202 # copy files
190 util.os_link(indexfn, oldindexfn) 203 util.os_link(indexfn, oldindexfn)
191 util.os_link(datafn, olddatafn) 204 ignoremissing(util.os_link)(datafn, olddatafn)
205 # rename
192 util.rename(tmpindexfn, indexfn) 206 util.rename(tmpindexfn, indexfn)
193 util.rename(tmpdatafn, datafn) 207 try:
208 util.rename(tmpdatafn, datafn)
209 except OSError, inst:
210 if inst.errno != errno.ENOENT:
211 raise
212 ignoremissing(os.unlink)(datafn)
194 else: 213 else:
195 os.unlink(tmpindexfn) 214 for fn in (tmpindexfn, tmpdatafn):
196 os.unlink(tmpdatafn) 215 ignoremissing(os.unlink)(fn)
197 finally: 216 finally:
198 lock.release() 217 lock.release()
199 218
200 if not opts.get('dry_run'): 219 if not opts.get('dry_run'):
201 ui.write(_('note: old revlog saved in:\n' 220 ui.write(_('note: old revlog saved in:\n'