# HG changeset patch # User Benoit Boissinot # Date 1368218832 -7200 # Node ID 94a22595f702c404f5de746d436668376cd09cb7 # Parent e9c5b1c246dc07096561f4dd0dcbfe3c17bb8f45 shrink: delete extension preventing further refactoring (BC) Reordering can be done by setting bundle.reorder to true and doing a clone --pull. diff -r e9c5b1c246dc -r 94a22595f702 contrib/shrink-revlog.py --- a/contrib/shrink-revlog.py Sun Feb 10 16:03:20 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,295 +0,0 @@ -"""reorder a revlog (the manifest by default) to save space - -Specifically, this topologically sorts the revisions in the revlog so that -revisions on the same branch are adjacent as much as possible. This is a -workaround for the fact that Mercurial computes deltas relative to the -previous revision rather than relative to a parent revision. - -This is *not* safe to run on a changelog. -""" - -# Originally written by Benoit Boissinot -# as a patch to rewrite-log. Cleaned up, refactored, documented, and -# renamed by Greg Ward . - -# XXX would be nice to have a way to verify the repository after shrinking, -# e.g. by comparing "before" and "after" states of random changesets -# (maybe: export before, shrink, export after, diff). - -import os, errno -from mercurial import revlog, transaction, node, util, scmutil -from mercurial import changegroup -from mercurial.i18n import _ - - -def postorder(start, edges): - result = [] - visit = list(start) - finished = set() - - while visit: - cur = visit[-1] - for p in edges[cur]: - # defend against node.nullrev because it's occasionally - # possible for a node to have parents (null, something) - # rather than (something, null) - if p not in finished and p != node.nullrev: - visit.append(p) - break - else: - result.append(cur) - finished.add(cur) - visit.pop() - - return result - -def toposort_reversepostorder(ui, rl): - # postorder of the reverse directed graph - - # map rev to list of parent revs (p2 first) - parents = {} - heads = set() - ui.status(_('reading revs\n')) - try: - for rev in rl: - ui.progress(_('reading'), rev, total=len(rl)) - (p1, p2) = rl.parentrevs(rev) - if p1 == p2 == node.nullrev: - parents[rev] = () # root node - elif p1 == p2 or p2 == node.nullrev: - parents[rev] = (p1,) # normal node - else: - parents[rev] = (p2, p1) # merge node - heads.add(rev) - for p in parents[rev]: - heads.discard(p) - finally: - ui.progress(_('reading'), None) - - heads = list(heads) - heads.sort(reverse=True) - - ui.status(_('sorting revs\n')) - return postorder(heads, parents) - -def toposort_postorderreverse(ui, rl): - # reverse-postorder of the reverse directed graph - - children = {} - roots = set() - ui.status(_('reading revs\n')) - try: - for rev in rl: - ui.progress(_('reading'), rev, total=len(rl)) - (p1, p2) = rl.parentrevs(rev) - if p1 == p2 == node.nullrev: - roots.add(rev) - children[rev] = [] - if p1 != node.nullrev: - children[p1].append(rev) - if p2 != node.nullrev: - children[p2].append(rev) - finally: - ui.progress(_('reading'), None) - - roots = list(roots) - roots.sort() - - ui.status(_('sorting revs\n')) - result = postorder(roots, children) - result.reverse() - return result - -def writerevs(ui, repo, r1, r2, order, tr): - - ui.status(_('writing revs\n')) - - - order = [r1.node(r) for r in order] - - # this is a bit ugly, but it works - count = [0] - def lookup(revl, x): - count[0] += 1 - ui.progress(_('writing'), count[0], total=len(order)) - return "%020d" % revl.linkrev(revl.rev(x)) - - unlookup = lambda x: int(x, 10) - - try: - bundler = changegroup.bundle10(repo) - bundler.start(lookup) - group = util.chunkbuffer(bundler.group(order, r1)) - group = changegroup.unbundle10(group, "UN") - r2.addgroup(group, unlookup, tr) - finally: - ui.progress(_('writing'), None) - -def report(ui, r1, r2): - def getsize(r): - s = 0 - for fn in (r.indexfile, r.datafile): - try: - s += os.stat(fn).st_size - except OSError, inst: - if inst.errno != errno.ENOENT: - raise - return s - - oldsize = float(getsize(r1)) - newsize = float(getsize(r2)) - - # argh: have to pass an int to %d, because a float >= 2^32 - # blows up under Python 2.5 or earlier - ui.write(_('old file size: %12d bytes (%6.1f MiB)\n') - % (int(oldsize), oldsize / 1024 / 1024)) - ui.write(_('new file size: %12d bytes (%6.1f MiB)\n') - % (int(newsize), newsize / 1024 / 1024)) - - shrink_percent = (oldsize - newsize) / oldsize * 100 - shrink_factor = oldsize / newsize - ui.write(_('shrinkage: %.1f%% (%.1fx)\n') - % (shrink_percent, shrink_factor)) - -def shrink(ui, repo, **opts): - """shrink a revlog by reordering revisions - - Rewrites all the entries in some revlog of the current repository - (by default, the manifest log) to save space. - - Different sort algorithms have different performance - characteristics. Use ``--sort`` to select a sort algorithm so you - can determine which works best for your data. - """ - - if not repo.local(): - raise util.Abort(_('not a local repository: %s') % repo.root) - - fn = opts.get('revlog') - if not fn: - indexfn = repo.sjoin('00manifest.i') - else: - if not fn.endswith('.i'): - raise util.Abort(_('--revlog option must specify the revlog index ' - 'file (*.i), not %s') % opts.get('revlog')) - - indexfn = os.path.realpath(fn) - store = repo.sjoin('') - if not indexfn.startswith(store): - raise util.Abort(_('--revlog option must specify a revlog in %s, ' - 'not %s') % (store, indexfn)) - - sortname = opts['sort'] - try: - toposort = globals()['toposort_' + sortname] - except KeyError: - raise util.Abort(_('no such toposort algorithm: %s') % sortname) - - if not os.path.exists(indexfn): - raise util.Abort(_('no such file: %s') % indexfn) - if '00changelog' in indexfn: - raise util.Abort(_('shrinking the changelog ' - 'will corrupt your repository')) - - ui.write(_('shrinking %s\n') % indexfn) - tmpindexfn = util.mktempcopy(indexfn, emptyok=True) - - r1 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), indexfn) - r2 = revlog.revlog(scmutil.opener(os.getcwd(), audit=False), tmpindexfn) - - datafn, tmpdatafn = r1.datafile, r2.datafile - - oldindexfn = indexfn + '.old' - olddatafn = datafn + '.old' - if os.path.exists(oldindexfn) or os.path.exists(olddatafn): - raise util.Abort(_('one or both of\n' - ' %s\n' - ' %s\n' - 'exists from a previous run; please clean up ' - 'before running again') % (oldindexfn, olddatafn)) - - # Don't use repo.transaction(), because then things get hairy with - # paths: some need to be relative to .hg, and some need to be - # absolute. Doing it this way keeps things simple: everything is an - # absolute path. - lock = repo.lock(wait=False) - tr = transaction.transaction(ui.warn, - open, - repo.sjoin('journal')) - - def ignoremissing(func): - def f(*args, **kw): - try: - return func(*args, **kw) - except OSError, inst: - if inst.errno != errno.ENOENT: - raise - return f - - try: - try: - order = toposort(ui, r1) - - suboptimal = 0 - for i in xrange(1, len(order)): - parents = [p for p in r1.parentrevs(order[i]) - if p != node.nullrev] - if parents and order[i - 1] not in parents: - suboptimal += 1 - ui.note(_('%d suboptimal nodes\n') % suboptimal) - - writerevs(ui, repo, r1, r2, order, tr) - report(ui, r1, r2) - tr.close() - except: # re-raises - # Abort transaction first, so we truncate the files before - # deleting them. - tr.abort() - for fn in (tmpindexfn, tmpdatafn): - ignoremissing(os.unlink)(fn) - raise - if not opts.get('dry_run'): - # racy, both files cannot be renamed atomically - # copy files - util.oslink(indexfn, oldindexfn) - ignoremissing(util.oslink)(datafn, olddatafn) - - # rename - util.rename(tmpindexfn, indexfn) - try: - os.chmod(tmpdatafn, os.stat(datafn).st_mode) - util.rename(tmpdatafn, datafn) - except OSError, inst: - if inst.errno != errno.ENOENT: - raise - ignoremissing(os.unlink)(datafn) - else: - for fn in (tmpindexfn, tmpdatafn): - ignoremissing(os.unlink)(fn) - finally: - lock.release() - - if not opts.get('dry_run'): - ui.write( - _('note: old revlog saved in:\n' - ' %s\n' - ' %s\n' - '(You can delete those files when you are satisfied that your\n' - 'repository is still sane. ' - 'Running \'hg verify\' is strongly recommended.)\n') - % (oldindexfn, olddatafn)) - -cmdtable = { - 'shrink': (shrink, - [('', 'revlog', '', - _('the revlog to shrink (.i)')), - ('n', 'dry-run', None, - _('do not shrink, simulate only')), - ('', 'sort', 'reversepostorder', - _('name of sort algorithm to use')), - ], - _('hg shrink [--revlog PATH]')) -} - -if __name__ == "__main__": - print "shrink-revlog.py is now an extension (see hg help extensions)" diff -r e9c5b1c246dc -r 94a22595f702 tests/test-contrib.t --- a/tests/test-contrib.t Sun Feb 10 16:03:20 2013 +0100 +++ b/tests/test-contrib.t Fri May 10 22:47:12 2013 +0200 @@ -103,34 +103,6 @@ no changes found [1] - -#if hardlink - -Test shrink-revlog: - $ cd repo-a - $ hg --config extensions.shrink="$CONTRIBDIR/shrink-revlog.py" shrink - shrinking $TESTTMP/repo-a/.hg/store/00manifest.i (glob) - reading revs - sorting revs - writing revs - old file size: 324 bytes ( 0.0 MiB) - new file size: 324 bytes ( 0.0 MiB) - shrinkage: 0.0% (1.0x) - note: old revlog saved in: - $TESTTMP/repo-a/.hg/store/00manifest.i.old (glob) - $TESTTMP/repo-a/.hg/store/00manifest.d.old (glob) - (You can delete those files when you are satisfied that your - repository is still sane. Running 'hg verify' is strongly recommended.) - $ hg verify - checking changesets - checking manifests - crosschecking files in changesets and manifests - checking files - 1 files, 3 changesets, 3 total revisions - $ cd .. - -#endif - Test simplemerge command: $ cp "$CONTRIBDIR/simplemerge" .