convert: ignore hg source errors with hg.ignoreerrors (issue 1357)
author Patrick Mezard <pmezard@gmail.com>
Tue, 21 Oct 2008 21:24:47 +0200
changeset 7231 8e7130a10f3b
parent 7230 261a9f47b44b
child 7232 c2ac09f81ec9
convert: ignore hg source errors with hg.ignoreerrors (issue 1357). This flag causes missing revlog errors to be ignored, which allows broken repositories to be fixed by converting them from and to Mercurial.
hgext/convert/__init__.py
hgext/convert/hg.py
tests/test-convert-hg-source
tests/test-convert-hg-source.out
tests/test-convert.out
--- a/hgext/convert/__init__.py	Thu Oct 23 14:56:16 2008 -0500
+++ b/hgext/convert/__init__.py	Tue Oct 21 21:24:47 2008 +0200
@@ -85,6 +85,10 @@
     Mercurial Source
     -----------------
 
+    --config convert.hg.ignoreerrors=False (boolean) 
+        ignore integrity errors when reading. Use it to fix Mercurial
+        repositories with missing revlogs, by converting from and to
+        Mercurial.
     --config convert.hg.saverev=True          (boolean)
         allow target to preserve source revision ID
     --config convert.hg.startrev=0            (hg revision identifier)
--- a/hgext/convert/hg.py	Thu Oct 23 14:56:16 2008 -0500
+++ b/hgext/convert/hg.py	Tue Oct 21 21:24:47 2008 +0200
@@ -192,6 +192,8 @@
 class mercurial_source(converter_source):
     def __init__(self, ui, path, rev=None):
         converter_source.__init__(self, ui, path, rev)
+        self.ignoreerrors = ui.configbool('convert', 'hg.ignoreerrors', False)
+        self.ignored = {}
         self.saverev = ui.configbool('convert', 'hg.saverev', True)
         try:
             self.repo = hg.repository(self.ui, path)
@@ -253,23 +255,35 @@
         parents = self.parents(ctx)
         if not parents:
             files = util.sort(ctx.manifest().keys())
-            return [(f, rev) for f in files], {}
+            return [(f, rev) for f in files if f not in self.ignored], {}
         if self._changescache and self._changescache[0] == rev:
             m, a, r = self._changescache[1]
         else:
             m, a, r = self.repo.status(parents[0], ctx.node())[:3]
-        changes = [(name, rev) for name in m + a + r]
-        return util.sort(changes), self.getcopies(ctx, m + a)
+        # getcopies() detects missing revlogs early, run it before
+        # filtering the changes.
+        copies = self.getcopies(ctx, m + a)
+        changes = [(name, rev) for name in m + a + r 
+                   if name not in self.ignored]
+        return util.sort(changes), copies
 
     def getcopies(self, ctx, files):
         copies = {}
         for name in files:
+            if name in self.ignored:
+                continue
             try:
-                copynode = ctx.filectx(name).renamed()[0]
-                if self.keep(copynode):
-                    copies[name] = copynode
+                copysource, copynode = ctx.filectx(name).renamed()
+                if copysource in self.ignored or not self.keep(copynode):
+                    continue
+                copies[name] = copysource
             except TypeError:
                 pass
+            except revlog.LookupError, e:
+                if not self.ignoreerrors:
+                    raise
+                self.ignored[name] = 1
+                self.ui.warn(_('ignoring: %s\n') % e)
         return copies
 
     def getcommit(self, rev):
@@ -297,6 +311,7 @@
         else:
             i = i or 0
             changes = self.repo.status(parents[i], ctx.node())[:3]
+        changes = [[f for f in l if f not in self.ignored] for l in changes]
 
         if i == 0:
             self._changescache = (rev, changes)
--- a/tests/test-convert-hg-source	Thu Oct 23 14:56:16 2008 -0500
+++ b/tests/test-convert-hg-source	Tue Oct 21 21:24:47 2008 +0200
@@ -36,5 +36,34 @@
 hg convert --datesort orig new 2>&1 | grep -v 'subversion python bindings could not be loaded'
 cd new
 hg out ../orig
+cd ..
+
+echo % init broken repository
+hg init broken
+cd broken
+echo a >> a
+echo b >> b
+hg ci -qAm init
+echo a >> a
+echo b >> b
+hg copy b c
+hg ci -qAm changeall
+hg up -qC 0
+echo bc >> b
+hg ci -m changebagain
+HGMERGE=internal:local hg -q merge
+hg ci -m merge
+hg mv b d
+hg ci -m moveb
+echo % break it
+rm .hg/store/data/b.*
+cd ..
+
+hg --config convert.hg.ignoreerrors=True convert broken fixed
+hg -R fixed verify
+echo '% manifest -r 0'
+hg -R fixed manifest -r 0
+echo '% manifest -r tip'
+hg -R fixed manifest -r tip
 
 true
--- a/tests/test-convert-hg-source.out	Thu Oct 23 14:56:16 2008 -0500
+++ b/tests/test-convert-hg-source.out	Tue Oct 21 21:24:47 2008 +0200
@@ -20,3 +20,27 @@
 comparing with ../orig
 searching for changes
 no changes found
+% init broken repository
+created new head
+% break it
+initializing destination fixed repository
+scanning source...
+sorting...
+converting...
+4 init
+3 changebagain
+ignoring: data/b.i@4b3c32ced4f8: no match found
+2 changeall
+1 merge
+0 moveb
+checking changesets
+checking manifests
+crosschecking files in changesets and manifests
+checking files
+3 files, 5 changesets, 5 total revisions
+% manifest -r 0
+a
+% manifest -r tip
+a
+c
+d
--- a/tests/test-convert.out	Thu Oct 23 14:56:16 2008 -0500
+++ b/tests/test-convert.out	Tue Oct 21 21:24:47 2008 +0200
@@ -72,6 +72,10 @@
     Mercurial Source
     -----------------
 
+    --config convert.hg.ignoreerrors=False (boolean) 
+        ignore integrity errors when reading. Use it to fix Mercurial
+        repositories with missing revlogs, by converting from and to
+        Mercurial.
     --config convert.hg.saverev=True          (boolean)
         allow target to preserve source revision ID
     --config convert.hg.startrev=0            (hg revision identifier)