changeset 19120:58e782f076e7

splicemap: improve error handling when source is hg (issue2084) 1. Introduced 2 levels of error handling for splicemap files a. Check the splicemap file for rules which are the same across different types of source repos. This is done by enhancing the parsesplicemap function b. Check revision string formats. Each repo may have its own format. This is done using the checkrevformat function c. Implemented the above two for hg
author Ben Goswami <bengoswami@fb.com>
date Thu, 25 Apr 2013 11:50:26 -0700
parents 61f1223ab358
children 478a04605ce1
files hgext/convert/common.py hgext/convert/convcmd.py hgext/convert/hg.py tests/test-convert-splicemap.t
diffstat 4 files changed, 67 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/common.py	Wed Apr 24 18:26:37 2013 -0700
+++ b/hgext/convert/common.py	Thu Apr 25 11:50:26 2013 -0700
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import base64, errno, subprocess, os, datetime
+import base64, errno, subprocess, os, datetime, re
 import cPickle as pickle
 from mercurial import util
 from mercurial.i18n import _
@@ -63,6 +63,15 @@
 
         self.encoding = 'utf-8'
 
+    def checkhexformat(self, revstr):
+        """ fails if revstr is not a 40 byte hex. mercurial and git both uses
+            such format for their revision numbering
+        """
+        matchobj = re.match(r'[0-9a-fA-F]{40,40}$', revstr)
+        if matchobj is None:
+            raise util.Abort(_('splicemap entry %s is not a valid revision'
+                               ' identifier') % revstr)
+
     def before(self):
         pass
 
@@ -164,6 +173,13 @@
         """
         return {}
 
+    def checkrevformat(self, revstr):
+        """revstr is a string that describes a revision in the given
+           source control system.  Return true if revstr has correct
+           format.
+        """
+        return True
+
 class converter_sink(object):
     """Conversion sink (target) interface"""
 
--- a/hgext/convert/convcmd.py	Wed Apr 24 18:26:37 2013 -0700
+++ b/hgext/convert/convcmd.py	Thu Apr 25 11:50:26 2013 -0700
@@ -121,9 +121,17 @@
         self.splicemap = self.parsesplicemap(opts.get('splicemap'))
         self.branchmap = mapfile(ui, opts.get('branchmap'))
 
+    def parsesplicemap(self, path):
+        """ check and validate the splicemap format and
+            return a child/parents dictionary.
+            Format checking has two parts.
+            1. generic format which is same across all source types
+            2. specific format checking which may be different for
+               different source type.  This logic is implemented in
+               checkrevformat function in source files like
+               hg.py, subversion.py etc.
+        """
 
-    def parsesplicemap(self, path):
-        """Parse a splicemap, return a child/parents dictionary."""
         if not path:
             return {}
         m = {}
@@ -136,18 +144,28 @@
                     continue
                 try:
                     child, parents = line.split(' ', 1)
+                    self.source.checkrevformat(child)
                     parents = parents.replace(',', ' ').split()
+                    # check if number of parents are upto 2 max
+                    if (len(parents) > 2):
+                        raise util.Abort(_('syntax error in %s(%d): child '\
+                                            'parent1[,parent2] expected') \
+                                            % (path, i + 1))
+                    for parent in parents:
+                        self.source.checkrevformat(parent)
                 except ValueError:
-                    raise util.Abort(_('syntax error in %s(%d): child parent1'
-                                       '[,parent2] expected') % (path, i + 1))
+                    raise util.Abort(_('syntax error in %s(%d): child '\
+                                        'parent1[,parent2] expected') \
+                                        % (path, i + 1))
                 pp = []
                 for p in parents:
                     if p not in pp:
                         pp.append(p)
                 m[child] = pp
-        except IOError, e:
-            if e.errno != errno.ENOENT:
-                raise
+         # if file does not exist or error reading, exit
+        except IOError:
+            raise util.Abort(_('splicemap file not found or error reading %s:')
+                               % path)
         return m
 
 
--- a/hgext/convert/hg.py	Wed Apr 24 18:26:37 2013 -0700
+++ b/hgext/convert/hg.py	Thu Apr 25 11:50:26 2013 -0700
@@ -397,3 +397,7 @@
 
     def getbookmarks(self):
         return bookmarks.listbookmarks(self.repo)
+
+    def checkrevformat(self, revstr):
+        """ Mercurial, revision string is a 40 byte hex """
+        self.checkhexformat(revstr)
--- a/tests/test-convert-splicemap.t	Wed Apr 24 18:26:37 2013 -0700
+++ b/tests/test-convert-splicemap.t	Thu Apr 25 11:50:26 2013 -0700
@@ -37,6 +37,8 @@
   $ hg ci -Am addaandd
   adding a
   adding d
+  $ INVALIDID1=afd12345af
+  $ INVALIDID2=28173x36ddd1e67bf7098d541130558ef5534a86
   $ CHILDID1=`hg id --debug -i`
   $ echo d >> d
   $ hg ci -Am changed
@@ -53,7 +55,7 @@
   o  0:527cdedf31fb "addaandd" files: a d
   
 
-test invalid splicemap
+test invalid splicemap1
 
   $ cat > splicemap <<EOF
   > $CHILDID2
@@ -62,6 +64,24 @@
   abort: syntax error in splicemap(1): child parent1[,parent2] expected
   [255]
 
+test invalid splicemap2
+
+  $ cat > splicemap <<EOF
+  > $CHILDID2 $PARENTID1, $PARENTID2, $PARENTID2
+  > EOF
+  $ hg convert --splicemap splicemap repo2 repo1
+  abort: syntax error in splicemap(1): child parent1[,parent2] expected
+  [255]
+
+test invalid splicemap3
+
+  $ cat > splicemap <<EOF
+  > $INVALIDID1 $INVALIDID2
+  > EOF
+  $ hg convert --splicemap splicemap repo2 repo1
+  abort: splicemap entry afd12345af is not a valid revision identifier
+  [255]
+
 splice repo2 on repo1
 
   $ cat > splicemap <<EOF