# HG changeset patch # User Ben Goswami # Date 1366915826 25200 # Node ID 58e782f076e700e4f7005428b271cab2e1796b3b # Parent 61f1223ab3581370f1109af302c52653a9ca05cd splicemap: improve error handling when source is hg (issue2084) 1. Introduced 2 levels of error handling for splicemap files a. Check the splicemap file for rules which are same across different types of source repos. This is done through enhancing parsesplicemap function b. Check revision string formats. Each repo may have their own format. This is done usign checkrevformat function c. Implemented the above two for hg diff -r 61f1223ab358 -r 58e782f076e7 hgext/convert/common.py --- a/hgext/convert/common.py Wed Apr 24 18:26:37 2013 -0700 +++ b/hgext/convert/common.py Thu Apr 25 11:50:26 2013 -0700 @@ -5,7 +5,7 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. -import base64, errno, subprocess, os, datetime +import base64, errno, subprocess, os, datetime, re import cPickle as pickle from mercurial import util from mercurial.i18n import _ @@ -63,6 +63,15 @@ self.encoding = 'utf-8' + def checkhexformat(self, revstr): + """ fails if revstr is not a 40 byte hex. mercurial and git both uses + such format for their revision numbering + """ + matchobj = re.match(r'[0-9a-fA-F]{40,40}$', revstr) + if matchobj is None: + raise util.Abort(_('splicemap entry %s is not a valid revision' + ' identifier') % revstr) + def before(self): pass @@ -164,6 +173,13 @@ """ return {} + def checkrevformat(self, revstr): + """revstr is a string that describes a revision in the given + source control system. Return true if revstr has correct + format. + """ + return True + class converter_sink(object): """Conversion sink (target) interface""" diff -r 61f1223ab358 -r 58e782f076e7 hgext/convert/convcmd.py --- a/hgext/convert/convcmd.py Wed Apr 24 18:26:37 2013 -0700 +++ b/hgext/convert/convcmd.py Thu Apr 25 11:50:26 2013 -0700 @@ -121,9 +121,17 @@ self.splicemap = self.parsesplicemap(opts.get('splicemap')) self.branchmap = mapfile(ui, opts.get('branchmap')) + def parsesplicemap(self, path): + """ check and validate the splicemap format and + return a child/parents dictionary. + Format checking has two parts. + 1. generic format which is same across all source types + 2. specific format checking which may be different for + different source type. This logic is implemented in + checkrevformat function in source files like + hg.py, subversion.py etc. + """ - def parsesplicemap(self, path): - """Parse a splicemap, return a child/parents dictionary.""" if not path: return {} m = {} @@ -136,18 +144,28 @@ continue try: child, parents = line.split(' ', 1) + self.source.checkrevformat(child) parents = parents.replace(',', ' ').split() + # check if number of parents are upto 2 max + if (len(parents) > 2): + raise util.Abort(_('syntax error in %s(%d): child '\ + 'parent1[,parent2] expected') \ + % (path, i + 1)) + for parent in parents: + self.source.checkrevformat(parent) except ValueError: - raise util.Abort(_('syntax error in %s(%d): child parent1' - '[,parent2] expected') % (path, i + 1)) + raise util.Abort(_('syntax error in %s(%d): child '\ + 'parent1[,parent2] expected') \ + % (path, i + 1)) pp = [] for p in parents: if p not in pp: pp.append(p) m[child] = pp - except IOError, e: - if e.errno != errno.ENOENT: - raise + # if file does not exist or error reading, exit + except IOError: + raise util.Abort(_('splicemap file not found or error reading %s:') + % path) return m diff -r 61f1223ab358 -r 58e782f076e7 hgext/convert/hg.py --- a/hgext/convert/hg.py Wed Apr 24 18:26:37 2013 -0700 +++ b/hgext/convert/hg.py Thu Apr 25 11:50:26 2013 -0700 @@ -397,3 +397,7 @@ def getbookmarks(self): return bookmarks.listbookmarks(self.repo) + + def checkrevformat(self, revstr): + """ Mercurial, revision string is a 40 byte hex """ + self.checkhexformat(revstr) diff -r 61f1223ab358 -r 58e782f076e7 tests/test-convert-splicemap.t --- a/tests/test-convert-splicemap.t Wed Apr 24 18:26:37 2013 -0700 +++ b/tests/test-convert-splicemap.t Thu Apr 25 11:50:26 2013 -0700 @@ -37,6 +37,8 @@ $ hg ci -Am addaandd adding a adding d + $ INVALIDID1=afd12345af + $ INVALIDID2=28173x36ddd1e67bf7098d541130558ef5534a86 $ CHILDID1=`hg id --debug -i` $ echo d >> d $ hg ci -Am changed @@ -53,7 +55,7 @@ o 0:527cdedf31fb "addaandd" files: a d -test invalid splicemap +test invalid splicemap1 $ cat > splicemap < $CHILDID2 @@ -62,6 +64,24 @@ abort: syntax error in splicemap(1): child parent1[,parent2] expected [255] +test invalid splicemap2 + + $ cat > splicemap < $CHILDID2 $PARENTID1, $PARENTID2, $PARENTID2 + > EOF + $ hg convert --splicemap splicemap repo2 repo1 + abort: syntax error in splicemap(1): child parent1[,parent2] expected + [255] + +test invalid splicemap3 + + $ cat > splicemap < $INVALIDID1 $INVALIDID2 + > EOF + $ hg convert --splicemap splicemap repo2 repo1 + abort: splicemap entry afd12345af is not a valid revision identifier + [255] + splice repo2 on repo1 $ cat > splicemap <