# HG changeset patch
# User Matt Mackall
# Date 1166831974 21600
# Node ID fad134931327b69db6f9a579a7b410bea0473c04
# Parent 32c1653b7dad261111ed84d2e72286048a60a459
convert-repo: add basic CVS import support

diff -r 32c1653b7dad -r fad134931327 contrib/convert-repo
--- a/contrib/convert-repo	Fri Dec 22 22:53:16 2006 +0100
+++ b/contrib/convert-repo	Fri Dec 22 17:59:34 2006 -0600
@@ -7,7 +7,7 @@
 #
 # convert-repo <source> [<dest> [<mapfile>]]
 #
-# Currently accepted source formats: git
+# Currently accepted source formats: git, cvs
 # Currently accepted destination formats: hg
 #
 # If destination isn't given, a new Mercurial repo named <src>-hg will
@@ -23,11 +23,12 @@
 # on each commit copied, so convert-repo can be interrupted and can
 # be run repeatedly to copy new commits.
 
-import sys, os, zlib, sha, time
+import sys, os, zlib, sha, time, re, locale
 os.environ["HGENCODING"] = "utf-8"
 from mercurial import hg, ui, util, fancyopts
 
 class Abort(Exception): pass
+class NoRepo(Exception): pass
 
 quiet = 0
 def status(msg):
@@ -48,13 +49,209 @@
         except:
             return s.decode("utf-8", "replace").encode("utf-8")
 
+# CVS conversion code inspired by hg-cvs-import and git-cvsimport
+class convert_cvs:
+    """Conversion source backed by a CVS checkout, cvsps and "cvs server"."""
+    def __init__(self, path):
+        """Open the CVS checkout at path; raise NoRepo if it has no CVS dir."""
+        self.path = path
+        cvs = os.path.join(path, "CVS")
+        if not os.path.exists(cvs):
+            raise NoRepo("couldn't open CVS repo %s" % path)
+
+        self.changeset = {}
+        self.tags = {}
+        self.lastbranch = {}
+        self.parent = {}
+        self.socket = None
+        self.cvsroot = file(os.path.join(cvs, "Root")).read()[:-1]
+        self.cvsrepo = file(os.path.join(cvs, "Repository")).read()[:-1]
+        self.encoding = locale.getpreferredencoding()
+        self._parse()
+        self._connect()
+
+    def _parse(self):
+        """Parse "cvsps -A" output into changesets, parents, tags and heads."""
+        if self.changeset:
+            return
+
+        d = os.getcwd()
+        try:
+            os.chdir(self.path)
+            id = None
+            state = 0
+            for l in os.popen("cvsps -A"):
+                if state == 0: # header
+                    if l.startswith("PatchSet"):
+                        id = l[9:-2]
+                    elif l.startswith("Date"):
+                        date = util.parsedate(l[6:-1], ["%Y/%m/%d %H:%M:%S"])
+                        date = util.datestr(date)
+                    elif l.startswith("Branch"):
+                        branch = l[8:-1]
+                        self.parent[id] = self.lastbranch.get(branch,'bad')
+                        self.lastbranch[branch] = id
+                    elif l.startswith("Ancestor branch"):
+                        ancestor = l[17:-1]
+                        self.parent[id] = self.lastbranch[ancestor]
+                    elif l.startswith("Author"):
+                        author = self.recode(l[8:-1])
+                    elif l.startswith("Tag: "):
+                        t = l[5:-1]
+                        if t != "(none) ":
+                            self.tags[t] = id
+                    elif l.startswith("Log:"):
+                        state = 1
+                        log = ""
+                elif state == 1: # log
+                    if l == "Members: \n":
+                        files = {}
+                        log = self.recode(log[:-1])
+                        if not log.strip(): # "".isspace() is False
+                            log = "*** empty log message ***\n"
+                        state = 2
+                    else:
+                        log += l
+                elif state == 2:
+                    if l == "\n": #
+                        state = 0
+                        self.changeset[id] = (date, author, log, files)
+                    else:
+                        file,rev = l[1:-2].rsplit(':',1)
+                        rev = rev.split("->")[1]
+                        files[file] = rev
+
+            self.heads = self.lastbranch.values()
+        finally:
+            os.chdir(d)
+
+    def _connect(self):
+        """Spawn "cvs server" (through rsh for remote roots) and handshake."""
+        root = self.cvsroot
+        local = False
+        user, host = None, None
+        cmd = ['cvs', 'server']
+
+        status("connecting to %s\n" % root)
+
+        # only non-pserver for now
+        if root.startswith(":pserver"):
+            abort("can't handle pserver mode yet: %s\n" % root)
+
+        if root.startswith(":local:"):
+            local = True
+            root = root[7:]
+        else:
+            # :ext:user@host/home/user/path/to/cvsroot
+            if root.startswith(":ext:"):
+                root = root[5:]
+            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
+            if not m:
+                local = True
+            else:
+                local = False
+                user, host, root = m.group(1), m.group(2), m.group(3)
+
+        if not local:
+            # fall back to plain rsh when CVS_RSH is unset or empty
+            rsh = os.environ.get("CVS_RSH") or "rsh"
+            if user:
+                cmd = [rsh, '-l', user, host] + cmd
+            else:
+                cmd = [rsh, host] + cmd
+
+        self.writep, self.readp = os.popen2(cmd)
+        self.realroot = root
+
+        self.writep.write("Root %s\n" % root)
+        self.writep.write("Valid-responses ok error Valid-requests Mode"
+                          " M Mbinary E Checked-in Created Updated"
+                          " Merged Removed\n")
+        self.writep.write("valid-requests\n")
+        self.writep.flush()
+        r = self.readp.readline()
+        if not r.startswith("Valid-requests"):
+            abort("server sucks\n")
+        if "UseUnchanged" in r:
+            self.writep.write("UseUnchanged\n")
+            self.writep.flush()
+            r = self.readp.readline()
+
+    def getheads(self):
+        """Return the last changeset id recorded for each branch."""
+        return self.heads
+
+    def getfile(self, name, rev):
+        """Fetch file name at rev from the server; IOError for dead revs."""
+        if rev.endswith("(DEAD)"):
+            raise IOError
+
+        args = ("-N -P -kk -r %s --" % rev).split()
+        args.append(os.path.join(self.cvsrepo, name))
+        for x in args:
+            self.writep.write("Argument %s\n" % x)
+        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
+        self.writep.flush()
+
+        data = ""
+        while 1:
+            line = self.readp.readline()
+            if line.startswith("Created ") or line.startswith("Updated "):
+                self.readp.readline() # path
+                self.readp.readline() # entries
+                mode = self.readp.readline()[:-1]
+                count = int(self.readp.readline()[:-1])
+                data = self.readp.read(count)
+            elif line.startswith(" "):
+                data += line[1:]
+            elif line.startswith("M "):
+                pass
+            elif line.startswith("Mbinary "):
+                count = int(self.readp.readline()[:-1])
+                data = self.readp.read(count)
+            else:
+                if line == "ok\n":
+                    return data
+                elif line.startswith("E "):
+                    warn("cvs server: %s\n" % line[2:])
+                elif line.startswith("Remove"):
+                    l = self.readp.readline()
+                    l = self.readp.readline()
+                    if l != "ok\n":
+                        abort("unknown CVS response: %s\n" % l)
+                else:
+                    abort("unknown CVS response: %s\n" % line)
+
+    def getchanges(self, rev):
+        """Return the sorted (file, revision, 0) tuples of changeset rev."""
+        files = self.changeset[rev][3]
+        cl = [ (f, r, 0) for f,r in files.items() ]
+        cl.sort()
+        return cl
+
+    def recode(self, text):
+        """Re-encode text from the locale's preferred encoding to UTF-8."""
+        return text.decode(self.encoding, "replace").encode("utf-8")
+
+    def getcommit(self, rev):
+        """Return (parents, author, date, log) for changeset rev."""
+        cs = self.changeset[rev]
+        parents = [self.parent[rev]]
+        if rev == "1":
+            parents = []
+        return (parents, cs[1], cs[0], cs[2])
+
+    def gettags(self):
+        """Return the mapping of tag name to changeset id."""
+        return self.tags
+
 class convert_git:
     def __init__(self, path):
         if os.path.isdir(path + "/.git"):
             path += "/.git"
         self.path = path
         if not os.path.exists(path + "/HEAD"):
-            raise TypeError("couldn't open GIT repo %s" % path)
+            raise NoRepo("couldn't open GIT repo %s" % path)
 
     def getheads(self):
         fh = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
@@ -129,7 +326,7 @@
         try:
             self.repo = hg.repository(u, path)
         except:
-            raise TypeError("could open hg repo %s" % path)
+            raise NoRepo("couldn't open hg repo %s" % path)
 
     def mapfile(self):
         return os.path.join(self.path, ".hg", "shamap")
@@ -168,8 +365,8 @@
         while parents:
             p1 = p2
             p2 = parents.pop(0)
-            self.repo.rawcommit(files, text, author, dest,
-                                hg.bin(p1), hg.bin(p2))
+            a = self.repo.rawcommit(files, text, author, dest,
+                                    hg.bin(p1), hg.bin(p2))
             text = "(octopus merge fixup)\n"
             p2 = hg.hex(self.repo.changelog.tip())
 
@@ -202,7 +399,7 @@
                                 date, self.repo.changelog.tip(), hg.nullid)
             return hg.hex(self.repo.changelog.tip())
 
-converters = [convert_git, convert_mercurial]
+converters = [convert_cvs, convert_git, convert_mercurial]
 
 def converter(path):
     if not os.path.isdir(path):
@@ -210,7 +407,7 @@
     for c in converters:
         try:
             return c(path)
-        except TypeError:
+        except NoRepo:
             pass
     abort("%s: unknown repository type\n" % path)
 
@@ -319,7 +516,9 @@
         status("converting...\n")
         for c in t:
             num -= 1
-            desc = self.commitcache[c][3].splitlines()[0]
+            desc = self.commitcache[c][3]
+            if "\n" in desc:
+                desc = desc.splitlines()[0]
             status("%d %s\n" % (num, desc))
             self.copy(c)
 