purge: abort with missing files avoiding problems with name-mangling fs
In a name mangling filesystem (e.g. a case insensitive one)
dirstate.walk() can yield filenames different from the ones
stored in the dirstate. This already confuses the status and
add commands, but with purge this may cause data loss.
To prevent this, purge refuses to work if there are missing
files, and has a 'force' option for when the user knows it is safe.
Even with the force option, purge checks whether any of the missing
files is still available in the working dir: if so, there
may be some problem with the underlying filesystem, so it
unconditionally aborts.
#!/usr/bin/env python
#
# This is a generalized framework for converting between SCM
# repository formats.
#
# To use, run:
#
# convert-repo <source> [<dest> [<mapfile>]]
#
# Currently accepted source formats: git, cvs
# Currently accepted destination formats: hg
#
# If destination isn't given, a new Mercurial repo named <src>-hg will
# be created. If <mapfile> isn't given, it defaults to
# <dest>/.hg/shamap.
#
# The <mapfile> is a simple text file that maps each source commit ID to
# the destination ID for that revision, like so:
#
# <source ID> <destination ID>
#
# If the file doesn't exist, it's automatically created. It's updated
# on each commit copied, so convert-repo can be interrupted and can
# be run repeatedly to copy new commits.
import sys, os, zlib, sha, time, re, locale, socket
os.environ["HGENCODING"] = "utf-8"
from mercurial import hg, ui, util, fancyopts
class Abort(Exception):
    """Raised by abort() to stop the conversion with a message."""
class NoRepo(Exception):
    """Raised by a converter class when it cannot open the given path."""
class commit:
    """Plain record describing one changeset.

    The keyword arguments ``author``, ``date``, ``desc`` and ``parents`` are
    mandatory; every keyword passed in (mandatory or not) becomes an
    attribute of the instance.
    """

    def __init__(self, **parts):
        required = ("author", "date", "desc", "parents")
        for field in required:
            if field not in parts:
                abort("commit missing field %s\n" % field)
        # expose all supplied fields as attributes
        self.__dict__.update(parts)
# Global verbosity switch: any true value silences status().
quiet = 0

def status(msg):
    """Write str(msg) to stdout unless the global ``quiet`` flag is set."""
    if quiet:
        return
    sys.stdout.write(str(msg))
def warn(msg):
    """Write str(msg) to stderr; unlike status() this ignores ``quiet``."""
    text = str(msg)
    sys.stderr.write(text)
def abort(msg):
    """Stop processing by raising Abort carrying *msg*."""
    error = Abort(msg)
    raise error
def recode(s):
    """Return the byte string *s* re-encoded as UTF-8.

    *s* is first tried as UTF-8, then as Latin-1.  If both attempts fail
    it is decoded as UTF-8 with undecodable bytes replaced.

    Only encoding errors trigger the fallbacks; other exceptions
    (e.g. KeyboardInterrupt) are no longer swallowed.
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            return s.decode(encoding).encode("utf-8")
        except UnicodeError:
            # not valid in this encoding, try the next candidate
            pass
    return s.decode("utf-8", "replace").encode("utf-8")
# CVS conversion code inspired by hg-cvs-import and git-cvsimport
class convert_cvs:
    """Conversion source backed by a CVS working directory.

    History is read by running ``cvsps`` inside the checkout.  File
    contents are fetched over the CVS client/server protocol, either
    through a pserver socket or a ``cvs server`` subprocess (run through
    rsh for remote roots).
    """

    def __init__(self, path):
        """Open the checkout at *path*, parse its history and connect.

        Raises NoRepo when *path* has no ``CVS`` subdirectory.
        """
        self.path = path
        cvs = os.path.join(path, "CVS")
        if not os.path.exists(cvs):
            raise NoRepo("couldn't open CVS repo %s" % path)

        self.changeset = {}   # patchset id -> commit
        self.files = {}       # patchset id -> {file name: revision}
        self.tags = {}        # tag name -> patchset id
        self.lastbranch = {}  # branch name -> latest patchset id seen
        self.parent = {}      # patchset id -> parent patchset id
        self.socket = None
        self.cvsroot = self._readmeta(cvs, "Root")
        self.cvsrepo = self._readmeta(cvs, "Repository")
        self.encoding = locale.getpreferredencoding()
        self._parse()
        self._connect()

    def _readmeta(self, cvsdir, name):
        """Return the contents of ``<cvsdir>/<name>`` minus the last character."""
        fp = open(os.path.join(cvsdir, name))
        try:
            return fp.read()[:-1]
        finally:
            fp.close()

    def _parse(self):
        """Fill changeset, files, tags and heads from ``cvsps`` output.

        Does nothing when changesets have already been parsed.  The
        process working directory is switched to the checkout while
        cvsps runs and restored afterwards.
        """
        if self.changeset:
            return

        olddir = os.getcwd()
        try:
            os.chdir(self.path)
            cset = None
            state = 0  # 0: patchset header, 1: log text, 2: member list
            for line in os.popen("cvsps -A -u --cvs-direct -q"):
                if state == 0: # header
                    if line.startswith("PatchSet"):
                        cset = line[9:-2]
                    elif line.startswith("Date"):
                        date = util.parsedate(line[6:-1],
                                              ["%Y/%m/%d %H:%M:%S"])
                        date = util.datestr(date)
                    elif line.startswith("Branch"):
                        branch = line[8:-1]
                        self.parent[cset] = self.lastbranch.get(branch, 'bad')
                        self.lastbranch[branch] = cset
                    elif line.startswith("Ancestor branch"):
                        ancestor = line[17:-1]
                        self.parent[cset] = self.lastbranch[ancestor]
                    elif line.startswith("Author"):
                        author = self.recode(line[8:-1])
                    elif line.startswith("Tag: "):
                        t = line[5:-1].rstrip()
                        if t != "(none)":
                            self.tags[t] = cset
                    elif line.startswith("Log:"):
                        state = 1
                        log = ""
                elif state == 1: # log
                    if line == "Members: \n":
                        files = {}
                        log = self.recode(log[:-1])
                        if log.isspace():
                            log = "*** empty log message ***\n"
                        state = 2
                    else:
                        log += line
                elif state == 2:
                    if line == "\n": # blank line ends the patchset
                        state = 0
                        p = [self.parent[cset]]
                        if cset == "1":
                            # the first patchset has no parent
                            p = []
                        c = commit(author=author, date=date, parents=p,
                                   desc=log, branch=branch)
                        self.changeset[cset] = c
                        self.files[cset] = files
                    else:
                        name, rev = line[1:-2].rsplit(':', 1)
                        # keep the revision on the right of "old->new"
                        rev = rev.split("->")[1]
                        files[name] = rev

            self.heads = self.lastbranch.values()
        finally:
            os.chdir(olddir)

    def _connect(self):
        """Open the CVS server connection and negotiate the protocol.

        Sets ``writep``/``readp`` (the server pipes) and ``realroot``.
        Raises NoRepo when pserver authentication fails and Abort when
        the server does not answer with ``Valid-requests``.
        """
        root = self.cvsroot
        conntype = None
        user, host = None, None
        cmd = ['cvs', 'server']

        status("connecting to %s\n" % root)

        if root.startswith(":pserver:"):
            root = root[9:]
            m = re.match(r'(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?(.*)',
                         root)
            if m:
                conntype = "pserver"
                user, passw, serv, port, root = m.groups()
                if not user:
                    user = "anonymous"
                # NOTE(review): the port is appended after the root path
                # below; confirm this matches the ~/.cvspass key format.
                rr = ":pserver:" + user + "@" + serv + ":" + root
                if port:
                    rr2, port = "-", int(port)
                else:
                    rr2, port = rr, 2401
                rr += str(port)

                if not passw:
                    # "A" is used when no matching .cvspass entry is found
                    passw = "A"
                    pf = open(os.path.join(os.environ["HOME"], ".cvspass"))
                    try:
                        for l in pf:
                            # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
                            l = re.match(r'(/\d+\s+/)?(.*)', l).group(2)
                            if ' ' not in l:
                                # no "<root> <password>" pair on this line
                                continue
                            w, p = l.split(' ', 1)
                            if w in [rr, rr2]:
                                passw = p
                                break
                    finally:
                        pf.close()

                sck = socket.socket()
                sck.connect((serv, port))
                sck.send("\n".join(["BEGIN AUTH REQUEST", root, user, passw,
                                    "END AUTH REQUEST", ""]))
                if sck.recv(128) != "I LOVE YOU\n":
                    raise NoRepo("CVS pserver authentication failed")

                self.writep = self.readp = sck.makefile('r+')

        if not conntype and root.startswith(":local:"):
            conntype = "local"
            root = root[7:]

        if not conntype:
            # :ext:user@host/home/user/path/to/cvsroot
            if root.startswith(":ext:"):
                root = root[5:]
            m = re.match(r'(?:([^@:/]+)@)?([^:/]+):?(.*)', root)
            if not m:
                conntype = "local"
            else:
                conntype = "rsh"
                user, host, root = m.group(1), m.group(2), m.group(3)

        if conntype != "pserver":
            if conntype == "rsh":
                # fall back to plain "rsh" when CVS_RSH is not set
                rsh = os.environ.get("CVS_RSH", "rsh")
                if user:
                    cmd = [rsh, '-l', user, host] + cmd
                else:
                    cmd = [rsh, host] + cmd

            self.writep, self.readp = os.popen2(cmd)

        self.realroot = root

        self.writep.write("Root %s\n" % root)
        self.writep.write("Valid-responses ok error Valid-requests Mode"
                          " M Mbinary E Checked-in Created Updated"
                          " Merged Removed\n")
        self.writep.write("valid-requests\n")
        self.writep.flush()
        r = self.readp.readline()
        if not r.startswith("Valid-requests"):
            abort("server sucks\n")
        if "UseUnchanged" in r:
            self.writep.write("UseUnchanged\n")
            self.writep.flush()
            r = self.readp.readline()

    def getheads(self):
        """Return the head patchset ids collected by _parse()."""
        return self.heads

    def _getfile(self, name, rev):
        """Check out *name* at *rev*; return ``(data, flags)``.

        ``flags`` is "x" when the mode line sent by the server contains
        an "x", else "".  Raises IOError for revisions ending in
        "(DEAD)" and Abort on an unexpected or incomplete response.
        """
        if rev.endswith("(DEAD)"):
            raise IOError

        args = ("-N -P -kk -r %s --" % rev).split()
        args.append(os.path.join(self.cvsrepo, name))
        for x in args:
            self.writep.write("Argument %s\n" % x)
        self.writep.write("Directory .\n%s\nco\n" % self.realroot)
        self.writep.flush()

        data = ""
        mode = None  # set only by a Created/Updated response
        while 1:
            line = self.readp.readline()
            if line.startswith("Created ") or line.startswith("Updated "):
                self.readp.readline() # path
                self.readp.readline() # entries
                mode = self.readp.readline()[:-1]
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line.startswith(" "):
                data += line[1:]
            elif line.startswith("M "):
                pass
            elif line.startswith("Mbinary "):
                count = int(self.readp.readline()[:-1])
                data = self.readp.read(count)
            elif line == "ok\n":
                if mode is None:
                    # "ok" without a Created/Updated response: there is
                    # no file mode to report
                    abort("malformed response from CVS\n")
                if "x" in mode:
                    return (data, "x")
                return (data, "")
            elif line.startswith("E "):
                warn("cvs server: %s\n" % line[2:])
            elif line.startswith("Remove"):
                # NOTE(review): this consumes the trailing "ok" as well,
                # so the loop then waits for more input - confirm intent.
                l = self.readp.readline()
                l = self.readp.readline()
                if l != "ok\n":
                    abort("unknown CVS response: %s\n" % l)
            else:
                abort("unknown CVS response: %s\n" % line)

    def getfile(self, file, rev):
        """Return the contents of *file* at *rev*, caching its mode flags."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode flags cached by getfile() for (*file*, *rev*)."""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted ``(file, revision)`` pairs of patchset *rev*.

        Also resets the mode cache.
        """
        self.modecache = {}
        return sorted(self.files[rev].items())

    def recode(self, text):
        """Convert *text* from the locale's encoding to UTF-8."""
        return text.decode(self.encoding, "replace").encode("utf-8")

    def getcommit(self, rev):
        """Return the commit object parsed for patchset *rev*."""
        return self.changeset[rev]

    def gettags(self):
        """Return the mapping of tag name to patchset id."""
        return self.tags
class convert_git:
    """Conversion source that reads a git repository via git commands."""

    def __init__(self, path):
        """Accept a work tree or a bare git dir; raise NoRepo otherwise."""
        gitdir = path + "/.git"
        if os.path.isdir(gitdir):
            path = gitdir
        self.path = path
        if not os.path.exists(path + "/objects"):
            raise NoRepo("couldn't open GIT repo %s" % path)

    def getheads(self):
        """Return a one-element list holding the id HEAD resolves to."""
        pipe = os.popen("GIT_DIR=%s git-rev-parse --verify HEAD" % self.path)
        head = pipe.read()[:-1]
        return [head]

    def catfile(self, rev, type):
        """Return the output of ``git-cat-file <type> <rev>``.

        The all-zero id raises IOError instead of being looked up.
        """
        if rev == "0" * 40:
            raise IOError()
        cmd = "GIT_DIR=%s git-cat-file %s %s 2>/dev/null" % (self.path, type,
                                                              rev)
        return os.popen(cmd).read()

    def getfile(self, name, rev):
        """Return the blob *rev*; *name* is not used for the lookup."""
        return self.catfile(rev, "blob")

    def getmode(self, name, rev):
        """Return the flags recorded by getchanges() for (*name*, *rev*)."""
        return self.modecache[(name, rev)]

    def getchanges(self, version):
        """Return ``(file, blob id)`` pairs reported by git-diff-tree.

        Resets the mode cache and records "x" for mode 100755, "l" for
        mode 120000 and "" for anything else.
        """
        self.modecache = {}
        pipe = os.popen("GIT_DIR=%s git-diff-tree --root -m -r %s"
                        % (self.path, version))
        changes = []
        for line in pipe:
            if "\t" not in line:
                continue
            meta, fname = line[:-1].split("\t")
            fields = meta.split()
            blob = fields[3]
            if fields[1] == "100755":
                flags = "x"
            elif fields[1] == "120000":
                flags = "l"
            else:
                flags = ""
            self.modecache[(fname, blob)] = flags
            changes.append((fname, blob))
        return changes

    def getcommit(self, version):
        """Parse commit *version* into a commit object.

        The committer is appended to the message, and the date is built
        from the timestamp and timezone of the last author/committer
        header line seen.
        """
        raw = self.catfile(version, "commit") # raw commit object text
        split = raw.find("\n\n")
        message = recode(raw[split+2:])
        header = raw[:split].splitlines()
        manifest = header[0].split()[1]  # parsed but not used below
        parents = []
        for entry in header[1:]:
            key, value = entry.split(" ", 1)
            if key == "author":
                words = value.split()
                tm, tz = words[-2:]
                author = " ".join(words[:-2])
                if author[0] == "<":
                    author = author[1:-1]
                author = recode(author)
            elif key == "committer":
                words = value.split()
                tm, tz = words[-2:]
                committer = " ".join(words[:-2])
                if committer[0] == "<":
                    committer = committer[1:-1]
                committer = recode(committer)
                message += "\ncommitter: %s\n" % committer
            elif key == "parent":
                parents.append(value)

        # turn "+HHMM"/"-HHMM" into a sign-flipped numeric offset
        sign = tz[-5:-4] + "1"
        hours = tz[-4:-2]
        minutes = tz[-2:]
        tz = -int(sign) * (int(hours) * 3600 + int(minutes))
        date = tm + " " + str(tz)

        return commit(parents=parents, date=date, author=author, desc=message)

    def gettags(self):
        """Return ``{tag name: id}`` from the ``^{}`` lines of git-ls-remote."""
        prefix = 'refs/tags/'
        tags = {}
        pipe = os.popen('git-ls-remote --tags "%s" 2>/dev/null' % self.path)
        for raw in pipe:
            entry = raw.strip()
            if not entry.endswith("^{}"):
                continue
            node, ref = entry.split(None, 1)
            if ref.startswith(prefix):
                # drop the prefix and the trailing "^{}"
                tags[ref[len(prefix):-3]] = node
        return tags
class convert_mercurial:
    """Conversion destination that writes into a Mercurial repository."""

    def __init__(self, path):
        """Open the Mercurial repository at *path*.

        Raises NoRepo when the repository cannot be opened.
        """
        self.path = path
        u = ui.ui()
        try:
            self.repo = hg.repository(u, path)
        except Exception:
            raise NoRepo("couldn't open hg repo %s" % path)

    def mapfile(self):
        """Return the default revision map path, ``<path>/.hg/shamap``."""
        return os.path.join(self.path, ".hg", "shamap")

    def getheads(self):
        """Return the hex ids of the changelog heads."""
        h = self.repo.changelog.heads()
        return [hg.hex(x) for x in h]

    def putfile(self, f, e, data):
        """Write *data* to working file *f* with flags *e*.

        Files whose dirstate state is '?' are marked as added.
        """
        self.repo.wwrite(f, data, e)
        if self.repo.dirstate.state(f) == '?':
            self.repo.dirstate.update([f], "a")

    def delfile(self, f):
        """Remove working file *f*, ignoring failures to unlink it."""
        target = self.repo.wjoin(f)
        try:
            os.unlink(target)
        except OSError:
            # best effort: the file may already be gone
            pass

    def putcommit(self, files, parents, commit):
        """Commit *files* with the metadata of *commit*.

        *parents* is a list of hex ids.  Duplicates are dropped and the
        list is padded with null ids up to two entries.  With more than
        two parents, extra "(octopus merge fixup)" commits are chained
        on top.  Returns the hex id of the resulting tip.
        """
        seen = {}
        pl = []
        for p in parents:
            if p not in seen:
                pl.append(p)
                seen[p] = 1
        parents = pl

        # rawcommit takes exactly two parents: pad with the null id
        while len(parents) < 2:
            parents.append("0" * 40)
        p2 = parents.pop(0)

        text = commit.desc
        extra = {}
        try:
            extra["branch"] = commit.branch
        except AttributeError:
            # this commit carries no branch information
            pass

        while parents:
            p1 = p2
            p2 = parents.pop(0)
            self.repo.rawcommit(files, text, commit.author, commit.date,
                                hg.bin(p1), hg.bin(p2), extra=extra)
            text = "(octopus merge fixup)\n"
            p2 = hg.hex(self.repo.changelog.tip())

        return p2

    def puttags(self, tags):
        """Rewrite ``.hgtags`` from *tags* and commit it if it changed.

        *tags* maps tag name to id.  Returns the hex id of the new tip
        when a commit was made, otherwise None.
        """
        try:
            old = self.repo.wfile(".hgtags").read()
            oldlines = old.splitlines(1)
            oldlines.sort()
        except (IOError, OSError):
            # no readable .hgtags yet
            oldlines = []

        newlines = ["%s %s\n" % (tags[tag], tag) for tag in sorted(tags)]
        newlines.sort()

        if newlines != oldlines:
            status("updating tags\n")
            f = self.repo.wfile(".hgtags", "w")
            f.write("".join(newlines))
            f.close()
            if not oldlines:
                self.repo.add([".hgtags"])
            # time.time() is already seconds since the epoch in UTC,
            # which matches the "0" offset in the date string
            date = "%s 0" % int(time.time())
            self.repo.rawcommit([".hgtags"], "update tags", "convert-repo",
                                date, self.repo.changelog.tip(), hg.nullid)
            return hg.hex(self.repo.changelog.tip())
# Converter classes probed, in this order, by converter().
converters = [convert_cvs, convert_git, convert_mercurial]

def converter(path):
    """Return an instance of the first converter class that accepts *path*.

    Aborts when *path* is not a directory or when every class raises
    NoRepo for it.
    """
    if not os.path.isdir(path):
        abort("%s: not a directory\n" % path)
    for cls in converters:
        try:
            repo = cls(path)
        except NoRepo:
            continue
        return repo
    abort("%s: unknown repository type\n" % path)
class convert:
def __init__(self, source, dest, mapfile, opts):
self.source = source
self.dest = dest
self.mapfile = mapfile
self.opts = opts
self.commitcache = {}
self.map = {}
try:
for l in file(self.mapfile):
sv, dv = l[:-1].split()
self.map[sv] = dv
except IOError:
pass
def walktree(self, heads):
visit = heads
known = {}
parents = {}
while visit:
n = visit.pop(0)
if n in known or n in self.map: continue
known[n] = 1
self.commitcache[n] = self.source.getcommit(n)
cp = self.commitcache[n].parents
for p in cp:
parents.setdefault(n, []).append(p)
visit.append(p)
return parents
def toposort(self, parents):
visit = parents.keys()
seen = {}
children = {}
while visit:
n = visit.pop(0)
if n in seen: continue
seen[n] = 1
pc = 0
if n in parents:
for p in parents[n]:
if p not in self.map: pc += 1
visit.append(p)
children.setdefault(p, []).append(n)
if not pc: root = n
s = []
removed = {}
visit = children.keys()
while visit:
n = visit.pop(0)
if n in removed: continue
dep = 0
if n in parents:
for p in parents[n]:
if p in self.map: continue
if p not in removed:
# we're still dependent
visit.append(n)
dep = 1
break
if not dep:
# all n's parents are in the list
removed[n] = 1
if n not in self.map:
s.append(n)
if n in children:
for c in children[n]:
visit.insert(0, c)
if opts.get('datesort'):
depth = {}
for n in s:
depth[n] = 0
pl = [p for p in self.commitcache[n].parents if p not in self.map]
if pl:
depth[n] = max([depth[p] for p in pl]) + 1
s = [(depth[n], self.commitcache[n].date, n) for n in s]
s.sort()
s = [e[2] for e in s]
return s
def copy(self, rev):
c = self.commitcache[rev]
files = self.source.getchanges(rev)
for f,v in files:
try:
data = self.source.getfile(f, v)
except IOError, inst:
self.dest.delfile(f)
else:
e = self.source.getmode(f, v)
self.dest.putfile(f, e, data)
r = [self.map[v] for v in c.parents]
f = [f for f,v in files]
self.map[rev] = self.dest.putcommit(f, r, c)
file(self.mapfile, "a").write("%s %s\n" % (rev, self.map[rev]))
def convert(self):
status("scanning source...\n")
heads = self.source.getheads()
parents = self.walktree(heads)
status("sorting...\n")
t = self.toposort(parents)
num = len(t)
c = None
status("converting...\n")
for c in t:
num -= 1
desc = self.commitcache[c].desc
if "\n" in desc:
desc = desc.splitlines()[0]
status("%d %s\n" % (num, desc))
self.copy(c)
tags = self.source.gettags()
ctags = {}
for k in tags:
v = tags[k]
if v in self.map:
ctags[k] = self.map[v]
if c and ctags:
nrev = self.dest.puttags(ctags)
# write another hash correspondence to override the previous
# one so we don't end up with extra tag heads
if nrev:
file(self.mapfile, "a").write("%s %s\n" % (c, nrev))
def command(src, dest=None, mapfile=None, **opts):
    """Convert repository *src* into *dest*.

    dest    -- destination path; defaults to ``<src>-hg`` and is created
               with ``hg init`` when it is not an existing directory
    mapfile -- revision map path; defaults to the destination
               converter's ``mapfile()``, or ``<dest>/map`` when the
               converter has no such method
    opts    -- option dictionary handed to the convert object

    Aborts when the source cannot be read from or the destination
    cannot be written to.
    """
    srcc = converter(src)
    if not hasattr(srcc, "getcommit"):
        abort("%s: can't read from this repo type\n" % src)

    if not dest:
        dest = src + "-hg"
        status("assuming destination %s\n" % dest)
        if not os.path.isdir(dest):
            status("creating repository %s\n" % dest)
            os.system("hg init " + dest)
    destc = converter(dest)
    if not hasattr(destc, "putcommit"):
        # report the destination: that is the repo that can't be written
        abort("%s: can't write to this repo type\n" % dest)

    if not mapfile:
        try:
            mapfile = destc.mapfile()
        except AttributeError:
            # this converter has no default location; use the
            # destination path, not the converter object
            mapfile = os.path.join(dest, "map")

    c = convert(srcc, destc, mapfile, opts)
    c.convert()
# Script entry point: parse the command line, then run the conversion.
options = [
    ('q', 'quiet', None, 'suppress output'),
    ('', 'datesort', None, 'try to sort changesets by date'),
]

opts = {}
args = fancyopts.fancyopts(sys.argv[1:], options, opts)

if opts['quiet']:
    # silence status() for the rest of the run
    quiet = 1

try:
    command(*args, **opts)
except KeyboardInterrupt:
    status("interrupted\n")
except Abort:
    # report the abort message on stderr
    warn(sys.exc_info()[1])