--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/convert/subversion.py Sun Jul 01 23:56:11 2007 +0200
@@ -0,0 +1,511 @@
+# Subversion 1.4/1.5 Python API backend
+#
+# Copyright(C) 2007 Daniel Holth et al
+
+import pprint
+import locale
+
+from mercurial import util
+
+# Subversion stuff. Works best with very recent Python SVN bindings
+# e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
+# these bindings.
+
+from svn.core import SubversionException, Pool
+import svn.core
+import svn.ra
+import svn.delta
+import svn
+import transport
+from cStringIO import StringIO
+
+from common import NoRepo, commit, converter_source, recode, nocommitmsg
+
+class CompatibilityException(Exception):
+    """Raised by _find_children when the ra get_dir call hands back a dict."""
+
+# Presumably the intended batch size for fetching revisions (TODO confirm);
+# nothing in the code shown here reads this value.
+nbRevisionsPerFetch = 50
+
+class svn_entry(object):
+    """Emulate a Subversion path change.
+
+    The copyfrom_path, copyfrom_rev and action attributes of the wrapped
+    entry are copied onto this object, so it does not keep a reference
+    to the entry it was built from.
+    """
+    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']
+
+    def __init__(self, entry, path=None):
+        """Copy the change attributes of entry.
+
+        entry -- object exposing copyfrom_path, copyfrom_rev and action
+        path  -- optional path this change applies to (default None)
+        """
+        # 'path' is declared in __slots__; without an assignment here,
+        # reading it raises AttributeError.
+        self.path = path
+        self.copyfrom_path = entry.copyfrom_path
+        self.copyfrom_rev = entry.copyfrom_rev
+        self.action = entry.action
+
+    def __str__(self):
+        return "%s %s %s" % (self.action, self.copyfrom_path, self.copyfrom_rev)
+
+    def __repr__(self):
+        return self.__str__()
+
+class svn_paths(object):
+    """Emulate a Subversion ordered dictionary of changed paths.
+
+    Iterating yields the paths in sorted order; indexing by a path
+    returns the value stored for it.
+    """
+    __slots__ = ['values', 'order']
+
+    def __init__(self, orig_paths):
+        self.order = []
+        self.values = {}
+        if hasattr(orig_paths, 'keys'):
+            # Mapping input: values are stored exactly as given.
+            self.order = sorted(orig_paths.keys())
+            self.values.update(orig_paths)
+        elif orig_paths:
+            # Any other non-empty container: wrap each value in svn_entry.
+            for name in orig_paths:
+                self.order.append(name)
+                self.values[name] = svn_entry(orig_paths[name])
+            self.order.sort() # maybe the order it came in isn't so great...
+
+    def __iter__(self):
+        return iter(self.order)
+
+    def __getitem__(self, key):
+        return self.values[key]
+
+    def __str__(self):
+        rows = ["'%s': %s,\n" % (name, self.values[name])
+                for name in self.order]
+        return "{\n" + "".join(rows) + "}"
+
+    def __repr__(self):
+        return self.__str__()
+
+# SVN conversion code stolen from bzr-svn and tailor
+class convert_svn(converter_source):
+    def __init__(self, ui, url):
+        """Open the Subversion repository at url and load its history.
+
+        ui  -- stored as self.ui; used in this class for debug output
+        url -- repository URL, optionally followed by "@rev" to start
+               from that revision instead of the latest one
+
+        Raises NoRepo when opening the session or reading the repository
+        root or UUID raises SubversionException.
+        """
+        self.ui = ui
+        # Support file://path@rev syntax. Useful e.g. to convert
+        # deleted branches.  The URL is only shortened once the suffix is
+        # known to be an integer, so an "@" that belongs to the URL itself
+        # (e.g. user@host) leaves the URL intact.
+        latest = None
+        pieces = url.rsplit("@", 1)
+        if len(pieces) == 2:
+            try:
+                latest = int(pieces[1])
+            except ValueError:
+                pass
+            else:
+                url = pieces[0]
+        self.url = url
+        self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
+        try:
+            self.transport = transport.SvnRaTransport(url = url)
+            self.ra = self.transport.ra
+            self.base = svn.ra.get_repos_root(self.ra)
+            # Part of the URL below the repository root.
+            self.module = self.url[len(self.base):]
+            self.modulemap = {} # revision, module
+            self.commits = {}
+            self.files = {}
+            self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
+        except SubversionException:
+            raise NoRepo("couldn't open SVN repo %s" % url)
+
+        try:
+            self.get_blacklist()
+        except IOError:
+            # Best effort: get_blacklist has already set self.blacklist
+            # to an empty set before it tries to open the file.
+            pass
+
+        if not latest:
+            latest = svn.ra.get_latest_revnum(self.ra)
+        dirent = svn.ra.stat(self.ra, self.module, latest)
+        self.last_changed = dirent.created_rev
+
+        self.head = self.rev(self.last_changed)
+
+        # Should lazily fetch revisions in batches of, say, 1,000...:
+        self._fetch_revisions(from_revnum=self.last_changed, to_revnum=0)
+
+    def rev(self, revnum):
+        """Return the unicode revision id "svn:<uuid><module>@<revnum>"."""
+        module = self.module
+        # Decode a byte-string module explicitly.  Formatting it straight
+        # into a unicode template coerces it through ASCII, which fails
+        # for non-ASCII paths.  type(u'') names the unicode text type.
+        if not isinstance(module, type(u'')):
+            module = module.decode(self.encoding)
+        return u"svn:%s%s@%s" % (self.uuid, module, revnum)
+
+    def get_blacklist(self, path="blacklist.txt"):
+        """Avoid certain revision numbers.
+        It is not uncommon for two nearby revisions to cancel each other
+        out, e.g. 'I copied trunk into a subdirectory of itself instead
+        of making a branch'. The converted repository is significantly
+        smaller if we ignore such revisions.
+
+        path -- file listing one revision number per line (default
+                "blacklist.txt").  Lines starting with "#" and lines
+                that are not integers are skipped.
+
+        self.blacklist is reset to an empty set before the file is
+        opened, so it exists even when opening raises IOError.
+        """
+        self.blacklist = set()
+        blacklist = self.blacklist
+        fp = open(path, "r")
+        try:
+            for line in fp:
+                if line.startswith("#"):
+                    continue
+                try:
+                    blacklist.add(int(line.strip()))
+                except ValueError:
+                    pass # not an integer or a comment
+        finally:
+            # Release the handle even if reading fails part-way.
+            fp.close()
+
+    def is_blacklisted(self, svn_rev):
+        """Tell whether svn_rev is a member of self.blacklist."""
+        skipped = self.blacklist
+        return svn_rev in skipped
+
+    def reparent(self, module):
+        """Point the RA session at self.base + module."""
+        target = (self.base + module).encode(self.encoding)
+        self.ui.debug("reparent to %s\n" % target)
+        svn.ra.reparent(self.ra, target)
+
+    def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, pb=None):
+        """Read the log between two revisions and record every commit.
+
+        from_revnum, to_revnum -- range handed to svn.ra.get_log.  The
+            only call in this file passes from_revnum=last_changed and
+            to_revnum=0; the default of 347 is kept as it was.
+        pb -- accepted but not used.
+
+        For each log entry that is not blacklisted this fills
+        self.files[rev] with (filename, rev) pairs, self.commits[rev]
+        with a commit object and self.modulemap[revnum] with the module
+        in effect.  Each commit's parents are filled in when the next
+        received entry is processed.
+
+        Raises util.Abort when the log call reports
+        SVN_ERR_FS_NO_SUCH_REVISION; any other SubversionException is
+        re-raised unchanged.
+        """
+        self.parent_cset = None
+        self.child_cset = None
+
+        self.ui.debug('Fetching revisions %d to %d\n' % (from_revnum, to_revnum))
+
+        # NOTE: the default for 'module' is bound once, to the module in
+        # effect when _fetch_revisions is entered.
+        def get_entry_from_path(path, module=self.module):
+            # Given the repository url of this wc, say
+            #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
+            # extract the "entry" portion (a relative path) from what
+            # svn log --xml says, ie
+            #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
+            # that is to say "tests/PloneTestCase.py"
+
+            if path.startswith(module):
+                relative = path[len(module):]
+                if relative.startswith('/'):
+                    return relative[1:]
+                else:
+                    return relative
+
+            # The path is outside our tracked tree...
+            self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
+            return None
+
+        received = []
+        def rcvr(*arg, **args):
+            # Log callback: snapshot the changed paths and queue the entry
+            # unless its revision is blacklisted.  The entries are only
+            # processed after get_log has returned.
+            orig_paths, revnum, author, date, message, pool = arg
+            if not self.is_blacklisted(revnum):
+                received.append((svn_paths(orig_paths), revnum, author,
+                                 date, message))
+
+        def after_received(orig_paths, revnum, author, date, message):
+            if revnum in self.modulemap:
+                new_module = self.modulemap[revnum]
+                if new_module != self.module:
+                    self.module = new_module
+                    self.reparent(self.module)
+
+            copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
+            copies = {}
+            entries = []
+            self.ui.debug("Parsing revision %d\n" % revnum)
+            if orig_paths is not None:
+                rev = self.rev(revnum)
+                # The branch name is the last component of the module;
+                # split() always yields at least one element.
+                branch = self.module.split("/")[-1]
+
+                def lookup_parts(p):
+                    # Return (prefix, entry) for the longest proper prefix
+                    # of p recorded in copyfrom, or None.
+                    rc = None
+                    parts = p.split("/")
+                    for i in range(len(parts)):
+                        part = "/".join(parts[:i])
+                        info = part, copyfrom.get(part, None)
+                        if info[1] is not None:
+                            # Format the prefix only: a 2-tuple on the
+                            # right of a single %s raises TypeError.
+                            self.ui.debug("Found parent directory %s\n" % part)
+                            rc = info
+                    return rc
+
+                for path in orig_paths:
+                    # self.ui.write("path %s\n" % path)
+                    if path == self.module: # Follow branching back in history
+                        ent = orig_paths[path]
+                        if ent:
+                            if ent.copyfrom_path:
+                                self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
+                            else:
+                                self.ui.debug("No copyfrom path, don't know what to do.\n")
+                                # Maybe it was added and there is no more history.
+                    entrypath = get_entry_from_path(path, module=self.module)
+                    # self.ui.write("entrypath %s\n" % entrypath)
+                    if not entrypath:
+                        # Outside our area of interest
+                        self.ui.debug("boring@%s: %s\n" % (revnum, path))
+                        continue
+                    entry = entrypath.decode(self.encoding)
+                    ent = orig_paths[path]
+
+                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
+                    if kind == svn.core.svn_node_file:
+                        if ent.copyfrom_path:
+                            copyfrom_path = get_entry_from_path(ent.copyfrom_path)
+                            if copyfrom_path:
+                                self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
+                                # It's probably important for hg that the source
+                                # exists in the revision's parent, not just the
+                                # ent.copyfrom_rev
+                                fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
+                                if fromkind != 0:
+                                    copies[self.recode(entry)] = self.recode(copyfrom_path)
+                        entries.append(self.recode(entry))
+                    elif kind == 0: # gone, but had better be a deleted *file*
+                        self.ui.debug("gone from %s\n" % ent.copyfrom_rev)
+
+                        fromrev = revnum - 1
+                        # might always need to be revnum - 1 in these 3 lines?
+                        old_module = self.modulemap.get(fromrev, self.module)
+                        # entrypath is non-empty here, so this equals the
+                        # path rebuilt from get_entry_from_path(path).
+                        basepath = old_module + "/" + entrypath
+                        entrypath = basepath
+
+                        self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))
+
+                        frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)
+
+                        # need to remove fragment from lookup_parts and replace with copyfrom_path
+                        if frompath is not None:
+                            self.ui.debug("munge-o-matic\n")
+                            self.ui.debug(entrypath + '\n')
+                            self.ui.debug(entrypath[len(frompath):] + '\n')
+                            entrypath = froment.copyfrom_path + entrypath[len(frompath):]
+                            fromrev = froment.copyfrom_rev
+                            self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))
+
+                        fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
+                        if fromkind == svn.core.svn_node_file: # a deleted file
+                            entries.append(self.recode(entry))
+                        else:
+                            # print "Deleted/moved non-file:", revnum, path, ent
+                            # children = self._find_children(path, revnum - 1)
+                            # print "find children %s@%d from %d action %s" % (path, revnum, ent.copyfrom_rev, ent.action)
+                            # Sometimes this is tricky. For example: in
+                            # The Subversion Repository revision 6940 a dir
+                            # was copied and one of its files was deleted
+                            # from the new location in the same commit. This
+                            # code can't deal with that yet.
+                            if ent.action == 'C':
+                                children = self._find_children(path, fromrev)
+                            else:
+                                oroot = entrypath.strip('/')
+                                nroot = path.strip('/')
+                                children = self._find_children(oroot, fromrev)
+                                children = [s.replace(oroot,nroot) for s in children]
+                            # Mark all [files, not directories] as deleted.
+                            for child in children:
+                                # Can we move a child directory and its
+                                # parent in the same commit? (probably can). Could
+                                # cause problems if instead of revnum -1,
+                                # we have to look in (copyfrom_path, revnum - 1)
+                                entrypath = get_entry_from_path("/" + child, module=old_module)
+                                if entrypath:
+                                    entry = self.recode(entrypath.decode(self.encoding))
+                                    if entry in copies:
+                                        # deleted file within a copy
+                                        del copies[entry]
+                                    else:
+                                        entries.append(entry)
+                    elif kind == svn.core.svn_node_dir:
+                        # Should probably synthesize normal file entries
+                        # and handle as above to clean up copy/rename handling.
+
+                        # If the directory just had a prop change,
+                        # then we shouldn't need to look for its children.
+                        # Also this could create duplicate entries. Not sure
+                        # whether this will matter. Maybe should make entries a set.
+                        # print "Changed directory", revnum, path, ent.action, ent.copyfrom_path, ent.copyfrom_rev
+                        # This will fail if a directory was copied
+                        # from another branch and then some of its files
+                        # were deleted in the same transaction.
+                        children = self._find_children(path, revnum)
+                        children.sort()
+                        for child in children:
+                            # Can we move a child directory and its
+                            # parent in the same commit? (probably can). Could
+                            # cause problems if instead of revnum -1,
+                            # we have to look in (copyfrom_path, revnum - 1)
+                            entrypath = get_entry_from_path("/" + child, module=self.module)
+                            # print child, self.module, entrypath
+                            if entrypath:
+                                # Need to filter out directories here...
+                                kind = svn.ra.check_path(self.ra, entrypath, revnum)
+                                if kind != svn.core.svn_node_dir:
+                                    entries.append(self.recode(entrypath))
+
+                        # Copies here (must copy all from source)
+                        # Probably not a real problem for us if
+                        # source does not exist
+
+                        # Can do this with the copy command "hg copy"
+                        # if ent.copyfrom_path:
+                        #     copyfrom_entry = get_entry_from_path(ent.copyfrom_path.decode(self.encoding),
+                        #                                          module=self.module)
+                        #     copyto_entry = entrypath
+                        #
+                        #     print "copy directory", copyfrom_entry, 'to', copyto_entry
+                        #
+                        #     copies.append((copyfrom_entry, copyto_entry))
+
+                        if ent.copyfrom_path:
+                            copyfrom_path = ent.copyfrom_path.decode(self.encoding)
+                            copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
+                            if copyfrom_entry:
+                                copyfrom[path] = ent
+                                self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))
+
+                                # Good, /probably/ a regular copy. Really should check
+                                # to see whether the parent revision actually contains
+                                # the directory in question.
+                                children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
+                                children.sort()
+                                for child in children:
+                                    entrypath = get_entry_from_path("/" + child, module=self.module)
+                                    if entrypath:
+                                        entry = entrypath.decode(self.encoding)
+                                        # print "COPY COPY From", copyfrom_entry, entry
+                                        copyto_path = path + entry[len(copyfrom_entry):]
+                                        copyto_entry = get_entry_from_path(copyto_path, module=self.module)
+                                        # print "COPY", entry, "COPY To", copyto_entry
+                                        copies[self.recode(copyto_entry)] = self.recode(entry)
+                                        # copy from quux splort/quuxfile
+
+                self.modulemap[revnum] = self.module # track backwards in time
+                # a list of (filename, id) where id lets us retrieve the file.
+                # eg in git, id is the object hash. for svn it is the
+                # revision id string built by self.rev().
+                self.files[rev] = zip(entries, [rev] * len(entries))
+
+                # Example SVN datetime. Includes microseconds.
+                # ISO-8601 conformant
+                # '2007-01-04T17:35:00.902377Z'
+                # "YYYY-MM-DDTHH:MM:SS" is 19 characters; slicing at 18
+                # drops the last digit of the seconds.
+                date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])
+
+                log = message and self.recode(message) or nocommitmsg
+                author = author and self.recode(author) or ''
+
+                cset = commit(author=author,
+                              date=util.datestr(date),
+                              desc=log,
+                              parents=[],
+                              copies=copies,
+                              branch=branch)
+
+                # The entry processed just before this one gets this
+                # revision as its parent.
+                if self.child_cset is not None:
+                    self.child_cset.parents = [rev]
+
+                self.child_cset = cset
+
+                self.commits[rev] = cset
+
+        try:
+            discover_changed_paths = True
+            strict_node_history = False
+            svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum,
+                           0, discover_changed_paths, strict_node_history, rcvr)
+            for args in received:
+                after_received(*args)
+            self.last_revnum = to_revnum
+        except SubversionException, (_, num):
+            if num == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
+                # NoSuchRevision is neither defined nor imported in this
+                # module, so report the problem with util.Abort instead.
+                raise util.Abort("svn: revision %d does not exist" % to_revnum)
+            raise
+
+    def getheads(self):
+        """Store [self.head] in self.heads and return that list."""
+        # svn-url@rev
+        # Not safe if someone committed:
+        heads = [self.head]
+        self.heads = heads
+        return heads
+
+    def _getfile(self, file, rev):
+        """Fetch file as of revision id rev and return (data, mode).
+
+        mode is 'l' when the returned properties contain svn:special,
+        else 'x' when they contain svn:executable, else ''.  For mode 'l'
+        a leading "link " is removed from the data.
+
+        Raises IOError when Subversion reports the path as not found;
+        other SubversionExceptions are re-raised unchanged.
+        """
+        # TODO: ra.get_file transmits the whole file instead of diffs.
+        buf = StringIO()
+        mode = ''
+        try:
+            # The revision number is whatever follows the last "@".
+            revnum = int(rev.split("@")[-1])
+            wanted = self.modulemap[revnum]
+            if self.module != wanted:
+                self.module = wanted
+                self.reparent(self.module)
+            info = svn.ra.get_file(self.ra, file, revnum, buf)
+            if isinstance(info, list):
+                info = info[-1]
+            if "svn:executable" in info:
+                mode = 'x'
+            else:
+                mode = ''
+            if "svn:special" in info:
+                mode = 'l'
+        except SubversionException, e:
+            missing = (svn.core.SVN_ERR_FS_NOT_FOUND,
+                       svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
+            if e.apr_err not in missing:
+                raise
+            raise IOError() # File not found
+        data = buf.getvalue()
+        prefix = "link "
+        if mode == 'l' and data.startswith(prefix):
+            data = data[len(prefix):]
+        return data, mode
+
+    def getfile(self, file, rev):
+        """Return the data of file at rev, remembering its mode.
+
+        The mode is stored in self.modecache under (file, rev).
+        """
+        contents, flags = self._getfile(file, rev)
+        self.modecache[(file, rev)] = flags
+        return contents
+
+    def getmode(self, file, rev):
+        """Return the mode stored in self.modecache for (file, rev)."""
+        key = (file, rev)
+        return self.modecache[key]
+
+    def getchanges(self, rev):
+        """Return the sorted (filename, id) list recorded for rev.
+
+        Resets self.modecache to an empty dict.  The list held in
+        self.files[rev] is sorted in place and returned itself.
+        """
+        self.modecache = {}
+        changed = self.files[rev]
+        changed.sort()
+        return changed
+
+    def getcommit(self, rev):
+        """Return the commit stored in self.commits for rev."""
+        cset = self.commits[rev]
+        return cset
+
+    def gettags(self):
+        """Return an empty list; this source reports no tags."""
+        tags = []
+        return tags
+
+    def _find_children(self, path, revnum):
+        """Return the non-directory entries found below path at revnum.
+
+        Each result is path (stripped of leading and trailing slashes)
+        joined with the entry's relative name.  The RA session is
+        reparented to path for the traversal and always reparented back
+        to self.module afterwards.  Once the RA directory call has been
+        found to return a dict, this and every later call use
+        svn.client.ls instead.
+        """
+        path = path.strip("/")
+
+        def _find_children_fallback(path, revnum):
+            # SWIG python bindings for getdir are broken up to at least 1.4.3
+            # svn.client is not imported at the top of this module, so
+            # import it here before using it.
+            import svn.client
+            if not hasattr(self, 'client_ctx'):
+                self.client_ctx = svn.client.create_context()
+            optrev = svn.core.svn_opt_revision_t()
+            optrev.kind = svn.core.svn_opt_revision_number
+            optrev.value.number = revnum
+            rpath = '/'.join([self.url, path]).strip('/')
+            return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev, True, self.client_ctx).keys()]
+
+        # This instance attribute is only set after the RA call has
+        # failed once with CompatibilityException.
+        if hasattr(self, '_find_children_fallback'):
+            return _find_children_fallback(path, revnum)
+
+        self.reparent("/" + path)
+        pool = Pool()
+
+        children = []
+        def find_children_inner(children, path, revnum = revnum):
+            if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
+                fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
+                getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
+            else:
+                getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
+            if isinstance(getdir, dict):
+                # python binding for getdir is broken up to at least 1.4.3
+                raise CompatibilityException()
+            dirents = getdir[0]
+            if isinstance(dirents, int):
+                # got here once due to infinite recursion bug
+                # pprint.pprint(getdir)
+                return
+            for child in sorted(dirents.keys()):
+                dirent = dirents[child]
+                childpath = (path + "/" + child).strip("/")
+                if dirent.kind == svn.core.svn_node_dir:
+                    find_children_inner(children, childpath)
+                else:
+                    children.append(childpath)
+
+        use_fallback = False
+        try:
+            try:
+                find_children_inner(children, "")
+            except CompatibilityException:
+                self._find_children_fallback = True
+                use_fallback = True
+        finally:
+            # Restore the session even when the traversal raises, so
+            # later RA calls are not left pointing at path.
+            self.reparent(self.module)
+
+        if use_fallback:
+            return _find_children_fallback(path, revnum)
+        return [path + "/" + c for c in children]
+
+    def recode(self, s):
+        """Pass s to the module-level recode() with self.encoding."""
+        encoding = self.encoding
+        return recode(encoding, s)