Mercurial > hg
comparison hgext/convert/subversion.py @ 4765:b6a1f2c46c6c
convert extension: Add SVN converter
author | Daniel Holth <dholth@fastmail.fm> |
---|---|
date | Sun, 01 Jul 2007 23:56:11 +0200 |
parents | |
children | 95cbb6b74790 |
comparison
equal
deleted
inserted
replaced
4764:6a16ef0d1c7c | 4765:b6a1f2c46c6c |
---|---|
1 # Subversion 1.4/1.5 Python API backend | |
2 # | |
3 # Copyright(C) 2007 Daniel Holth et al | |
4 | |
import locale
import pprint
from cStringIO import StringIO

from mercurial import util

# Subversion stuff. Works best with very recent Python SVN bindings
# e.g. SVN 1.5 or backports. Thanks to the bzr folks for enhancing
# these bindings.

import svn
import svn.client
import svn.core
import svn.delta
import svn.ra
from svn.core import SubversionException, Pool

from common import NoRepo, commit, converter_source, recode, nocommitmsg
import transport
23 | |
class CompatibilityException(Exception):
    """Signal that the Subversion bindings returned a result we cannot use.

    Raised by the directory walker when ``get_dir``/``get_dir2`` hands back
    a dict, so that the caller can switch to its fallback implementation.
    """


# Number of revisions per fetch batch.
# NOTE(review): not referenced anywhere else in this file - confirm whether
# it is still needed.
nbRevisionsPerFetch = 50
27 | |
class svn_entry(object):
    """Emulate a Subversion path change.

    Copies the ``copyfrom_path``, ``copyfrom_rev`` and ``action`` attributes
    off the object it is given, so the values are held by a plain Python
    object.
    """
    # 'path' is reserved as a slot but is not filled in by __init__.
    __slots__ = ['path', 'copyfrom_path', 'copyfrom_rev', 'action']

    def __init__(self, entry):
        """Snapshot the three change attributes of ``entry``."""
        for field in ('copyfrom_path', 'copyfrom_rev', 'action'):
            setattr(self, field, getattr(entry, field))

    def __str__(self):
        """Render as ``"<action> <copyfrom_path> <copyfrom_rev>"``."""
        fields = (self.action, self.copyfrom_path, self.copyfrom_rev)
        return "%s %s %s" % fields

    def __repr__(self):
        """Same text as ``__str__``."""
        return self.__str__()
41 | |
class svn_paths(object):
    """Emulate a Subversion ordered dictionary of changed paths.

    Iteration yields the paths in sorted order; indexing by a path returns
    the value stored for it.
    """
    __slots__ = ['values', 'order']

    def __init__(self, orig_paths):
        """Build the mapping from ``orig_paths``.

        * Something with a ``keys`` attribute is copied as-is (its values
          are stored unchanged).
        * Any other non-empty container is iterated, and each value is
          wrapped in an ``svn_entry``.
        * A false value (``None``, empty) produces an empty mapping.
        """
        self.order = []
        self.values = {}
        if hasattr(orig_paths, 'keys'):
            self.values.update(orig_paths)
            self.order = sorted(orig_paths.keys())
        elif orig_paths:
            for name in orig_paths:
                self.values[name] = svn_entry(orig_paths[name])
                self.order.append(name)
            # maybe the order it came in isn't so great...
            self.order.sort()

    def __iter__(self):
        """Iterate over the paths in sorted order."""
        return iter(self.order)

    def __getitem__(self, key):
        """Return the value stored for path ``key``."""
        return self.values[key]

    def __str__(self):
        """Render as a dict-like listing, one ``'path': value,`` per line."""
        rows = ["'%s': %s,\n" % (name, self.values[name])
                for name in self.order]
        return "{\n" + "".join(rows) + "}"

    def __repr__(self):
        """Same text as ``__str__``."""
        return self.__str__()
74 | |
75 # SVN conversion code stolen from bzr-svn and tailor | |
class convert_svn(converter_source):
    """Conversion source backed by the Subversion RA (repository access) API.

    On construction the whole log of the requested module is fetched and
    turned into ``commit`` objects (``self.commits``) plus per-revision file
    lists (``self.files``).  Revision identifiers are the strings produced
    by ``rev()``.
    """

    def __init__(self, ui, url):
        """Open ``url`` and read its history.

        ``url`` may end in ``@<revnum>`` to start from that revision instead
        of the latest one.

        Raises NoRepo if the Subversion layer raises SubversionException
        while the repository is being opened.
        """
        self.ui = ui

        # Support file://path@rev syntax. Useful e.g. to convert
        # deleted branches.
        latest = None
        pieces = url.rsplit("@", 1)
        if len(pieces) == 2:
            try:
                latest = int(pieces[1])
            except ValueError:
                # The '@' belongs to the URL itself (e.g. user@host), so the
                # URL must be left intact.
                pass
            else:
                url = pieces[0]
        self.url = url
        self.encoding = 'UTF-8' # Subversion is always nominal UTF-8
        # Filled by getfile(), reset by getchanges(); created here so that
        # getfile()/getmode() do not depend on getchanges() having run.
        self.modecache = {}
        try:
            self.transport = transport.SvnRaTransport(url = url)
            self.ra = self.transport.ra
            self.base = svn.ra.get_repos_root(self.ra)
            self.module = self.url[len(self.base):]
            self.modulemap = {} # revision, module
            self.commits = {}
            self.files = {}
            self.uuid = svn.ra.get_uuid(self.ra).decode(self.encoding)
        except SubversionException:
            raise NoRepo("couldn't open SVN repo %s" % url)

        try:
            self.get_blacklist()
        except IOError:
            # No readable blacklist.txt: get_blacklist() has already set
            # self.blacklist to an empty set, so nothing is skipped.
            pass

        if not latest:
            latest = svn.ra.get_latest_revnum(self.ra)
        dirent = svn.ra.stat(self.ra, self.module, latest)
        self.last_changed = dirent.created_rev

        self.head = self.rev(self.last_changed)

        # Should lazily fetch revisions in batches of, say, 1,000...:
        self._fetch_revisions(from_revnum=self.last_changed, to_revnum=0)

    def rev(self, revnum):
        """Return the identifier ``svn:<uuid><module>@<revnum>`` for ``revnum``."""
        return (u"svn:%s%s@%s" % (self.uuid, self.module, revnum)).decode(self.encoding)

    def get_blacklist(self):
        """Avoid certain revision numbers.
        It is not uncommon for two nearby revisions to cancel each other
        out, e.g. 'I copied trunk into a subdirectory of itself instead
        of making a branch'. The converted repository is significantly
        smaller if we ignore such revisions.

        Reads ``blacklist.txt`` from the current directory: one revision
        number per line, lines starting with ``#`` are comments, anything
        that is not an integer is ignored.  Raises IOError if the file
        cannot be opened; ``self.blacklist`` is an empty set in that case.
        """
        self.blacklist = set()
        fp = open("blacklist.txt", "r")
        try:
            for line in fp:
                if line.startswith("#"):
                    continue
                try:
                    self.blacklist.add(int(line.strip()))
                except ValueError:
                    pass # not an integer or a comment
        finally:
            fp.close()

    def is_blacklisted(self, svn_rev):
        """Return True if ``svn_rev`` is listed in the blacklist."""
        return svn_rev in self.blacklist

    def reparent(self, module):
        """Point the RA session at ``self.base + module``."""
        svn_url = self.base + module
        self.ui.debug("reparent to %s\n" % svn_url.encode(self.encoding))
        svn.ra.reparent(self.ra, svn_url.encode(self.encoding))

    def _fetch_revisions(self, from_revnum = 0, to_revnum = 347, pb=None):
        """Read the log from ``from_revnum`` to ``to_revnum`` and record it.

        For every non-blacklisted log entry this fills ``self.commits``,
        ``self.files`` and ``self.modulemap`` and links each commit to the
        one processed after it as its parent.  ``pb`` is accepted but not
        used.

        Raises util.Abort if Subversion reports that the revision does not
        exist; any other SubversionException is propagated.
        """
        self.parent_cset = None
        self.child_cset = None

        self.ui.debug('Fetching revisions %d to %d\n' % (from_revnum, to_revnum))

        def get_entry_from_path(path, module=self.module):
            # Given the repository url of this wc, say
            #   "http://server/plone/CMFPlone/branches/Plone-2_0-branch"
            # extract the "entry" portion (a relative path) from what
            # svn log --xml says, ie
            #   "/CMFPlone/branches/Plone-2_0-branch/tests/PloneTestCase.py"
            # that is to say "tests/PloneTestCase.py"

            if path.startswith(module):
                relative = path[len(module):]
                if relative.startswith('/'):
                    return relative[1:]
                else:
                    return relative

            # The path is outside our tracked tree...
            self.ui.debug('Ignoring %r since it is not under %r\n' % (path, module))
            return None

        received = []
        def rcvr(*arg, **args):
            # Log callback: copy the changed paths into plain Python objects
            # and queue the entry; processing happens after get_log returns.
            orig_paths, revnum, author, date, message, pool = arg
            new_orig_paths = svn_paths(orig_paths)
            if not self.is_blacklisted(revnum):
                received.append((new_orig_paths, revnum, author, date, message))

        def after_received(orig_paths, revnum, author, date, message):
            if revnum in self.modulemap:
                new_module = self.modulemap[revnum]
                if new_module != self.module:
                    self.module = new_module
                    self.reparent(self.module)

            copyfrom = {} # Map of entrypath, revision for finding source of deleted revisions.
            copies = {}
            entries = []
            self.ui.debug("Parsing revision %d\n" % revnum)

            # Bound unconditionally: both are needed below even when there
            # are no changed paths to walk.
            rev = self.rev(revnum)
            branch = self.module.split("/")[-1]

            if orig_paths is not None:
                for path in orig_paths:
                    # self.ui.write("path %s\n" % path)
                    if path == self.module: # Follow branching back in history
                        ent = orig_paths[path]
                        if ent:
                            if ent.copyfrom_path:
                                self.modulemap[ent.copyfrom_rev] = ent.copyfrom_path
                            else:
                                self.ui.debug("No copyfrom path, don't know what to do.\n")
                                # Maybe it was added and there is no more history.
                    entrypath = get_entry_from_path(path, module=self.module)
                    # self.ui.write("entrypath %s\n" % entrypath)
                    if not entrypath:
                        # Outside our area of interest
                        self.ui.debug("boring@%s: %s\n" % (revnum, path))
                        continue
                    entry = entrypath.decode(self.encoding)
                    ent = orig_paths[path]

                    kind = svn.ra.check_path(self.ra, entrypath, revnum)
                    if kind == svn.core.svn_node_file:
                        if ent.copyfrom_path:
                            copyfrom_path = get_entry_from_path(ent.copyfrom_path)
                            if copyfrom_path:
                                self.ui.debug("Copied to %s from %s@%s\n" % (entry, copyfrom_path, ent.copyfrom_rev))
                                # It's probably important for hg that the source
                                # exists in the revision's parent, not just the
                                # ent.copyfrom_rev
                                fromkind = svn.ra.check_path(self.ra, copyfrom_path, ent.copyfrom_rev)
                                if fromkind != 0:
                                    copies[self.recode(entry)] = self.recode(copyfrom_path)
                        entries.append(self.recode(entry))
                    elif kind == 0: # gone, but had better be a deleted *file*
                        self.ui.debug("gone from %s\n" % ent.copyfrom_rev)

                        fromrev = revnum - 1
                        # might always need to be revnum - 1 in these 3 lines?
                        old_module = self.modulemap.get(fromrev, self.module)
                        basepath = old_module + "/" + get_entry_from_path(path, module=self.module)
                        entrypath = basepath

                        def lookup_parts(p):
                            # Return (prefix, entry) for the longest proper
                            # prefix of p recorded in copyfrom, or None.
                            rc = None
                            parts = p.split("/")
                            for i in range(len(parts)):
                                part = "/".join(parts[:i])
                                info = part, copyfrom.get(part, None)
                                if info[1] is not None:
                                    # info is a tuple: wrap it so that it is
                                    # formatted as a single value.
                                    self.ui.debug("Found parent directory %s\n" % (info,))
                                    rc = info
                            return rc

                        self.ui.debug("base, entry %s %s\n" % (basepath, entrypath))

                        frompath, froment = lookup_parts(entrypath) or (None, revnum - 1)

                        # need to remove fragment from lookup_parts and replace with copyfrom_path
                        if frompath is not None:
                            self.ui.debug("munge-o-matic\n")
                            self.ui.debug(entrypath + '\n')
                            self.ui.debug(entrypath[len(frompath):] + '\n')
                            entrypath = froment.copyfrom_path + entrypath[len(frompath):]
                            fromrev = froment.copyfrom_rev
                            self.ui.debug("Info: %s %s %s %s\n" % (frompath, froment, ent, entrypath))

                        fromkind = svn.ra.check_path(self.ra, entrypath, fromrev)
                        if fromkind == svn.core.svn_node_file: # a deleted file
                            entries.append(self.recode(entry))
                        else:
                            # Sometimes this is tricky. For example: in
                            # The Subversion Repository revision 6940 a dir
                            # was copied and one of its files was deleted
                            # from the new location in the same commit. This
                            # code can't deal with that yet.
                            if ent.action == 'C':
                                children = self._find_children(path, fromrev)
                            else:
                                oroot = entrypath.strip('/')
                                nroot = path.strip('/')
                                children = self._find_children(oroot, fromrev)
                                # Every child returned starts with oroot;
                                # swap only that leading prefix so a later
                                # occurrence of the same text is untouched.
                                children = [nroot + s[len(oroot):] for s in children]
                            # Mark all [files, not directories] as deleted.
                            for child in children:
                                # Can we move a child directory and its
                                # parent in the same commit? (probably can). Could
                                # cause problems if instead of revnum -1,
                                # we have to look in (copyfrom_path, revnum - 1)
                                entrypath = get_entry_from_path("/" + child, module=old_module)
                                if entrypath:
                                    entry = self.recode(entrypath.decode(self.encoding))
                                    if entry in copies:
                                        # deleted file within a copy
                                        del copies[entry]
                                    else:
                                        entries.append(entry)
                    elif kind == svn.core.svn_node_dir:
                        # Should probably synthesize normal file entries
                        # and handle as above to clean up copy/rename handling.

                        # If the directory just had a prop change,
                        # then we shouldn't need to look for its children.
                        # Also this could create duplicate entries. Not sure
                        # whether this will matter. Maybe should make entries a set.
                        # This will fail if a directory was copied
                        # from another branch and then some of its files
                        # were deleted in the same transaction.
                        children = self._find_children(path, revnum)
                        children.sort()
                        for child in children:
                            # Can we move a child directory and its
                            # parent in the same commit? (probably can). Could
                            # cause problems if instead of revnum -1,
                            # we have to look in (copyfrom_path, revnum - 1)
                            entrypath = get_entry_from_path("/" + child, module=self.module)
                            if entrypath:
                                # Need to filter out directories here...
                                kind = svn.ra.check_path(self.ra, entrypath, revnum)
                                if kind != svn.core.svn_node_dir:
                                    entries.append(self.recode(entrypath))

                        # Copies here (must copy all from source)
                        # Probably not a real problem for us if
                        # source does not exist

                        if ent.copyfrom_path:
                            copyfrom_path = ent.copyfrom_path.decode(self.encoding)
                            copyfrom_entry = get_entry_from_path(copyfrom_path, module=self.module)
                            if copyfrom_entry:
                                copyfrom[path] = ent
                                self.ui.debug("mark %s came from %s\n" % (path, copyfrom[path]))

                                # Good, /probably/ a regular copy. Really should check
                                # to see whether the parent revision actually contains
                                # the directory in question.
                                children = self._find_children(self.recode(copyfrom_path), ent.copyfrom_rev)
                                children.sort()
                                for child in children:
                                    entrypath = get_entry_from_path("/" + child, module=self.module)
                                    if entrypath:
                                        entry = entrypath.decode(self.encoding)
                                        copyto_path = path + entry[len(copyfrom_entry):]
                                        copyto_entry = get_entry_from_path(copyto_path, module=self.module)
                                        copies[self.recode(copyto_entry)] = self.recode(entry)
                                        # copy from quux splort/quuxfile

            self.modulemap[revnum] = self.module # track backwards in time
            # a list of (filename, id) where id lets us retrieve the file.
            # Here the id is the revision identifier itself.
            self.files[rev] = [(name, rev) for name in entries]

            # Example SVN datetime. Includes microseconds.
            # ISO-8601 conformant
            # '2007-01-04T17:35:00.902377Z'
            # "YYYY-MM-DDTHH:MM:SS" is 19 characters; keep all of them so
            # that the seconds are not truncated.
            date = util.parsedate(date[:19] + " UTC", ["%Y-%m-%dT%H:%M:%S"])

            log = message and self.recode(message) or nocommitmsg
            author = author and self.recode(author) or ''

            cset = commit(author=author,
                          date=util.datestr(date),
                          desc=log,
                          parents=[],
                          copies=copies,
                          branch=branch)

            # Entries are handled one after another; the commit handled just
            # before this one gets this revision as its parent.
            if self.child_cset is not None:
                self.child_cset.parents = [rev]

            self.child_cset = cset

            self.commits[rev] = cset

        try:
            discover_changed_paths = True
            strict_node_history = False
            svn.ra.get_log(self.ra, [self.module], from_revnum, to_revnum,
                           0, discover_changed_paths, strict_node_history, rcvr)
            for args in received:
                after_received(*args)
            self.last_revnum = to_revnum
        except SubversionException as e:
            if e.apr_err == svn.core.SVN_ERR_FS_NO_SUCH_REVISION:
                raise util.Abort('svn: no such revision %d' % to_revnum)
            raise

    def getheads(self):
        """Return the list of head revision identifiers (a single entry)."""
        # svn-url@rev
        # Not safe if someone committed:
        self.heads = [self.head]
        return self.heads

    def _getfile(self, file, rev):
        """Fetch ``file`` at revision identifier ``rev``.

        Returns ``(data, mode)`` where mode is ``'x'`` for files carrying
        the svn:executable property, ``'l'`` for svn:special ones and ``''``
        otherwise.  For ``'l'`` a leading ``"link "`` is removed from data.

        Raises IOError if Subversion reports the path as not found.
        """
        io = StringIO()
        # TODO: ra.get_file transmits the whole file instead of diffs.
        mode = ''
        try:
            revnum = int(rev.split("@")[-1])
            if self.module != self.modulemap[revnum]:
                self.module = self.modulemap[revnum]
                self.reparent(self.module)
            info = svn.ra.get_file(self.ra, file, revnum, io)
            if isinstance(info, list):
                info = info[-1]
            if "svn:executable" in info:
                mode = 'x'
            # svn:special takes precedence over svn:executable
            if "svn:special" in info:
                mode = 'l'
        except SubversionException as e:
            notfound = (svn.core.SVN_ERR_FS_NOT_FOUND,
                        svn.core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
            if e.apr_err in notfound: # File not found
                raise IOError()
            raise
        data = io.getvalue()
        if mode == 'l':
            link_prefix = "link "
            if data.startswith(link_prefix):
                data = data[len(link_prefix):]
        return data, mode

    def getfile(self, file, rev):
        """Return the contents of ``file`` at ``rev`` and remember its mode."""
        data, mode = self._getfile(file, rev)
        self.modecache[(file, rev)] = mode
        return data

    def getmode(self, file, rev):
        """Return the mode cached by a previous ``getfile(file, rev)`` call."""
        return self.modecache[(file, rev)]

    def getchanges(self, rev):
        """Return the sorted ``(filename, id)`` list for ``rev``.

        Also clears the mode cache.  The stored list is sorted in place.
        """
        self.modecache = {}
        files = self.files[rev]
        files.sort()
        return files

    def getcommit(self, rev):
        """Return the commit object recorded for ``rev``."""
        return self.commits[rev]

    def gettags(self):
        """Tags are not converted; always returns an empty list."""
        return []

    def _find_children(self, path, revnum):
        """Return the repository paths found below ``path`` at ``revnum``.

        The RA directory listing is walked recursively and every entry that
        is not a directory is collected.  If the bindings return an unusable
        result (CompatibilityException), the client-level ``ls`` is used
        instead for this and all later calls; that fallback returns every
        key reported by ``svn.client.ls``.
        """
        path = path.strip("/")

        def _find_children_fallback(path, revnum):
            # SWIG python bindings for getdir are broken up to at least 1.4.3
            if not hasattr(self, 'client_ctx'):
                self.client_ctx = svn.client.create_context()
            optrev = svn.core.svn_opt_revision_t()
            optrev.kind = svn.core.svn_opt_revision_number
            optrev.value.number = revnum
            rpath = '/'.join([self.url, path]).strip('/')
            return ['%s/%s' % (path, x) for x in svn.client.ls(rpath, optrev, True, self.client_ctx).keys()]

        # Set (to True) the first time the RA listing turned out unusable.
        if hasattr(self, '_find_children_fallback'):
            return _find_children_fallback(path, revnum)

        self.reparent("/" + path)
        pool = Pool()

        children = []
        def find_children_inner(children, path, revnum = revnum):
            if hasattr(svn.ra, 'get_dir2'): # Since SVN 1.4
                fields = 0xffffffff # Binding does not provide SVN_DIRENT_ALL
                getdir = svn.ra.get_dir2(self.ra, path, revnum, fields, pool)
            else:
                getdir = svn.ra.get_dir(self.ra, path, revnum, pool)
            if isinstance(getdir, dict):
                # python binding for getdir is broken up to at least 1.4.3
                raise CompatibilityException()
            dirents = getdir[0]
            if isinstance(dirents, int):
                # got here once due to infinite recursion bug
                return
            for child in sorted(dirents.keys()):
                dirent = dirents[child]
                if dirent.kind == svn.core.svn_node_dir:
                    find_children_inner(children, (path + "/" + child).strip("/"))
                else:
                    children.append((path + "/" + child).strip("/"))

        try:
            find_children_inner(children, "")
        except CompatibilityException:
            self._find_children_fallback = True
            self.reparent(self.module)
            return _find_children_fallback(path, revnum)

        self.reparent(self.module)
        return [path + "/" + c for c in children]

    def recode(self, s):
        """Recode ``s`` using this source's encoding (see ``common.recode``)."""
        return recode(self.encoding, s)