comparison hgext/convert/cvsps.py @ 8756:6019e6517f95

convert: better support for CVS branchpoints (issue1447). This records the branches starting at individual CVS file revisions, using the symbolic names map rather than just the branches information. This information is used to generate Mercurial changesets. Despite the changes, the CVS conversion still suffers heavily from cvsps' deficiencies in generating a correct representation of the CVS repository history.
author Henrik Stuart <hg@hstuart.dk>
date Tue, 09 Jun 2009 08:59:49 +0200
parents 883f14fcd1df
children c487719cccef
comparison
equal deleted inserted replaced
8755:a2b4ddee3785 8756:6019e6517f95
34 .revision - revision number as tuple 34 .revision - revision number as tuple
35 .tags - list of tags on the file 35 .tags - list of tags on the file
36 .synthetic - is this a synthetic "file ... added on ..." revision? 36 .synthetic - is this a synthetic "file ... added on ..." revision?
37 .mergepoint- the branch that has been merged from 37 .mergepoint- the branch that has been merged from
38 (if present in rlog output) 38 (if present in rlog output)
39 .branchpoints- the branches that start at the current entry
39 ''' 40 '''
40 def __init__(self, **entries): 41 def __init__(self, **entries):
41 self.__dict__.update(entries) 42 self.__dict__.update(entries)
42 43
43 def __repr__(self): 44 def __repr__(self):
398 if revn > 3 and (revn % 2) == 0: 399 if revn > 3 and (revn % 2) == 0:
399 e.branch = tags.get(e.revision[:-1], [None])[0] 400 e.branch = tags.get(e.revision[:-1], [None])[0]
400 else: 401 else:
401 e.branch = None 402 e.branch = None
402 403
404 # find the branches starting from this revision
405 branchpoints = set()
406 for branch, revision in branchmap.iteritems():
407 revparts = tuple([int(i) for i in revision.split('.')])
408 if revparts[-2] == 0 and revparts[-1] % 2 == 0:
409 # normal branch
410 if revparts[:-2] == e.revision:
411 branchpoints.add(branch)
412 elif revparts == (1,1,1): # vendor branch
413 if revparts in e.branches:
414 branchpoints.add(branch)
415 e.branchpoints = branchpoints
416
403 log.append(e) 417 log.append(e)
404 418
405 if len(log) % 100 == 0: 419 if len(log) % 100 == 0:
406 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n') 420 ui.status(util.ellipsis('%d %s' % (len(log), e.file), 80)+'\n')
407 421
451 .parents - list of one or two parent changesets 465 .parents - list of one or two parent changesets
452 .tags - list of tags on this changeset 466 .tags - list of tags on this changeset
453 .synthetic - from synthetic revision "file ... added on branch ..." 467 .synthetic - from synthetic revision "file ... added on branch ..."
454 .mergepoint- the branch that has been merged from 468 .mergepoint- the branch that has been merged from
455 (if present in rlog output) 469 (if present in rlog output)
470 .branchpoints- the branches that start at the current entry
456 ''' 471 '''
457 def __init__(self, **entries): 472 def __init__(self, **entries):
458 self.__dict__.update(entries) 473 self.__dict__.update(entries)
459 474
460 def __repr__(self): 475 def __repr__(self):
475 files = set() 490 files = set()
476 c = None 491 c = None
477 for i, e in enumerate(log): 492 for i, e in enumerate(log):
478 493
479 # Check if log entry belongs to the current changeset or not. 494 # Check if log entry belongs to the current changeset or not.
495
496 # Since CVS is file centric, two different file revisions with
497 # different branchpoints should be treated as belonging to two
498 # different changesets (and the ordering is important and not
499 # honoured by cvsps at this point).
500 #
501 # Consider the following case:
502 # foo 1.1 branchpoints: [MYBRANCH]
503 # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
504 #
505 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
506 # later version of foo may be in MYBRANCH2, so foo should be the
507 # first changeset and bar the next and MYBRANCH and MYBRANCH2
508 # should both start off of the bar changeset. No provisions are
509 # made to ensure that this is, in fact, what happens.
480 if not (c and 510 if not (c and
481 e.comment == c.comment and 511 e.comment == c.comment and
482 e.author == c.author and 512 e.author == c.author and
483 e.branch == c.branch and 513 e.branch == c.branch and
514 e.branchpoints == c.branchpoints and
484 ((c.date[0] + c.date[1]) <= 515 ((c.date[0] + c.date[1]) <=
485 (e.date[0] + e.date[1]) <= 516 (e.date[0] + e.date[1]) <=
486 (c.date[0] + c.date[1]) + fuzz) and 517 (c.date[0] + c.date[1]) + fuzz) and
487 e.file not in files): 518 e.file not in files):
488 c = changeset(comment=e.comment, author=e.author, 519 c = changeset(comment=e.comment, author=e.author,
489 branch=e.branch, date=e.date, entries=[], 520 branch=e.branch, date=e.date, entries=[],
490 mergepoint=getattr(e, 'mergepoint', None)) 521 mergepoint=getattr(e, 'mergepoint', None),
522 branchpoints=getattr(e, 'branchpoints', set()))
491 changesets.append(c) 523 changesets.append(c)
492 files = set() 524 files = set()
493 if len(changesets) % 100 == 0: 525 if len(changesets) % 100 == 0:
494 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1]) 526 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
495 ui.status(util.ellipsis(t, 80) + '\n') 527 ui.status(util.ellipsis(t, 80) + '\n')
611 643
612 p = None 644 p = None
613 if c.branch in branches: 645 if c.branch in branches:
614 p = branches[c.branch] 646 p = branches[c.branch]
615 else: 647 else:
616 for f in c.entries: 648 # first changeset on a new branch
617 p = max(p, versions.get((f.rcs, f.parent), None)) 649 # the parent is a changeset with the branch in its
650 # branchpoints such that it is the latest possible
651 # commit without any intervening, unrelated commits.
652
653 for candidate in xrange(i):
654 if c.branch not in changesets[candidate].branchpoints:
655 if p is not None:
656 break
657 continue
658 p = candidate
618 659
619 c.parents = [] 660 c.parents = []
620 if p is not None: 661 if p is not None:
621 p = changesets[p] 662 p = changesets[p]
622 663
751 '%Y/%m/%d %H:%M:%S %1%2')) 792 '%Y/%m/%d %H:%M:%S %1%2'))
752 ui.write('Author: %s\n' % cs.author) 793 ui.write('Author: %s\n' % cs.author)
753 ui.write('Branch: %s\n' % (cs.branch or 'HEAD')) 794 ui.write('Branch: %s\n' % (cs.branch or 'HEAD'))
754 ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags)>1], 795 ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags)>1],
755 ','.join(cs.tags) or '(none)')) 796 ','.join(cs.tags) or '(none)'))
797 branchpoints = getattr(cs, 'branchpoints', None)
798 if branchpoints:
799 ui.write('Branchpoints: %s \n' % ', '.join(branchpoints))
756 if opts["parents"] and cs.parents: 800 if opts["parents"] and cs.parents:
757 if len(cs.parents)>1: 801 if len(cs.parents)>1:
758 ui.write('Parents: %s\n' % (','.join([str(p.id) for p in cs.parents]))) 802 ui.write('Parents: %s\n' % (','.join([str(p.id) for p in cs.parents])))
759 else: 803 else:
760 ui.write('Parent: %d\n' % cs.parents[0].id) 804 ui.write('Parent: %d\n' % cs.parents[0].id)