comparison hgext/convert/cvsps.py @ 18261:1b7b5975793f

cvsps: use commitids (when present) to detect changesets. Simplify core logic by no longer attempting to work around missing class attributes. Instead, always generate the attributes and ignore the cache if the attributes are missing.
author Frank Kingswood <frank@kingswood-consulting.co.uk>
date Tue, 08 Jan 2013 20:11:20 +0000
parents a08775ec89f2
children 246f290e162a
comparison
equal deleted inserted replaced
18260:580274a1f1c2 18261:1b7b5975793f
17 '''Class logentry has the following attributes: 17 '''Class logentry has the following attributes:
18 .author - author name as CVS knows it 18 .author - author name as CVS knows it
19 .branch - name of branch this revision is on 19 .branch - name of branch this revision is on
20 .branches - revision tuple of branches starting at this revision 20 .branches - revision tuple of branches starting at this revision
21 .comment - commit message 21 .comment - commit message
22 .commitid - CVS commitid or None
22 .date - the commit date as a (time, tz) tuple 23 .date - the commit date as a (time, tz) tuple
23 .dead - true if file revision is dead 24 .dead - true if file revision is dead
24 .file - Name of file 25 .file - Name of file
25 .lines - a tuple (+lines, -lines) or None 26 .lines - a tuple (+lines, -lines) or None
26 .parent - Previous revision of this entry 27 .parent - Previous revision of this entry
27 .rcs - name of file as returned from CVS 28 .rcs - name of file as returned from CVS
28 .revision - revision number as tuple 29 .revision - revision number as tuple
29 .tags - list of tags on the file 30 .tags - list of tags on the file
30 .synthetic - is this a synthetic "file ... added on ..." revision? 31 .synthetic - is this a synthetic "file ... added on ..." revision?
31 .mergepoint- the branch that has been merged from 32 .mergepoint - the branch that has been merged from (if present in
32 (if present in rlog output) 33 rlog output) or None
33 .branchpoints- the branches that start at the current entry 34 .branchpoints - the branches that start at the current entry or empty
34 ''' 35 '''
35 def __init__(self, **entries): 36 def __init__(self, **entries):
36 self.synthetic = False 37 self.synthetic = False
37 self.__dict__.update(entries) 38 self.__dict__.update(entries)
38 39
39 def __repr__(self): 40 def __repr__(self):
40 return "<%s at 0x%x: %s %s>" % (self.__class__.__name__, 41 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
41 id(self), 42 return "%s(%s)"%(type(self).__name__, ", ".join(items))
42 self.file,
43 ".".join(map(str, self.revision)))
44 43
45 class logerror(Exception): 44 class logerror(Exception):
46 pass 45 pass
47 46
48 def getrepopath(cvspath): 47 def getrepopath(cvspath):
111 re_32 = re.compile('=======================================' 110 re_32 = re.compile('======================================='
112 '======================================$') 111 '======================================$')
113 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$') 112 re_50 = re.compile('revision ([\\d.]+)(\s+locked by:\s+.+;)?$')
114 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);' 113 re_60 = re.compile(r'date:\s+(.+);\s+author:\s+(.+);\s+state:\s+(.+?);'
115 r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?' 114 r'(\s+lines:\s+(\+\d+)?\s+(-\d+)?;)?'
115 r'(\s+commitid:\s+([^;]+);)?'
116 r'(.*mergepoint:\s+([^;]+);)?') 116 r'(.*mergepoint:\s+([^;]+);)?')
117 re_70 = re.compile('branches: (.+);$') 117 re_70 = re.compile('branches: (.+);$')
118 118
119 file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch') 119 file_added_re = re.compile(r'file [^/]+ was (initially )?added on branch')
120 120
169 169
170 if cache == 'update': 170 if cache == 'update':
171 try: 171 try:
172 ui.note(_('reading cvs log cache %s\n') % cachefile) 172 ui.note(_('reading cvs log cache %s\n') % cachefile)
173 oldlog = pickle.load(open(cachefile)) 173 oldlog = pickle.load(open(cachefile))
174 for e in oldlog:
175 if not (util.safehasattr(e, 'branchpoints') and
176 util.safehasattr(e, 'commitid') and
177 util.safehasattr(e, 'mergepoint')):
178 ui.status(_('ignoring old cache\n'))
179 oldlog = []
180 break
181
174 ui.note(_('cache has %d log entries\n') % len(oldlog)) 182 ui.note(_('cache has %d log entries\n') % len(oldlog))
175 except Exception, e: 183 except Exception, e:
176 ui.note(_('error reading cache: %r\n') % e) 184 ui.note(_('error reading cache: %r\n') % e)
177 185
178 if oldlog: 186 if oldlog:
296 # as this state is re-entered for subsequent revisions of a file. 304 # as this state is re-entered for subsequent revisions of a file.
297 match = re_50.match(line) 305 match = re_50.match(line)
298 assert match, _('expected revision number') 306 assert match, _('expected revision number')
299 e = logentry(rcs=scache(rcs), file=scache(filename), 307 e = logentry(rcs=scache(rcs), file=scache(filename),
300 revision=tuple([int(x) for x in match.group(1).split('.')]), 308 revision=tuple([int(x) for x in match.group(1).split('.')]),
301 branches=[], parent=None) 309 branches=[], parent=None, commitid=None, mergepoint=None, branchpoints=set())
310
302 state = 6 311 state = 6
303 312
304 elif state == 6: 313 elif state == 6:
305 # expecting date, author, state, lines changed 314 # expecting date, author, state, lines changed
306 match = re_60.match(line) 315 match = re_60.match(line)
327 elif match.group(6): 336 elif match.group(6):
328 e.lines = (0, int(match.group(6))) 337 e.lines = (0, int(match.group(6)))
329 else: 338 else:
330 e.lines = None 339 e.lines = None
331 340
332 if match.group(7): # cvsnt mergepoint 341 if match.group(7): # cvs 1.12 commitid
333 myrev = match.group(8).split('.') 342 e.commitid = match.group(8)
343
344 if match.group(9): # cvsnt mergepoint
345 myrev = match.group(10).split('.')
334 if len(myrev) == 2: # head 346 if len(myrev) == 2: # head
335 e.mergepoint = 'HEAD' 347 e.mergepoint = 'HEAD'
336 else: 348 else:
337 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]]) 349 myrev = '.'.join(myrev[:-2] + ['0', myrev[-2]])
338 branches = [b for b in branchmap if branchmap[b] == myrev] 350 branches = [b for b in branchmap if branchmap[b] == myrev]
339 assert len(branches) == 1, ('unknown branch: %s' 351 assert len(branches) == 1, ('unknown branch: %s'
340 % e.mergepoint) 352 % e.mergepoint)
341 e.mergepoint = branches[0] 353 e.mergepoint = branches[0]
342 else: 354
343 e.mergepoint = None
344 e.comment = [] 355 e.comment = []
345 state = 7 356 state = 7
346 357
347 elif state == 7: 358 elif state == 7:
348 # read the revision numbers of branches that start at this revision 359 # read the revision numbers of branches that start at this revision
467 '''Class changeset has the following attributes: 478 '''Class changeset has the following attributes:
468 .id - integer identifying this changeset (list index) 479 .id - integer identifying this changeset (list index)
469 .author - author name as CVS knows it 480 .author - author name as CVS knows it
470 .branch - name of branch this changeset is on, or None 481 .branch - name of branch this changeset is on, or None
471 .comment - commit message 482 .comment - commit message
483 .commitid - CVS commitid or None
472 .date - the commit date as a (time,tz) tuple 484 .date - the commit date as a (time,tz) tuple
473 .entries - list of logentry objects in this changeset 485 .entries - list of logentry objects in this changeset
474 .parents - list of one or two parent changesets 486 .parents - list of one or two parent changesets
475 .tags - list of tags on this changeset 487 .tags - list of tags on this changeset
476 .synthetic - from synthetic revision "file ... added on branch ..." 488 .synthetic - from synthetic revision "file ... added on branch ..."
477 .mergepoint- the branch that has been merged from 489 .mergepoint- the branch that has been merged from or None
478 (if present in rlog output) 490 .branchpoints- the branches that start at the current entry or empty
479 .branchpoints- the branches that start at the current entry
480 ''' 491 '''
481 def __init__(self, **entries): 492 def __init__(self, **entries):
482 self.synthetic = False 493 self.synthetic = False
483 self.__dict__.update(entries) 494 self.__dict__.update(entries)
484 495
485 def __repr__(self): 496 def __repr__(self):
486 return "<%s at 0x%x: %s>" % (self.__class__.__name__, 497 items = ("%s=%r"%(k, self.__dict__[k]) for k in sorted(self.__dict__))
487 id(self), 498 return "%s(%s)"%(type(self).__name__, ", ".join(items))
488 getattr(self, 'id', "(no id)"))
489 499
490 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None): 500 def createchangeset(ui, log, fuzz=60, mergefrom=None, mergeto=None):
491 '''Convert log into changesets.''' 501 '''Convert log into changesets.'''
492 502
493 ui.status(_('creating changesets\n')) 503 ui.status(_('creating changesets\n'))
494 504
495 # Merge changesets 505 # Merge changesets
496 506 log.sort(key=lambda x: (x.commitid, x.comment, x.author, x.branch, x.date, x.branchpoints))
497 log.sort(key=lambda x: (x.comment, x.author, x.branch, x.date))
498 507
499 changesets = [] 508 changesets = []
500 files = set() 509 files = set()
501 c = None 510 c = None
502 for i, e in enumerate(log): 511 for i, e in enumerate(log):
515 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a 524 # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
516 # later version of foo may be in MYBRANCH2, so foo should be the 525 # later version of foo may be in MYBRANCH2, so foo should be the
517 # first changeset and bar the next and MYBRANCH and MYBRANCH2 526 # first changeset and bar the next and MYBRANCH and MYBRANCH2
518 # should both start off of the bar changeset. No provisions are 527 # should both start off of the bar changeset. No provisions are
519 # made to ensure that this is, in fact, what happens. 528 # made to ensure that this is, in fact, what happens.
520 if not (c and 529 if not (c and e.branchpoints == c.branchpoints and
521 e.comment == c.comment and 530 ( # cvs commitids
522 e.author == c.author and 531 (e.commitid is not None and e.commitid == c.commitid)
523 e.branch == c.branch and 532 or
524 (not util.safehasattr(e, 'branchpoints') or 533 ( # no commitids, use fuzzy commit detection
525 not util.safehasattr (c, 'branchpoints') or 534 (e.commitid is None or c.commitid is None) and
526 e.branchpoints == c.branchpoints) and 535 e.comment == c.comment and
527 ((c.date[0] + c.date[1]) <= 536 e.author == c.author and
528 (e.date[0] + e.date[1]) <= 537 e.branch == c.branch and
529 (c.date[0] + c.date[1]) + fuzz) and 538 ((c.date[0] + c.date[1]) <=
530 e.file not in files): 539 (e.date[0] + e.date[1]) <=
540 (c.date[0] + c.date[1]) + fuzz) and
541 e.file not in files
542 )
543 )):
531 c = changeset(comment=e.comment, author=e.author, 544 c = changeset(comment=e.comment, author=e.author,
532 branch=e.branch, date=e.date, entries=[], 545 branch=e.branch, date=e.date,
533 mergepoint=getattr(e, 'mergepoint', None), 546 entries=[], mergepoint=e.mergepoint,
534 branchpoints=getattr(e, 'branchpoints', set())) 547 branchpoints=e.branchpoints, commitid=e.commitid)
535 changesets.append(c) 548 changesets.append(c)
549
536 files = set() 550 files = set()
537 if len(changesets) % 100 == 0: 551 if len(changesets) % 100 == 0:
538 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1]) 552 t = '%d %s' % (len(changesets), repr(e.comment)[1:-1])
539 ui.status(util.ellipsis(t, 80) + '\n') 553 ui.status(util.ellipsis(t, 80) + '\n')
540 554
806 '%Y/%m/%d %H:%M:%S %1%2'))) 820 '%Y/%m/%d %H:%M:%S %1%2')))
807 ui.write(('Author: %s\n' % cs.author)) 821 ui.write(('Author: %s\n' % cs.author))
808 ui.write(('Branch: %s\n' % (cs.branch or 'HEAD'))) 822 ui.write(('Branch: %s\n' % (cs.branch or 'HEAD')))
809 ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1], 823 ui.write(('Tag%s: %s \n' % (['', 's'][len(cs.tags) > 1],
810 ','.join(cs.tags) or '(none)'))) 824 ','.join(cs.tags) or '(none)')))
811 branchpoints = getattr(cs, 'branchpoints', None) 825 if cs.branchpoints:
812 if branchpoints: 826 ui.write('Branchpoints: %s \n' % ', '.join(cs.branchpoints))
813 ui.write(('Branchpoints: %s \n' % ', '.join(branchpoints)))
814 if opts["parents"] and cs.parents: 827 if opts["parents"] and cs.parents:
815 if len(cs.parents) > 1: 828 if len(cs.parents) > 1:
816 ui.write(('Parents: %s\n' % 829 ui.write(('Parents: %s\n' %
817 (','.join([str(p.id) for p in cs.parents])))) 830 (','.join([str(p.id) for p in cs.parents]))))
818 else: 831 else: