comparison mercurial/patch.py @ 14451:c78d41db6f88

patch: refactor file creation/removal detection The patcher has to know if a file is being created or removed to check if the target already exists, or to actually unlink the file when a hunk emptying it is applied. This was done by embedding the creation/removal information in the first (and only) hunk attached to the file. There are two problems with this approach: - creation/removal is really a property of the file being patched and not its hunk. - for regular patches, file creation cannot be deduced at parsing time: there are cases where the *stripped* file paths must be compared. Modifying hunks after their creation is clumsy and prevents further refactorings related to copy handling. Instead, we delegate this job to selectfile(), which has all the relevant information, and remove the hunk createfile() and rmfile() methods.
author Patrick Mezard <pmezard@gmail.com>
date Fri, 27 May 2011 21:50:09 +0200
parents cbe13e6bdc34
children ee574cfd0c32
comparison
equal deleted inserted replaced
14450:d1a1578c5f78 14451:c78d41db6f88
502 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@') 502 unidesc = re.compile('@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@')
503 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)') 503 contextdesc = re.compile('(---|\*\*\*) (\d+)(,(\d+))? (---|\*\*\*)')
504 eolmodes = ['strict', 'crlf', 'lf', 'auto'] 504 eolmodes = ['strict', 'crlf', 'lf', 'auto']
505 505
506 class patchfile(object): 506 class patchfile(object):
507 def __init__(self, ui, fname, backend, mode, missing=False, 507 def __init__(self, ui, fname, backend, mode, create, remove, missing=False,
508 eolmode='strict'): 508 eolmode='strict'):
509 self.fname = fname 509 self.fname = fname
510 self.eolmode = eolmode 510 self.eolmode = eolmode
511 self.eol = None 511 self.eol = None
512 self.backend = backend 512 self.backend = backend
513 self.ui = ui 513 self.ui = ui
514 self.lines = [] 514 self.lines = []
515 self.exists = False 515 self.exists = False
516 self.missing = missing 516 self.missing = missing
517 self.mode = mode 517 self.mode = mode
518 self.create = create
519 self.remove = remove
518 if not missing: 520 if not missing:
519 try: 521 try:
520 data, mode = self.backend.getfile(fname) 522 data, mode = self.backend.getfile(fname)
521 if data: 523 if data:
522 self.lines = data.splitlines(True) 524 self.lines = data.splitlines(True)
618 620
619 if self.missing: 621 if self.missing:
620 self.rej.append(h) 622 self.rej.append(h)
621 return -1 623 return -1
622 624
623 if self.exists and h.createfile(): 625 if self.exists and self.create:
624 self.ui.warn(_("file %s already exists\n") % self.fname) 626 self.ui.warn(_("file %s already exists\n") % self.fname)
625 self.rej.append(h) 627 self.rej.append(h)
626 return -1 628 return -1
627 629
628 if isinstance(h, binhunk): 630 if isinstance(h, binhunk):
629 if h.rmfile(): 631 if self.remove:
630 self.backend.unlink(self.fname) 632 self.backend.unlink(self.fname)
631 else: 633 else:
632 self.lines[:] = h.new() 634 self.lines[:] = h.new()
633 self.offset += len(h.new()) 635 self.offset += len(h.new())
634 self.dirty = True 636 self.dirty = True
652 orig_start = start 654 orig_start = start
653 # if there's skew we want to emit the "(offset %d lines)" even 655 # if there's skew we want to emit the "(offset %d lines)" even
654 # when the hunk cleanly applies at start + skew, so skip the 656 # when the hunk cleanly applies at start + skew, so skip the
655 # fast case code 657 # fast case code
656 if self.skew == 0 and diffhelpers.testhunk(old, self.lines, start) == 0: 658 if self.skew == 0 and diffhelpers.testhunk(old, self.lines, start) == 0:
657 if h.rmfile(): 659 if self.remove:
658 self.backend.unlink(self.fname) 660 self.backend.unlink(self.fname)
659 else: 661 else:
660 self.lines[start : start + h.lena] = h.new() 662 self.lines[start : start + h.lena] = h.new()
661 self.offset += h.lenb - h.lena 663 self.offset += h.lenb - h.lena
662 self.dirty = True 664 self.dirty = True
708 self.writelines(self.fname, self.lines, self.mode) 710 self.writelines(self.fname, self.lines, self.mode)
709 self.write_rej() 711 self.write_rej()
710 return len(self.rej) 712 return len(self.rej)
711 713
712 class hunk(object): 714 class hunk(object):
713 def __init__(self, desc, num, lr, context, create=False, remove=False): 715 def __init__(self, desc, num, lr, context):
714 self.number = num 716 self.number = num
715 self.desc = desc 717 self.desc = desc
716 self.hunk = [desc] 718 self.hunk = [desc]
717 self.a = [] 719 self.a = []
718 self.b = [] 720 self.b = []
721 if lr is not None: 723 if lr is not None:
722 if context: 724 if context:
723 self.read_context_hunk(lr) 725 self.read_context_hunk(lr)
724 else: 726 else:
725 self.read_unified_hunk(lr) 727 self.read_unified_hunk(lr)
726 self.create = create
727 self.remove = remove and not create
728 728
729 def getnormalized(self): 729 def getnormalized(self):
730 """Return a copy with line endings normalized to LF.""" 730 """Return a copy with line endings normalized to LF."""
731 731
732 def normalize(lines): 732 def normalize(lines):
736 line = line[:-2] + '\n' 736 line = line[:-2] + '\n'
737 nlines.append(line) 737 nlines.append(line)
738 return nlines 738 return nlines
739 739
740 # Dummy object, it is rebuilt manually 740 # Dummy object, it is rebuilt manually
741 nh = hunk(self.desc, self.number, None, None, False, False) 741 nh = hunk(self.desc, self.number, None, None)
742 nh.number = self.number 742 nh.number = self.number
743 nh.desc = self.desc 743 nh.desc = self.desc
744 nh.hunk = self.hunk 744 nh.hunk = self.hunk
745 nh.a = normalize(self.a) 745 nh.a = normalize(self.a)
746 nh.b = normalize(self.b) 746 nh.b = normalize(self.b)
747 nh.starta = self.starta 747 nh.starta = self.starta
748 nh.startb = self.startb 748 nh.startb = self.startb
749 nh.lena = self.lena 749 nh.lena = self.lena
750 nh.lenb = self.lenb 750 nh.lenb = self.lenb
751 nh.create = self.create
752 nh.remove = self.remove
753 return nh 751 return nh
754 752
755 def read_unified_hunk(self, lr): 753 def read_unified_hunk(self, lr):
756 m = unidesc.match(self.desc) 754 m = unidesc.match(self.desc)
757 if not m: 755 if not m:
889 lr.push(l) 887 lr.push(l)
890 888
891 def complete(self): 889 def complete(self):
892 return len(self.a) == self.lena and len(self.b) == self.lenb 890 return len(self.a) == self.lena and len(self.b) == self.lenb
893 891
894 def createfile(self):
895 return self.starta == 0 and self.lena == 0 and self.create
896
897 def rmfile(self):
898 return self.startb == 0 and self.lenb == 0 and self.remove
899
900 def fuzzit(self, l, fuzz, toponly): 892 def fuzzit(self, l, fuzz, toponly):
901 # this removes context lines from the top and bottom of list 'l'. It 893 # this removes context lines from the top and bottom of list 'l'. It
902 # checks the hunk to make sure only context lines are removed, and then 894 # checks the hunk to make sure only context lines are removed, and then
903 # returns a new shortened list of lines. 895 # returns a new shortened list of lines.
904 fuzz = min(fuzz, len(l)-1) 896 fuzz = min(fuzz, len(l)-1)
940 def new(self, fuzz=0, toponly=False): 932 def new(self, fuzz=0, toponly=False):
941 return self.fuzzit(self.b, fuzz, toponly) 933 return self.fuzzit(self.b, fuzz, toponly)
942 934
943 class binhunk: 935 class binhunk:
944 'A binary patch file. Only understands literals so far.' 936 'A binary patch file. Only understands literals so far.'
945 def __init__(self, gitpatch, lr): 937 def __init__(self, lr):
946 self.gitpatch = gitpatch
947 self.text = None 938 self.text = None
948 self.hunk = ['GIT binary patch\n'] 939 self.hunk = ['GIT binary patch\n']
949 self._read(lr) 940 self._read(lr)
950
951 def createfile(self):
952 return self.gitpatch.op == 'ADD'
953
954 def rmfile(self):
955 return self.gitpatch.op == 'DELETE'
956 941
957 def complete(self): 942 def complete(self):
958 return self.text is not None 943 return self.text is not None
959 944
960 def new(self): 945 def new(self):
1018 def selectfile(backend, afile_orig, bfile_orig, hunk, strip, gp): 1003 def selectfile(backend, afile_orig, bfile_orig, hunk, strip, gp):
1019 if gp: 1004 if gp:
1020 # Git patches do not play games. Excluding copies from the 1005 # Git patches do not play games. Excluding copies from the
1021 # following heuristic avoids a lot of confusion 1006 # following heuristic avoids a lot of confusion
1022 fname = pathstrip(gp.path, strip - 1)[1] 1007 fname = pathstrip(gp.path, strip - 1)[1]
1023 missing = not hunk.createfile() and not backend.exists(fname) 1008 create = gp.op == 'ADD'
1024 return fname, missing 1009 remove = gp.op == 'DELETE'
1010 missing = not create and not backend.exists(fname)
1011 return fname, missing, create, remove
1025 nulla = afile_orig == "/dev/null" 1012 nulla = afile_orig == "/dev/null"
1026 nullb = bfile_orig == "/dev/null" 1013 nullb = bfile_orig == "/dev/null"
1014 create = nulla and hunk.starta == 0 and hunk.lena == 0
1015 remove = nullb and hunk.startb == 0 and hunk.lenb == 0
1027 abase, afile = pathstrip(afile_orig, strip) 1016 abase, afile = pathstrip(afile_orig, strip)
1028 gooda = not nulla and backend.exists(afile) 1017 gooda = not nulla and backend.exists(afile)
1029 bbase, bfile = pathstrip(bfile_orig, strip) 1018 bbase, bfile = pathstrip(bfile_orig, strip)
1030 if afile == bfile: 1019 if afile == bfile:
1031 goodb = gooda 1020 goodb = gooda
1032 else: 1021 else:
1033 goodb = not nullb and backend.exists(bfile) 1022 goodb = not nullb and backend.exists(bfile)
1034 createfunc = hunk.createfile 1023 missing = not goodb and not gooda and not create
1035 missing = not goodb and not gooda and not createfunc()
1036 1024
1037 # some diff programs apparently produce patches where the afile is 1025 # some diff programs apparently produce patches where the afile is
1038 # not /dev/null, but afile starts with bfile 1026 # not /dev/null, but afile starts with bfile
1039 abasedir = afile[:afile.rfind('/') + 1] 1027 abasedir = afile[:afile.rfind('/') + 1]
1040 bbasedir = bfile[:bfile.rfind('/') + 1] 1028 bbasedir = bfile[:bfile.rfind('/') + 1]
1041 if missing and abasedir == bbasedir and afile.startswith(bfile): 1029 if (missing and abasedir == bbasedir and afile.startswith(bfile)
1042 # this isn't very pretty 1030 and hunk.starta == 0 and hunk.lena == 0):
1043 hunk.create = True 1031 create = True
1044 if createfunc(): 1032 missing = False
1045 missing = False
1046 else:
1047 hunk.create = False
1048 1033
1049 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the 1034 # If afile is "a/b/foo" and bfile is "a/b/foo.orig" we assume the
1050 # diff is between a file and its backup. In this case, the original 1035 # diff is between a file and its backup. In this case, the original
1051 # file should be patched (see original mpatch code). 1036 # file should be patched (see original mpatch code).
1052 isbackup = (abase == bbase and bfile.startswith(afile)) 1037 isbackup = (abase == bbase and bfile.startswith(afile))
1063 elif not nulla: 1048 elif not nulla:
1064 fname = afile 1049 fname = afile
1065 else: 1050 else:
1066 raise PatchError(_("undefined source and destination files")) 1051 raise PatchError(_("undefined source and destination files"))
1067 1052
1068 return fname, missing 1053 return fname, missing, create, remove
1069 1054
1070 def scangitpatch(lr, firstline): 1055 def scangitpatch(lr, firstline):
1071 """ 1056 """
1072 Git patches can emit: 1057 Git patches can emit:
1073 - rename a to b 1058 - rename a to b
1123 or x.startswith('GIT binary patch')): 1108 or x.startswith('GIT binary patch')):
1124 gp = None 1109 gp = None
1125 if gitpatches and gitpatches[-1][0] == bfile: 1110 if gitpatches and gitpatches[-1][0] == bfile:
1126 gp = gitpatches.pop()[1] 1111 gp = gitpatches.pop()[1]
1127 if x.startswith('GIT binary patch'): 1112 if x.startswith('GIT binary patch'):
1128 h = binhunk(gp, lr) 1113 h = binhunk(lr)
1129 else: 1114 else:
1130 if context is None and x.startswith('***************'): 1115 if context is None and x.startswith('***************'):
1131 context = True 1116 context = True
1132 create = afile == '/dev/null' or gp and gp.op == 'ADD' 1117 h = hunk(x, hunknum + 1, lr, context)
1133 remove = bfile == '/dev/null' or gp and gp.op == 'DELETE'
1134 h = hunk(x, hunknum + 1, lr, context, create, remove)
1135 hunknum += 1 1118 hunknum += 1
1136 if emitfile: 1119 if emitfile:
1137 emitfile = False 1120 emitfile = False
1138 yield 'file', (afile, bfile, h, gp) 1121 yield 'file', (afile, bfile, h, gp)
1139 yield 'hunk', h 1122 yield 'hunk', h
1248 backend.setfile(path, data, gp.mode) 1231 backend.setfile(path, data, gp.mode)
1249 if not first_hunk: 1232 if not first_hunk:
1250 continue 1233 continue
1251 try: 1234 try:
1252 mode = gp and gp.mode or None 1235 mode = gp and gp.mode or None
1253 current_file, missing = selectfile(backend, afile, bfile, 1236 current_file, missing, create, remove = selectfile(
1254 first_hunk, strip, gp) 1237 backend, afile, bfile, first_hunk, strip, gp)
1255 current_file = patcher(ui, current_file, backend, mode, 1238 current_file = patcher(ui, current_file, backend, mode,
1256 missing=missing, eolmode=eolmode) 1239 create, remove, missing=missing,
1240 eolmode=eolmode)
1257 except PatchError, inst: 1241 except PatchError, inst:
1258 ui.warn(str(inst) + '\n') 1242 ui.warn(str(inst) + '\n')
1259 current_file = None 1243 current_file = None
1260 rejects += 1 1244 rejects += 1
1261 continue 1245 continue
1384 changed.add(pathstrip(gp.path, strip - 1)[1]) 1368 changed.add(pathstrip(gp.path, strip - 1)[1])
1385 if gp.op == 'RENAME': 1369 if gp.op == 'RENAME':
1386 changed.add(pathstrip(gp.oldpath, strip - 1)[1]) 1370 changed.add(pathstrip(gp.oldpath, strip - 1)[1])
1387 if not first_hunk: 1371 if not first_hunk:
1388 continue 1372 continue
1389 current_file, missing = selectfile(backend, afile, bfile, 1373 current_file, missing, create, remove = selectfile(
1390 first_hunk, strip, gp) 1374 backend, afile, bfile, first_hunk, strip, gp)
1391 changed.add(current_file) 1375 changed.add(current_file)
1392 elif state not in ('hunk', 'git'): 1376 elif state not in ('hunk', 'git'):
1393 raise util.Abort(_('unsupported parser state: %s') % state) 1377 raise util.Abort(_('unsupported parser state: %s') % state)
1394 return changed 1378 return changed
1395 finally: 1379 finally: