Mercurial > hg-stable
view mercurial/hg.py @ 146:4a828422247d
Handle merge with deletions
If you merge with a repo that has deleted a file after editing it, hg
attempted to resolve the file. This (correctly) resulted in hg verify
errors because the resolved version didn't show up in the manifests.
This moves the manifest resolution before file resolution and decides
which files to resolve based on the (partially) resolved manifest.
After files are resolved, the final manifest is committed.
author | mpm@selenic.com |
---|---|
date | Tue, 24 May 2005 20:30:35 -0800 |
parents | ea9188538222 |
children | c32286d0a665 |
line wrap: on
line source
# hg.py - repository classes for mercurial # # Copyright 2005 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms # of the GNU General Public License, incorporated herein by reference. import sys, struct, sha, socket, os, time, re, urllib2 import urllib from mercurial import byterange from mercurial.transaction import * from mercurial.revlog import * from difflib import SequenceMatcher class filelog(revlog): def __init__(self, opener, path): revlog.__init__(self, opener, os.path.join("data", path + ".i"), os.path.join("data", path + ".d")) def read(self, node): return self.revision(node) def add(self, text, transaction, link, p1=None, p2=None): return self.addrevision(text, transaction, link, p1, p2) def annotate(self, node): revs = [] while node != nullid: revs.append(node) node = self.parents(node)[0] revs.reverse() prev = [] annotate = [] for node in revs: curr = self.read(node).splitlines(1) linkrev = self.linkrev(node) sm = SequenceMatcher(None, prev, curr) new = [] for o, m, n, s, t in sm.get_opcodes(): if o == 'equal': new += annotate[m:n] else: new += [(linkrev, l) for l in curr[s:t]] annotate, prev = new, curr return annotate class manifest(revlog): def __init__(self, opener): self.mapcache = None self.listcache = None self.addlist = None revlog.__init__(self, opener, "00manifest.i", "00manifest.d") def read(self, node): if self.mapcache and self.mapcache[0] == node: return self.mapcache[1].copy() text = self.revision(node) map = {} self.listcache = (text, text.splitlines(1)) for l in self.listcache[1]: (f, n) = l.split('\0') map[f] = bin(n[:40]) self.mapcache = (node, map) return map def diff(self, a, b): # this is sneaky, as we're not actually using a and b if self.listcache and self.addlist and self.listcache[0] == a: d = mdiff.diff(self.listcache[1], self.addlist, 1) if mdiff.patch(a, d) != b: sys.stderr.write("*** sortdiff failed, falling back ***\n") return mdiff.textdiff(a, b) return d else: return mdiff.textdiff(a, b) def add(self, map, transaction, link, p1=None, p2=None): files = map.keys() files.sort() self.addlist = ["%s\000%s\n" % (f, hex(map[f])) for f in files] text = "".join(self.addlist) n = self.addrevision(text, transaction, link, p1, p2) self.mapcache = (n, map) self.listcache = (text, self.addlist) self.addlist = None return n class changelog(revlog): def __init__(self, opener): revlog.__init__(self, opener, "00changelog.i", "00changelog.d") def extract(self, text): if not text: return (nullid, "", "0", [], "") last = text.index("\n\n") desc = text[last + 2:] l = text[:last].splitlines() manifest = bin(l[0]) user = l[1] date = l[2] files = l[3:] return (manifest, user, date, files, desc) def read(self, node): return self.extract(self.revision(node)) def add(self, manifest, list, desc, transaction, p1=None, p2=None): user = (os.environ.get("HGUSER") or os.environ.get("EMAIL") or os.environ.get("LOGNAME", "unknown") + '@' + socket.getfqdn()) date = "%d %d" % (time.time(), time.timezone) list.sort() l = [hex(manifest), user, date] + list + ["", desc] text = "\n".join(l) return self.addrevision(text, transaction, self.count(), p1, p2) class dircache: def __init__(self, opener, ui): self.opener = opener self.dirty = 0 self.ui = ui self.map = None def __del__(self): if self.dirty: self.write() def __getitem__(self, key): try: return self.map[key] except TypeError: self.read() return self[key] def read(self): if self.map is not None: return self.map self.map = {} try: st = self.opener("dircache").read() except: return pos = 0 while pos < len(st): e = struct.unpack(">llll", st[pos:pos+16]) l = e[3] pos += 16 f = st[pos:pos + l] self.map[f] = e[:3] pos += l def update(self, files): if not files: return self.read() self.dirty = 1 for f in files: try: s = os.stat(f) self.map[f] = (s.st_mode, s.st_size, s.st_mtime) except IOError: self.remove(f) def taint(self, files): if not files: return self.read() self.dirty = 1 for f in files: self.map[f] = (0, -1, 0) def remove(self, files): if not files: return self.read() self.dirty = 1 for f in files: try: del self.map[f] except KeyError: self.ui.warn("Not in dircache: %s\n" % f) pass def clear(self): self.map = {} self.dirty = 1 def write(self): st = self.opener("dircache", "w") for f, e in self.map.items(): e = struct.pack(">llll", e[0], e[1], e[2], len(f)) st.write(e + f) self.dirty = 0 def copy(self): self.read() return self.map.copy() # used to avoid circular references so destructors work def opener(base): p = base def o(path, mode="r"): if p[:7] == "http://": f = os.path.join(p, urllib.quote(path)) return httprangereader(f) f = os.path.join(p, path) if mode != "r": try: s = os.stat(f) except OSError: d = os.path.dirname(f) if not os.path.isdir(d): os.makedirs(d) else: if s.st_nlink > 1: file(f + ".tmp", "w").write(file(f).read()) os.rename(f+".tmp", f) return file(f, mode) return o class localrepository: def __init__(self, ui, path=None, create=0): self.remote = 0 if path and path[:7] == "http://": self.remote = 1 self.path = path else: if not path: p = os.getcwd() while not os.path.isdir(os.path.join(p, ".hg")): p = os.path.dirname(p) if p == "/": raise "No repo found" path = p self.path = os.path.join(path, ".hg") self.root = path self.ui = ui if create: os.mkdir(self.path) os.mkdir(self.join("data")) self.opener = opener(self.path) self.manifest = manifest(self.opener) self.changelog = changelog(self.opener) self.ignorelist = None self.tags = None if not self.remote: self.dircache = dircache(self.opener, ui) try: self.current = bin(self.opener("current").read()) except IOError: self.current = None def setcurrent(self, node): self.current = node self.opener("current", "w").write(hex(node)) def ignore(self, f): if self.ignorelist is None: self.ignorelist = [] try: l = open(os.path.join(self.root, ".hgignore")) for pat in l: if pat != "\n": self.ignorelist.append(re.compile(pat[:-1])) except IOError: pass for pat in self.ignorelist: if pat.search(f): return True return False def lookup(self, key): if self.tags is None: self.tags = {} try: fl = self.file(".hgtags") for l in fl.revision(fl.tip()).splitlines(): if l: n, k = l.split(" ") self.tags[k] = bin(n) except KeyError: pass try: return self.tags[key] except KeyError: return self.changelog.lookup(key) def join(self, f): return os.path.join(self.path, f) def file(self, f): return filelog(self.opener, f) def transaction(self): return transaction(self.opener, self.join("journal"), self.join("undo")) def commit(self, parent, update = None, text = ""): tr = self.transaction() try: remove = [ l[:-1] for l in self.opener("to-remove") ] os.unlink(self.join("to-remove")) except IOError: remove = [] if update == None: update = self.diffdir(self.root, parent)[0] # check in files new = {} linkrev = self.changelog.count() update.sort() for f in update: self.ui.note(f + "\n") try: t = file(f).read() except IOError: remove.append(f) continue r = self.file(f) new[f] = r.add(t, tr, linkrev) # update manifest mmap = self.manifest.read(self.manifest.tip()) mmap.update(new) for f in remove: del mmap[f] mnode = self.manifest.add(mmap, tr, linkrev) # add changeset new = new.keys() new.sort() edittext = text + "\n"+"".join(["HG: changed %s\n" % f for f in new]) edittext += "".join(["HG: removed %s\n" % f for f in remove]) edittext = self.ui.edit(edittext) n = self.changelog.add(mnode, new, edittext, tr) tr.close() self.setcurrent(n) self.dircache.update(new) self.dircache.remove(remove) def checkout(self, node): # checkout is really dumb at the moment # it ought to basically merge change = self.changelog.read(node) l = self.manifest.read(change[0]).items() l.sort() for f,n in l: if f[0] == "/": continue self.ui.note(f, "\n") t = self.file(f).revision(n) try: file(f, "w").write(t) except IOError: os.makedirs(os.path.dirname(f)) file(f, "w").write(t) self.setcurrent(node) self.dircache.clear() self.dircache.update([f for f,n in l]) def diffdir(self, path, changeset): changed = [] mf = {} added = [] if changeset: change = self.changelog.read(changeset) mf = self.manifest.read(change[0]) if changeset == self.current: dc = self.dircache.copy() else: dc = dict.fromkeys(mf) def fcmp(fn): t1 = file(os.path.join(self.root, fn)).read() t2 = self.file(fn).revision(mf[fn]) return cmp(t1, t2) for dir, subdirs, files in os.walk(self.root): d = dir[len(self.root)+1:] if ".hg" in subdirs: subdirs.remove(".hg") for f in files: fn = os.path.join(d, f) try: s = os.stat(os.path.join(self.root, fn)) except: continue if fn in dc: c = dc[fn] del dc[fn] if not c: if fcmp(fn): changed.append(fn) elif c[1] != s.st_size: changed.append(fn) elif c[0] != s.st_mode or c[2] != s.st_mtime: if fcmp(fn): changed.append(fn) else: if self.ignore(fn): continue added.append(fn) deleted = dc.keys() deleted.sort() return (changed, added, deleted) def diffrevs(self, node1, node2): changed, added = [], [] change = self.changelog.read(node1) mf1 = self.manifest.read(change[0]) change = self.changelog.read(node2) mf2 = self.manifest.read(change[0]) for fn in mf2: if mf1.has_key(fn): if mf1[fn] != mf2[fn]: changed.append(fn) del mf1[fn] else: added.append(fn) deleted = mf1.keys() deleted.sort() return (changed, added, deleted) def add(self, list): self.dircache.taint(list) def remove(self, list): dl = self.opener("to-remove", "a") for f in list: dl.write(f + "\n") def branches(self, nodes): if not nodes: nodes = [self.changelog.tip()] b = [] for n in nodes: t = n while n: p = self.changelog.parents(n) if p[1] != nullid or p[0] == nullid: b.append((t, n, p[0], p[1])) break n = p[0] return b def between(self, pairs): r = [] for top, bottom in pairs: n, l, i = top, [], 0 f = 1 while n != bottom: p = self.changelog.parents(n)[0] if i == f: l.append(n) f = f * 2 n = p i += 1 r.append(l) return r def newer(self, nodes): m = {} nl = [] pm = {} cl = self.changelog t = l = cl.count() # find the lowest numbered node for n in nodes: l = min(l, cl.rev(n)) m[n] = 1 for i in xrange(l, t): n = cl.node(i) if n in m: # explicitly listed pm[n] = 1 nl.append(n) continue for p in cl.parents(n): if p in pm: # parent listed pm[n] = 1 nl.append(n) break return nl def getchangegroup(self, remote): tip = remote.branches([])[0] self.ui.debug("remote tip branch is %s:%s\n" % (short(tip[0]), short(tip[1]))) m = self.changelog.nodemap unknown = [tip] search = [] fetch = [] if tip[0] in m: self.ui.note("nothing to do!\n") return None while unknown: n = unknown.pop(0) if n == nullid: break if n[1] and n[1] in m: # do we know the base? self.ui.debug("found incomplete branch %s\n" % short(n[1])) search.append(n) # schedule branch range for scanning else: if n[2] in m and n[3] in m: if n[1] not in fetch: self.ui.debug("found new changeset %s\n" % short(n[1])) fetch.append(n[1]) # earliest unknown continue for b in remote.branches([n[2], n[3]]): if b[0] not in m: unknown.append(b) while search: n = search.pop(0) l = remote.between([(n[0], n[1])])[0] p = n[0] f = 1 for i in l + [n[1]]: if i in m: if f <= 2: self.ui.debug("found new branch changeset %s\n" % short(p)) fetch.append(p) else: self.ui.debug("narrowed branch search to %s:%s\n" % (short(p), short(i))) search.append((p, i)) break p, f = i, f * 2 for f in fetch: if f in m: raise "already have", short(f[:4]) self.ui.note("adding new changesets starting at " + " ".join([short(f) for f in fetch]) + "\n") return remote.changegroup(fetch) def changegroup(self, basenodes): nodes = self.newer(basenodes) # construct the link map linkmap = {} for n in nodes: linkmap[self.changelog.rev(n)] = n # construct a list of all changed files changed = {} for n in nodes: c = self.changelog.read(n) for f in c[3]: changed[f] = 1 changed = changed.keys() changed.sort() # the changegroup is changesets + manifests + all file revs revs = [ self.changelog.rev(n) for n in nodes ] yield self.changelog.group(linkmap) yield self.manifest.group(linkmap) for f in changed: g = self.file(f).group(linkmap) if not g: raise "couldn't find change to %s" % f l = struct.pack(">l", len(f)) yield "".join([l, f, g]) def addchangegroup(self, generator): class genread: def __init__(self, generator): self.g = generator self.buf = "" def read(self, l): while l > len(self.buf): try: self.buf += self.g.next() except StopIteration: break d, self.buf = self.buf[:l], self.buf[l:] return d if not generator: return source = genread(generator) def getchunk(add = 0): d = source.read(4) if not d: return "" l = struct.unpack(">l", d)[0] return source.read(l - 4 + add) tr = self.transaction() simple = True need = {} self.ui.status("adding changesets\n") # pull off the changeset group def report(x): self.ui.debug("add changeset %s\n" % short(x)) return self.changelog.count() csg = getchunk() co = self.changelog.tip() cn = self.changelog.addgroup(csg, report, tr) self.ui.status("adding manifests\n") # pull off the manifest group mfg = getchunk() mm = self.manifest.tip() mo = self.manifest.addgroup(mfg, lambda x: self.changelog.rev(x), tr) # do we need a resolve? if self.changelog.ancestor(co, cn) != co: simple = False resolverev = self.changelog.count() # resolve the manifest to determine which files # we care about merging self.ui.status("resolving manifests\n") ma = self.manifest.ancestor(mm, mo) omap = self.manifest.read(mo) # other amap = self.manifest.read(ma) # ancestor mmap = self.manifest.read(mm) # mine nmap = {} self.ui.debug(" ancestor %s local %s remote %s\n" % (short(ma), short(mm), short(mo))) for f, mid in mmap.iteritems(): if f in omap: if mid != omap[f]: self.ui.debug(" %s versions differ, do resolve\n" % f) need[f] = mid # use merged version or local version else: nmap[f] = mid # keep ours del omap[f] elif f in amap: if mid != amap[f]: r = self.ui.prompt( (" local changed %s which remote deleted\n" % f) + "(k)eep or (d)elete?", "[kd]", "k") if r == "k": nmap[f] = mid else: self.ui.debug("other deleted %s\n" % f) pass # other deleted it else: self.ui.debug("local created %s\n" %f) nmap[f] = mid # we created it del mmap for f, oid in omap.iteritems(): if f in amap: if oid != amap[f]: r = self.ui.prompt( ("remote changed %s which local deleted\n" % f) + "(k)eep or (d)elete?", "[kd]", "k") if r == "k": nmap[f] = oid else: pass # probably safe else: self.ui.debug("remote created %s, do resolve\n" % f) need[f] = oid del omap del amap new = need.keys() new.sort() # process the files self.ui.status("adding files\n") while 1: f = getchunk(4) if not f: break fg = getchunk() self.ui.debug("adding %s revisions\n" % f) fl = self.file(f) o = fl.tip() n = fl.addgroup(fg, lambda x: self.changelog.rev(x), tr) if f in need: del need[f] # manifest resolve determined we need to merge the tips nmap[f] = self.merge3(fl, f, o, n, tr, resolverev) if need: # we need to do trivial merges on local files for f in new: if f not in need: continue fl = self.file(f) nmap[f] = self.merge3(fl, f, need[f], fl.tip(), tr, resolverev) # For simple merges, we don't need to resolve manifests or changesets if simple: self.ui.debug("simple merge, skipping resolve\n") tr.close() return node = self.manifest.add(nmap, tr, resolverev, mm, mo) # Now all files and manifests are merged, we add the changed files # and manifest id to the changelog self.ui.status("committing merge changeset\n") if co == cn: cn = -1 edittext = "\nHG: merge resolve\n" + \ "".join(["HG: changed %s\n" % f for f in new]) edittext = self.ui.edit(edittext) n = self.changelog.add(node, new, edittext, tr, co, cn) tr.close() def merge3(self, fl, fn, my, other, transaction, link): """perform a 3-way merge and append the result""" def temp(prefix, node): pre = "%s~%s." % (os.path.basename(fn), prefix) (fd, name) = tempfile.mkstemp("", pre) f = os.fdopen(fd, "w") f.write(fl.revision(node)) f.close() return name base = fl.ancestor(my, other) self.ui.note("resolving %s\n" % fn) self.ui.debug("local %s remote %s ancestor %s\n" % (short(my), short(other), short(base))) if my == base: text = fl.revision(other) else: a = temp("local", my) b = temp("remote", other) c = temp("parent", base) cmd = os.environ["HGMERGE"] self.ui.debug("invoking merge with %s\n" % cmd) r = os.system("%s %s %s %s" % (cmd, a, b, c)) if r: raise "Merge failed!" text = open(a).read() os.unlink(a) os.unlink(b) os.unlink(c) return fl.addrevision(text, transaction, link, my, other) class remoterepository: def __init__(self, ui, path): self.url = path.replace("hg://", "http://", 1) self.ui = ui def do_cmd(self, cmd, **args): self.ui.debug("sending %s command\n" % cmd) q = {"cmd": cmd} q.update(args) qs = urllib.urlencode(q) cu = "%s?%s" % (self.url, qs) return urllib.urlopen(cu) def branches(self, nodes): n = " ".join(map(hex, nodes)) d = self.do_cmd("branches", nodes=n).read() br = [ map(bin, b.split(" ")) for b in d.splitlines() ] return br def between(self, pairs): n = "\n".join(["-".join(map(hex, p)) for p in pairs]) d = self.do_cmd("between", pairs=n).read() p = [ map(bin, l.split(" ")) for l in d.splitlines() ] return p def changegroup(self, nodes): n = " ".join(map(hex, nodes)) zd = zlib.decompressobj() f = self.do_cmd("changegroup", roots=n) while 1: d = f.read(4096) if not d: yield zd.flush() break yield zd.decompress(d) def repository(ui, path=None, create=0): if path and path[:5] == "hg://": return remoterepository(ui, path) else: return localrepository(ui, path, create) class ui: def __init__(self, verbose=False, debug=False, quiet=False, interactive=True): self.quiet = quiet and not verbose and not debug self.verbose = verbose or debug self.debugflag = debug self.interactive = interactive def write(self, *args): for a in args: sys.stdout.write(str(a)) def readline(self): return sys.stdin.readline()[:-1] def prompt(self, msg, pat, default = "y"): if not self.interactive: return default while 1: self.write(msg, " ") r = self.readline() if re.match(pat, r): return r else: self.write("unrecognized response\n") def status(self, *msg): if not self.quiet: self.write(*msg) def warn(self, msg): self.write(*msg) def note(self, *msg): if self.verbose: self.write(*msg) def debug(self, *msg): if self.debugflag: self.write(*msg) def edit(self, text): (fd, name) = tempfile.mkstemp("hg") f = os.fdopen(fd, "w") f.write(text) f.close() editor = os.environ.get("EDITOR", "vi") r = os.system("%s %s" % (editor, name)) if r: raise "Edit failed!" t = open(name).read() t = re.sub("(?m)^HG:.*\n", "", t) return t class httprangereader: def __init__(self, url): self.url = url self.pos = 0 def seek(self, pos): self.pos = pos def read(self, bytes=None): opener = urllib2.build_opener(byterange.HTTPRangeHandler()) urllib2.install_opener(opener) req = urllib2.Request(self.url) end = '' if bytes: end = self.pos + bytes req.add_header('Range', 'bytes=%d-%s' % (self.pos, end)) f = urllib2.urlopen(req) return f.read()