Mercurial > hg
changeset 3821:158fce02dc40
Teach convert-repo to deal with mixed charsets in git
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 07 Dec 2006 18:03:28 -0600 |
parents | 4f056896c093 |
children | 28134d82db9b ed5a9b27bedc |
files | contrib/convert-repo |
diffstat | 1 files changed, 15 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/contrib/convert-repo Wed Dec 06 17:59:19 2006 -0600
+++ b/contrib/convert-repo Thu Dec 07 18:03:28 2006 -0600
@@ -21,8 +21,20 @@
 # interrupted and can be run repeatedly to copy new commits.
 
 import sys, os, zlib, sha, time
+
+os.environ["HGENCODING"] = "utf-8"
+
 from mercurial import hg, ui, util
 
+def recode(s):
+    try:
+        return s.decode("utf-8").encode("utf-8")
+    except:
+        try:
+            return s.decode("latin-1").encode("utf-8")
+        except:
+            return s.decode("utf-8", "replace").encode("utf-8")
+
 class convert_git:
     def __init__(self, path):
         self.path = path
@@ -55,6 +67,7 @@
         c = self.catfile(version, "commit") # read the commit hash
         end = c.find("\n\n")
         message = c[end+2:]
+        message = recode(message)
         l = c[:end].splitlines()
         manifest = l[0].split()[1]
         parents = []
@@ -65,11 +78,13 @@
                 tm, tz = p[-2:]
                 author = " ".join(p[:-2])
                 if author[0] == "<": author = author[1:-1]
+                author = recode(author)
             if n == "committer":
                 p = v.split()
                 tm, tz = p[-2:]
                 committer = " ".join(p[:-2])
                 if committer[0] == "<": committer = committer[1:-1]
+                committer = recode(committer)
                 message += "\ncommitter: %s\n" % v
             if n == "parent":
                 parents.append(v)
```