Mercurial > hg
changeset 26876:b8381832ce2b
posix: use getutf8char to handle OS X filename percent-escaping
This replaces an open-coded utf-8 parser that was ignoring subtle issues
like overlong encodings.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Thu, 05 Nov 2015 17:09:00 -0600 |
parents | cf47bdb2183c |
children | cb467a9d7593 |
files | mercurial/posix.py |
diffstat | 1 files changed, 10 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mercurial/posix.py Thu Nov 05 16:48:46 2015 -0600
+++ b/mercurial/posix.py Thu Nov 05 17:09:00 2015 -0600
@@ -255,40 +255,17 @@
         except UnicodeDecodeError:
             # OS X percent-encodes any bytes that aren't valid utf-8
             s = ''
-            g = ''
-            l = 0
-            for c in path:
-                o = ord(c)
-                if l and o < 128 or o >= 192:
-                    # we want a continuation byte, but didn't get one
-                    s += ''.join(["%%%02X" % ord(x) for x in g])
-                    g = ''
-                    l = 0
-                if l == 0 and o < 128:
-                    # ascii
-                    s += c
-                elif l == 0 and 194 <= o < 245:
-                    # valid leading bytes
-                    if o < 224:
-                        l = 1
-                    elif o < 240:
-                        l = 2
-                    else:
-                        l = 3
-                    g = c
-                elif l > 0 and 128 <= o < 192:
-                    # valid continuations
-                    g += c
-                    l -= 1
-                    if not l:
-                        s += g
-                        g = ''
-                else:
-                    # invalid
-                    s += "%%%02X" % o
+            pos = 0
+            l = len(s)
+            while pos < l:
+                try:
+                    c = encoding.getutf8char(path, pos)
+                    pos += len(c)
+                except ValueError:
+                    c = '%%%%02X' % path[pos]
+                    pos += 1
+                s += c
 
-            # any remaining partial characters
-            s += ''.join(["%%%02X" % ord(x) for x in g])
             u = s.decode('utf-8')
 
             # Decompose then lowercase (HFS+ technote specifies lower)
```