# HG changeset patch
# User Matt Mackall
# Date 1322004392 21600
# Node ID 1fa41d1f135107fde61f7cb868a9d6d8c36d9396
# Parent b2fd4746414a333e914689a5696c7c84d82ff641
#
# posix: add extended support for OS X path folding
#
# OS X does the following transformation on paths for comparisons:
#
# a) 8-bit strings are decoded as UTF-8 to UTF-16
# b) undecodable bytes are percent-escaped
# c) accented characters are converted to NFD decomposed form, approximately
# d) characters are converted to _lowercase_ using internal tables
#
# Both (c) and (d) are done using internal tables that vary from release to
# release and match Unicode specs to greater or lesser extent. We approximate
# these functions using Python's internal Unicode data.
#
# With this change, Mercurial will (in almost all cases) match OS X folding
# and not report unknown file aliases for files in UTF-8 or other encodings.
#
# Patch target: mercurial/posix.py (diff -r b2fd4746414a -r 1fa41d1f1351).
# The patch adds `unicodedata` to the module's import line and defines
# normcase() inside the existing `if sys.platform == 'darwin':` branch,
# directly ahead of realpath().

import unicodedata


def normcase(path):
    '''
    Fold a path the way OS X does when it compares file names.

    path is a byte string.  The folding steps are:

    - decode the bytes as UTF-8
    - percent-escape (%XX) every byte that is not part of a valid UTF-8
      sequence, leaving all validly encoded characters untouched
    - decompose to NFD using Python's Unicode tables
    - lowercase (this also lowercases the hex digits of the escapes,
      which is harmless because both sides of a comparison are folded)

    Returns the folded name as a UTF-8 encoded byte string.
    '''
    try:
        u = path.decode('utf-8')
    except UnicodeDecodeError:
        # Walk the string one UTF-8 sequence at a time.  Comparing the
        # input against a decode('replace')/encode round trip does not
        # work: every bad byte turns into the three-byte encoding of
        # U+FFFD, so the two strings fall out of step after the first
        # undecodable byte.
        pieces = []
        pos = 0
        end = len(path)
        while pos < end:
            # A UTF-8 sequence is 1-4 bytes long and the shortest prefix
            # that decodes is exactly one character.
            for width in range(1, min(4, end - pos) + 1):
                try:
                    pieces.append(path[pos:pos + width].decode('utf-8'))
                except UnicodeDecodeError:
                    continue
                pos += width
                break
            else:
                # no valid sequence starts here: escape this single byte
                pieces.append(u'%%%02X' % ord(path[pos:pos + 1]))
                pos += 1
        u = u''.join(pieces)

    # Decompose then lowercase (HFS+ technote specifies lower)
    return unicodedata.normalize('NFD', u).lower().encode('utf-8')