changeset 15551:1fa41d1f1351 stable

posix: add extended support for OS X path folding

OS X does the following transformations on paths for comparisons:

a) 8-bit strings are decoded as UTF-8 to UTF-16
b) undecodable bytes are percent-escaped
c) accented characters are converted to NFD decomposed form, approximately
d) characters are converted to _lowercase_ using internal tables

Both (c) and (d) are done using internal tables that vary from release to release and match Unicode specs to greater or lesser extent. We approximate these functions using Python's internal Unicode data.

With this change, Mercurial will (in almost all cases) match OS X folding and not report unknown file aliases for files in UTF-8 or other encodings.
author Matt Mackall <mpm@selenic.com>
date Tue, 22 Nov 2011 17:26:32 -0600
parents b2fd4746414a
children 62c9183a0bbb
files mercurial/posix.py
diffstat 1 files changed, 19 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/posix.py	Tue Nov 22 17:26:31 2011 -0600
+++ b/mercurial/posix.py	Tue Nov 22 17:26:32 2011 -0600
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-import os, sys, errno, stat, getpass, pwd, grp, tempfile
+import os, sys, errno, stat, getpass, pwd, grp, tempfile, unicodedata
 
 posixfile = open
 nulldev = '/dev/null'
@@ -170,6 +170,24 @@
 
 if sys.platform == 'darwin':
     import fcntl # only needed on darwin, missing on jython
+
+    def normcase(path):
+        '''
+        Normalize a path the way OS X folds filenames for comparison.
+
+        path is a byte string. It is decoded as UTF-8; every byte that is
+        not part of a valid UTF-8 sequence is replaced by a percent escape
+        ("%XX"). The result is decomposed to NFD, lowercased and returned
+        UTF-8 encoded. This approximates the filesystem's own folding
+        using Python's Unicode data.
+        '''
+        try:
+            u = path.decode('utf-8')
+        except UnicodeDecodeError:
+            # Percent-encode each byte that is not valid UTF-8. Walk the
+            # input itself rather than comparing it byte-by-byte against a
+            # 'replace'-decoded copy: U+FFFD re-encodes as three bytes, so
+            # such a copy stops lining up after the first bad byte and
+            # every following byte would be escaped by mistake.
+            pieces = []
+            pos = 0
+            end = len(path)
+            while pos < end:
+                # a UTF-8 sequence is 1-4 bytes long; take the shortest
+                # prefix at pos that decodes cleanly
+                for width in (1, 2, 3, 4):
+                    chunk = path[pos:pos + width]
+                    try:
+                        pieces.append(chunk.decode('utf-8'))
+                    except UnicodeDecodeError:
+                        continue
+                    pos += len(chunk)
+                    break
+                else:
+                    # no valid sequence starts here: escape this one byte
+                    pieces.append(u'%%%02X' % ord(path[pos:pos + 1]))
+                    pos += 1
+            u = u''.join(pieces)
+
+        # Decompose then lowercase (HFS+ technote specifies lower)
+        return unicodedata.normalize('NFD', u).lower().encode('utf-8')
+
     def realpath(path):
         '''
         Returns the true, canonical file system path equivalent to the given