i18n: use encoding.lower/upper for encoding aware case folding
This patch uses encoding.lower/upper for case folding, because the
lower()/upper() methods of str cannot fold the case of non-ASCII
characters correctly.
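To avoid a cyclic dependency and to encapsulate the logic of normcase
in each platform module, this patch introduces encodinglower/encodingupper
in both the posix- and windows-specific files; util.py assigns
encoding.lower/upper to them after choosing the platform module.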
This patch does not change the implementation of normcase() in posix.py,
because we do not know the encoding of filenames on POSIX.
The normcase() entries (mercurial.windows.normcase and
mercurial.util.normcase) are removed from the function wrap list in
hgext/win32mbcs.py, because this patch makes them encoding-aware.
--- a/hgext/win32mbcs.py Fri Dec 16 21:09:40 2011 +0900
+++ b/hgext/win32mbcs.py Fri Dec 16 21:09:41 2011 +0900
@@ -128,8 +128,6 @@
# they use result of os.path.split()
funcs = '''os.path.join os.path.split os.path.splitext
os.path.splitunc os.path.normpath os.makedirs
- mercurial.windows.normcase
- mercurial.util.normcase
mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
mercurial.util.checkwinfilename mercurial.util.checkosfilename'''
--- a/mercurial/encoding.py Fri Dec 16 21:09:40 2011 +0900
+++ b/mercurial/encoding.py Fri Dec 16 21:09:41 2011 +0900
@@ -171,3 +171,22 @@
return lu.encode(encoding)
except UnicodeError:
return s.lower() # we don't know how to fold this except in ASCII
+ except LookupError, k:
+ raise error.Abort(k, hint="please check your locale settings")
+
+def upper(s):
+ "best-effort encoding-aware case-folding of local string s"
+ try:
+ if isinstance(s, localstr):
+ u = s._utf8.decode("utf-8")
+ else:
+ u = s.decode(encoding, encodingmode)
+
+ uu = u.upper()
+ if u == uu:
+ return s # preserve localstring
+ return uu.encode(encoding)
+ except UnicodeError:
+ return s.upper() # we don't know how to fold this except in ASCII
+ except LookupError, k:
+ raise error.Abort(k, hint="please check your locale settings")
--- a/mercurial/posix.py Fri Dec 16 21:09:40 2011 +0900
+++ b/mercurial/posix.py Fri Dec 16 21:09:41 2011 +0900
@@ -164,6 +164,9 @@
st2 = os.lstat(fpath2)
return st1.st_dev == st2.st_dev
+encodinglower = None
+encodingupper = None
+
# os.path.normcase is a no-op, which doesn't help us on non-native filesystems
def normcase(path):
return path.lower()
--- a/mercurial/util.py Fri Dec 16 21:09:40 2011 +0900
+++ b/mercurial/util.py Fri Dec 16 21:09:41 2011 +0900
@@ -24,6 +24,9 @@
else:
import posix as platform
+platform.encodinglower = encoding.lower
+platform.encodingupper = encoding.upper
+
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
--- a/mercurial/windows.py Fri Dec 16 21:09:40 2011 +0900
+++ b/mercurial/windows.py Fri Dec 16 21:09:41 2011 +0900
@@ -131,8 +131,11 @@
def normpath(path):
return pconvert(os.path.normpath(path))
+encodinglower = None
+encodingupper = None
+
def normcase(path):
- return path.upper()
+ return encodingupper(path)
def realpath(path):
'''