changeset 15734:9b0efacd7745

merge with i18n
author Matt Mackall <mpm@selenic.com>
date Mon, 26 Dec 2011 18:07:49 -0600
parents bc2a22357538 (diff) d56e9df61010 (current diff)
children 5b384b7f48d5
files
diffstat 8 files changed, 195 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/win32mbcs.py	Sun Dec 25 19:49:14 2011 +0300
+++ b/hgext/win32mbcs.py	Mon Dec 26 18:07:49 2011 -0600
@@ -127,11 +127,14 @@
 # NOTE: os.path.dirname() and os.path.basename() are safe because
 #       they use result of os.path.split()
 funcs = '''os.path.join os.path.split os.path.splitext
- os.path.splitunc os.path.normpath os.makedirs
+ os.path.normpath os.makedirs
  mercurial.util.endswithsep mercurial.util.splitpath mercurial.util.checkcase
  mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
  mercurial.util.checkwinfilename mercurial.util.checkosfilename'''
 
+# List of Windows-specific functions to be wrapped.
+winfuncs = '''os.path.splitunc'''
+
 # codec and alias names of sjis and big5 to be faked.
 problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
  hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
@@ -140,7 +143,8 @@
 
 def extsetup(ui):
     # TODO: decide use of config section for this extension
-    if not os.path.supports_unicode_filenames:
+    if ((not os.path.supports_unicode_filenames) and
+        (sys.platform != 'cygwin')):
         ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
         return
     # determine encoding for filename
@@ -150,6 +154,9 @@
     if _encoding.lower() in problematic_encodings.split():
         for f in funcs.split():
             wrapname(f, wrapper)
+        if os.name == 'nt':
+            for f in winfuncs.split():
+                wrapname(f, wrapper)
         wrapname("mercurial.osutil.listdir", wrapperforlistdir)
         # Check sys.args manually instead of using ui.debug() because
         # command line options is not yet applied when
--- a/mercurial/commands.py	Sun Dec 25 19:49:14 2011 +0300
+++ b/mercurial/commands.py	Mon Dec 26 18:07:49 2011 -0600
@@ -3897,14 +3897,21 @@
             return
         if df and not df(ctx.date()[0]):
             return
-        if opts['user'] and not [k for k in opts['user']
-                                 if k.lower() in ctx.user().lower()]:
-            return
+
+        lower = encoding.lower
+        if opts.get('user'):
+            luser = lower(ctx.user())
+            for k in [lower(x) for x in opts['user']]:
+                if (k in luser):
+                    break
+            else:
+                return
         if opts.get('keyword'):
-            for k in [kw.lower() for kw in opts['keyword']]:
-                if (k in ctx.user().lower() or
-                    k in ctx.description().lower() or
-                    k in " ".join(ctx.files()).lower()):
+            luser = lower(ctx.user())
+            ldesc = lower(ctx.description())
+            lfiles = lower(" ".join(ctx.files()))
+            for k in [lower(x) for x in opts['keyword']]:
+                if (k in luser or k in ldesc or k in lfiles):
                     break
             else:
                 return
--- a/mercurial/hgweb/webcommands.py	Sun Dec 25 19:49:14 2011 +0300
+++ b/mercurial/hgweb/webcommands.py	Mon Dec 26 18:07:49 2011 -0600
@@ -124,7 +124,8 @@
 
     def changelist(**map):
         count = 0
-        qw = query.lower().split()
+        lower = encoding.lower
+        qw = lower(query).split()
 
         def revgen():
             for i in xrange(len(web.repo) - 1, 0, -100):
@@ -139,9 +140,9 @@
         for ctx in revgen():
             miss = 0
             for q in qw:
-                if not (q in ctx.user().lower() or
-                        q in ctx.description().lower() or
-                        q in " ".join(ctx.files()).lower()):
+                if not (q in lower(ctx.user()) or
+                        q in lower(ctx.description()) or
+                        q in lower(" ".join(ctx.files()))):
                     miss = 1
                     break
             if miss:
--- a/mercurial/revset.py	Sun Dec 25 19:49:14 2011 +0300
+++ b/mercurial/revset.py	Mon Dec 26 18:07:49 2011 -0600
@@ -11,6 +11,7 @@
 import bookmarks as bookmarksmod
 import match as matchmod
 from i18n import _
+import encoding
 
 elements = {
     "(": (20, ("group", 1, ")"), ("func", 1, ")")),
@@ -233,8 +234,8 @@
     Alias for ``user(string)``.
     """
     # i18n: "author" is a keyword
-    n = getstring(x, _("author requires a string")).lower()
-    return [r for r in subset if n in repo[r].user().lower()]
+    n = encoding.lower(getstring(x, _("author requires a string")))
+    return [r for r in subset if n in encoding.lower(repo[r].user())]
 
 def bisect(repo, subset, x):
     """``bisect(string)``
@@ -376,11 +377,11 @@
     Search commit message for string. The match is case-insensitive.
     """
     # i18n: "desc" is a keyword
-    ds = getstring(x, _("desc requires a string")).lower()
+    ds = encoding.lower(getstring(x, _("desc requires a string")))
     l = []
     for r in subset:
         c = repo[r]
-        if ds in c.description().lower():
+        if ds in encoding.lower(c.description()):
             l.append(r)
     return l
 
@@ -522,12 +523,12 @@
     string. The match is case-insensitive.
     """
     # i18n: "keyword" is a keyword
-    kw = getstring(x, _("keyword requires a string")).lower()
+    kw = encoding.lower(getstring(x, _("keyword requires a string")))
     l = []
     for r in subset:
         c = repo[r]
         t = " ".join(c.files() + [c.user(), c.description()])
-        if kw in t.lower():
+        if kw in encoding.lower(t):
             l.append(r)
     return l
 
--- a/mercurial/util.py	Sun Dec 25 19:49:14 2011 +0300
+++ b/mercurial/util.py	Mon Dec 26 18:07:49 2011 -0600
@@ -622,9 +622,8 @@
     The root should be normcase-ed, too.
     '''
     def find(p, contents):
-        lenp = len(p)
         for n in contents:
-            if lenp == len(n) and normcase(n) == p:
+            if normcase(n) == p:
                 return n
         return None
 
@@ -641,14 +640,14 @@
             result.append(sep)
             continue
 
-        contents = _fspathcache.get(dir, None)
-        if contents is None:
-            contents = os.listdir(dir)
-            _fspathcache[dir] = contents
+        if dir not in _fspathcache:
+            _fspathcache[dir] = os.listdir(dir)
+        contents = _fspathcache[dir]
 
         found = find(part, contents)
         if not found:
-            # retry once for the corner case: add files after dir walking
+            # retry "once per directory" per "dirstate.walk", which
+            # may take place for each patch of "hg qpush", for example
             contents = os.listdir(dir)
             _fspathcache[dir] = contents
             found = find(part, contents)
--- a/tests/test-log.t	Sun Dec 25 19:49:14 2011 +0300
+++ b/tests/test-log.t	Mon Dec 26 18:07:49 2011 -0600
@@ -1159,3 +1159,56 @@
   $ hg log --template='{rev}:{node}\n' --hidden
   1:a765632148dc55d38c35c4f247c618701886cb2f
   0:9f758d63dcde62d547ebfb08e1e7ee96535f2b05
+
+clear extensions configuration
+  $ echo '[extensions]' >> $HGRCPATH
+  $ echo "hidden=!" >> $HGRCPATH
+  $ cd ..
+
+test -u/-k for problematic encoding
+# unicode: cp932:
+# u30A2    0x83 0x41(= 'A')
+# u30C2    0x83 0x61(= 'a')
+
+  $ hg init problematicencoding
+  $ cd problematicencoding
+
+  $ python > setup.sh <<EOF
+  > print u'''
+  > echo a > text
+  > hg add text
+  > hg --encoding utf-8 commit -u '\u30A2' -m none
+  > echo b > text
+  > hg --encoding utf-8 commit -u '\u30C2' -m none
+  > echo c > text
+  > hg --encoding utf-8 commit -u none -m '\u30A2'
+  > echo d > text
+  > hg --encoding utf-8 commit -u none -m '\u30C2'
+  > '''.encode('utf-8')
+  > EOF
+  $ sh < setup.sh
+
+test in problematic encoding
+  $ python > test.sh <<EOF
+  > print u'''
+  > hg --encoding cp932 log --template '{rev}\\n' -u '\u30A2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -u '\u30C2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -k '\u30A2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -k '\u30C2'
+  > '''.encode('cp932')
+  > EOF
+  $ sh < test.sh
+  0
+  ====
+  1
+  ====
+  2
+  0
+  ====
+  3
+  1
+
+  $ cd ..
--- a/tests/test-mq.t	Sun Dec 25 19:49:14 2011 +0300
+++ b/tests/test-mq.t	Mon Dec 26 18:07:49 2011 -0600
@@ -1394,3 +1394,46 @@
   patch queue now empty
   $ cd ..
 
+
+test case preservation through patch pushing, especially on a
+case-insensitive filesystem
+
+  $ hg init casepreserve
+  $ cd casepreserve
+
+  $ hg qnew add-file1
+  $ echo a > TeXtFiLe.TxT
+  $ hg add TeXtFiLe.TxT
+  $ hg qrefresh
+
+  $ hg qnew add-file2
+  $ echo b > AnOtHeRFiLe.TxT
+  $ hg add AnOtHeRFiLe.TxT
+  $ hg qrefresh
+
+  $ hg qnew modify-file
+  $ echo c >> AnOtHeRFiLe.TxT
+  $ hg qrefresh
+
+  $ hg qapplied
+  add-file1
+  add-file2
+  modify-file
+  $ hg qpop -a
+  popping modify-file
+  popping add-file2
+  popping add-file1
+  patch queue now empty
+
+this qpush causes the problems below if case preservation on a
+case-insensitive filesystem is not sufficient:
+(1) unexpected "adding ..." messages are shown
+(2) patching fails when modifying the files from (1)
+
+  $ hg qpush -a
+  applying add-file1
+  applying add-file2
+  applying modify-file
+  now at: modify-file
+
+  $ cd ..
--- a/tests/test-revset.t	Sun Dec 25 19:49:14 2011 +0300
+++ b/tests/test-revset.t	Mon Dec 26 18:07:49 2011 -0600
@@ -475,3 +475,61 @@
   $ log 'max(1 or 2) and not 2'
   $ log 'min(1 or 2) and not 1'
   $ log 'last(1 or 2, 1) and not 2'
+
+  $ cd ..
+
+test author/desc/keyword in problematic encoding
+# unicode: cp932:
+# u30A2    0x83 0x41(= 'A')
+# u30C2    0x83 0x61(= 'a')
+
+  $ hg init problematicencoding
+  $ cd problematicencoding
+
+  $ python > setup.sh <<EOF
+  > print u'''
+  > echo a > text
+  > hg add text
+  > hg --encoding utf-8 commit -u '\u30A2' -m none
+  > echo b > text
+  > hg --encoding utf-8 commit -u '\u30C2' -m none
+  > echo c > text
+  > hg --encoding utf-8 commit -u none -m '\u30A2'
+  > echo d > text
+  > hg --encoding utf-8 commit -u none -m '\u30C2'
+  > '''.encode('utf-8')
+  > EOF
+  $ sh < setup.sh
+
+test in problematic encoding
+  $ python > test.sh <<EOF
+  > print u'''
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'author(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'desc(\u30C2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30A2)'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -r 'keyword(\u30C2)'
+  > '''.encode('cp932')
+  > EOF
+  $ sh < test.sh
+  0
+  ====
+  1
+  ====
+  2
+  ====
+  3
+  ====
+  0
+  2
+  ====
+  1
+  3
+
+  $ cd ..