changeset 15725:988409e44a76

i18n: use "encoding.lower()" to normalize specified keywords for log searching some problematic encoding (e.g.: cp932) uses ASCII alphabet characters in byte sequence of multi byte characters. "str.lower()" on such byte sequence may treat distinct characters as same one, and cause unexpected log matching. this patch uses "encoding.lower()" instead of "str.lower()" to normalize strings for compare.
author FUJIWARA Katsunori <foozy@lares.dti.ne.jp>
date Sun, 25 Dec 2011 20:35:16 +0900
parents 9e6a13c2aeb9
children 9b822edecb4c
files mercurial/commands.py tests/test-log.t
diffstat 2 files changed, 67 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/commands.py	Sun Dec 25 20:32:48 2011 +0900
+++ b/mercurial/commands.py	Sun Dec 25 20:35:16 2011 +0900
@@ -3897,14 +3897,21 @@
             return
         if df and not df(ctx.date()[0]):
             return
-        if opts['user'] and not [k for k in opts['user']
-                                 if k.lower() in ctx.user().lower()]:
-            return
+
+        lower = encoding.lower
+        if opts.get('user'):
+            luser = lower(ctx.user())
+            for k in [lower(x) for x in opts['user']]:
+                if (k in luser):
+                    break
+            else:
+                return
         if opts.get('keyword'):
-            for k in [kw.lower() for kw in opts['keyword']]:
-                if (k in ctx.user().lower() or
-                    k in ctx.description().lower() or
-                    k in " ".join(ctx.files()).lower()):
+            luser = lower(ctx.user())
+            ldesc = lower(ctx.description())
+            lfiles = lower(" ".join(ctx.files()))
+            for k in [lower(x) for x in opts['keyword']]:
+                if (k in luser or k in ldesc or k in lfiles):
                     break
             else:
                 return
--- a/tests/test-log.t	Sun Dec 25 20:32:48 2011 +0900
+++ b/tests/test-log.t	Sun Dec 25 20:35:16 2011 +0900
@@ -1159,3 +1159,56 @@
   $ hg log --template='{rev}:{node}\n' --hidden
   1:a765632148dc55d38c35c4f247c618701886cb2f
   0:9f758d63dcde62d547ebfb08e1e7ee96535f2b05
+
+clear extensions configuration
+  $ echo '[extensions]' >> $HGRCPATH
+  $ echo "hidden=!" >> $HGRCPATH
+  $ cd ..
+
+test -u/-k for problematic encoding
+# unicode: cp932:
+# u30A2    0x83 0x41(= 'A')
+# u30C2    0x83 0x61(= 'a')
+
+  $ hg init problematicencoding
+  $ cd problematicencoding
+
+  $ python > setup.sh <<EOF
+  > print u'''
+  > echo a > text
+  > hg add text
+  > hg --encoding utf-8 commit -u '\u30A2' -m none
+  > echo b > text
+  > hg --encoding utf-8 commit -u '\u30C2' -m none
+  > echo c > text
+  > hg --encoding utf-8 commit -u none -m '\u30A2'
+  > echo d > text
+  > hg --encoding utf-8 commit -u none -m '\u30C2'
+  > '''.encode('utf-8')
+  > EOF
+  $ sh < setup.sh
+
+test in problematic encoding
+  $ python > test.sh <<EOF
+  > print u'''
+  > hg --encoding cp932 log --template '{rev}\\n' -u '\u30A2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -u '\u30C2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -k '\u30A2'
+  > echo ====
+  > hg --encoding cp932 log --template '{rev}\\n' -k '\u30C2'
+  > '''.encode('cp932')
+  > EOF
+  $ sh < test.sh
+  0
+  ====
+  1
+  ====
+  2
+  0
+  ====
+  3
+  1
+
+  $ cd ..