Mercurial > hg
comparison hgext/win32mbcs.py @ 6887:304484c7e0ba
Update win32mbcs extension
* Code cleanup by Matt.
* Fix the issue with case-insensitive fs support
by also wrapping util.fspath() and util.checkcase()
* Abort the program when path conversion fails.
author | Shun-ichi Goto <shunichi.goto@gmail.com> |
---|---|
date | Wed, 13 Aug 2008 20:18:40 -0500 |
parents | 942287cb1f57 |
children | 26adfaccdf73 |
comparison
equal
deleted
inserted
replaced
6886:41aaaa23745f | 6887:304484c7e0ba |
---|---|
1 # win32mbcs.py -- MBCS filename support for Mercurial on Windows | 1 # win32mbcs.py -- MBCS filename support for Mercurial |
2 # | 2 # |
3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> | 3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com> |
4 # | 4 # |
5 # Version: 0.1 | 5 # Version: 0.2 |
6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com> | 6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com> |
7 # | 7 # |
8 # This software may be used and distributed according to the terms | 8 # This software may be used and distributed according to the terms |
9 # of the GNU General Public License, incorporated herein by reference. | 9 # of the GNU General Public License, incorporated herein by reference. |
10 # | 10 # |
11 """Allow to use shift_jis/big5 filenames on Windows. | 11 """Allow to use MBCS path with problematic encoding. |
12 | 12 |
13 There is a well known issue "0x5c problem" on Windows. It is a | 13 Some MBCS encodings are not good for some path operations |
14 trouble on handling path name as raw encoded byte sequence of | 14 (i.e. splitting path, case conversion, etc.) with its encoded bytes. |
15 problematic encodings like shift_jis or big5. The primary intent | 15 We call such a encoding (i.e. shift_jis and big5) as "problematic |
16 of this extension is to allow using such a encoding on Mercurial | 16 encoding". This extension can be used to fix the issue with those |
17 without strange file operation error. | 17 encodings by wrapping some functions to convert to unicode string |
18 | 18 before path operation. |
19 By enabling this extension, hook mechanism is activated and some | |
20 functions are altered. Usually, this encoding is your local encoding | |
21 on your system by default. So you can get benefit simply by enabling | |
22 this extension. | |
23 | |
24 The encoding for filename is same one for terminal by default. You | |
25 can change the encoding by setting HGENCODING environment variable. | |
26 | 19 |
27 This extension is useful for: | 20 This extension is useful for: |
28 * Japanese Windows user using shift_jis encoding. | 21 * Japanese Windows users using shift_jis encoding. |
29 * Chinese Windows user using big5 encoding. | 22 * Chinese Windows users using big5 encoding. |
30 * Users who want to use a repository created with such a encoding. | 23 * All users who use a repository with one of problematic encodings |
24 on case-insensitive file system. | |
31 | 25 |
32 Note: Unix people does not need to use this extension. | 26 This extension is not needed for: |
27 * Any user who uses only ascii chars in path. | |
28 * Any user who does not use any of the problematic encodings. | |
29 | |
30 Note that there are some limitations on using this extension: | |
31 * You should use single encoding in one repository. | |
32 * You should set same encoding for the repository by locale or HGENCODING. | |
33 | |
34 To use this extension, enable the extension in .hg/hgrc or ~/.hgrc: | |
35 | |
36 [extensions] | |
37 hgext.win32mbcs = | |
38 | |
39 Path encoding conversion is done between unicode and util._encoding | |
40 which is decided by mercurial from the current locale setting or HGENCODING. | |
33 | 41 |
34 """ | 42 """ |
35 | 43 |
36 import os | 44 import os |
37 from mercurial.i18n import _ | 45 from mercurial.i18n import _ |
38 from mercurial import util | 46 from mercurial import util |
39 | 47 |
40 __all__ = ['install', 'uninstall', 'reposetup'] | 48 def decode(arg): |
49 if isinstance(arg, str): | |
50 uarg = arg.decode(util._encoding) | |
51 if arg == uarg.encode(util._encoding): | |
52 return uarg | |
53 raise UnicodeError("Not local encoding") | |
54 elif isinstance(arg, tuple): | |
55 return tuple(map(decode, arg)) | |
56 elif isinstance(arg, list): | |
57 return map(decode, arg) | |
58 return arg | |
41 | 59 |
60 def encode(arg): | |
61 if isinstance(arg, unicode): | |
62 return arg.encode(util._encoding) | |
63 elif isinstance(arg, tuple): | |
64 return tuple(map(encode, arg)) | |
65 elif isinstance(arg, list): | |
66 return map(encode, arg) | |
67 return arg | |
68 | |
69 def wrapper(func, args): | |
70 # check argument is unicode, then call original | |
71 for arg in args: | |
72 if isinstance(arg, unicode): | |
73 return func(*args) | |
74 | |
75 try: | |
76 # convert arguments to unicode, call func, then convert back | |
77 return encode(func(*decode(args))) | |
78 except UnicodeError: | |
79 # If conversion with util._encoding fails, abort with an error | |
80 # instead of falling back to the original function. | |
81 raise util.Abort(_("[win32mbcs] filename conversion fail with" | |
82 " %s encoding\n") % (util._encoding)) | |
83 | |
84 def wrapname(name): | |
85 idx = name.rfind('.') | |
86 module = name[:idx] | |
87 name = name[idx+1:] | |
88 module = eval(module) | |
89 func = getattr(module, name) | |
90 def f(*args): | |
91 return wrapper(func, args) | |
92 try: | |
93 f.__name__ = func.__name__ # fail with python23 | |
94 except Exception: | |
95 pass | |
96 setattr(module, name, f) | |
97 | |
98 # List of functions to be wrapped. | |
99 # NOTE: os.path.dirname() and os.path.basename() are safe because | |
100 # they use result of os.path.split() | |
101 funcs = '''os.path.join os.path.split os.path.splitext | |
102 os.path.splitunc os.path.normpath os.path.normcase os.makedirs | |
103 util.endswithsep util.splitpath util.checkcase util.fspath''' | |
42 | 104 |
43 # codec and alias names of sjis and big5 to be faked. | 105 # codec and alias names of sjis and big5 to be faked. |
44 _problematic_encodings = util.frozenset([ | 106 problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs |
45 'big5', 'big5-tw', 'csbig5', | 107 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis |
46 'big5hkscs', 'big5-hkscs', 'hkscs', | 108 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004 |
47 'cp932', '932', 'ms932', 'mskanji', 'ms-kanji', | 109 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213''' |
48 'shift_jis', 'csshiftjis', 'shiftjis', 'sjis', 's_jis', | |
49 'shift_jis_2004', 'shiftjis2004', 'sjis_2004', 'sjis2004', | |
50 'shift_jisx0213', 'shiftjisx0213', 'sjisx0213', 's_jisx0213', | |
51 ]) | |
52 | |
53 # attribute name to store original function | |
54 _ORIGINAL = '_original' | |
55 | |
56 _ui = None | |
57 | |
58 def decode_with_check(arg): | |
59 if isinstance(arg, tuple): | |
60 return tuple(map(decode_with_check, arg)) | |
61 elif isinstance(arg, list): | |
62 return map(decode_with_check, arg) | |
63 elif isinstance(arg, str): | |
64 uarg = arg.decode(util._encoding) | |
65 if arg == uarg.encode(util._encoding): | |
66 return uarg | |
67 else: | |
68 raise UnicodeError("Not local encoding") | |
69 else: | |
70 return arg | |
71 | |
72 def encode_with_check(arg): | |
73 if isinstance(arg, tuple): | |
74 return tuple(map(encode_with_check, arg)) | |
75 elif isinstance(arg, list): | |
76 return map(encode_with_check, arg) | |
77 elif isinstance(arg, unicode): | |
78 ret = arg.encode(util._encoding) | |
79 return ret | |
80 else: | |
81 return arg | |
82 | |
83 def wrap(func): | |
84 | |
85 def wrapped(*args): | |
86 # check argument is unicode, then call original | |
87 for arg in args: | |
88 if isinstance(arg, unicode): | |
89 return func(*args) | |
90 # make decoded argument list into uargs | |
91 try: | |
92 args = decode_with_check(args) | |
93 except UnicodeError, exc: | |
94 # If not encoded with _local_fs_encoding, report it then | |
95 # continue with calling original function. | |
96 _ui.warn(_("WARNING: [win32mbcs] filename conversion fail for" + | |
97 " %s: '%s'\n") % (util._encoding, args)) | |
98 return func(*args) | |
99 # call as unicode operation, then return with encoding | |
100 return encode_with_check(func(*args)) | |
101 | |
102 # fake is only for relevant environment. | |
103 if hasattr(func, _ORIGINAL) or \ | |
104 util._encoding.lower() not in _problematic_encodings: | |
105 return func | |
106 else: | |
107 f = wrapped | |
108 f.__name__ = func.__name__ | |
109 setattr(f, _ORIGINAL, func) # hold original to restore | |
110 return f | |
111 | |
112 def unwrap(func): | |
113 return getattr(func, _ORIGINAL, func) | |
114 | |
115 def install(): | |
116 # wrap some python functions and mercurial functions | |
117 # to handle raw bytes on Windows. | |
118 # NOTE: dirname and basename is safe because they use result | |
119 # of os.path.split() | |
120 global _ui | |
121 if not _ui: | |
122 from mercurial import ui | |
123 _ui = ui.ui() | |
124 os.path.join = wrap(os.path.join) | |
125 os.path.split = wrap(os.path.split) | |
126 os.path.splitext = wrap(os.path.splitext) | |
127 os.path.splitunc = wrap(os.path.splitunc) | |
128 os.path.normpath = wrap(os.path.normpath) | |
129 os.path.normcase = wrap(os.path.normcase) | |
130 os.makedirs = wrap(os.makedirs) | |
131 util.endswithsep = wrap(util.endswithsep) | |
132 util.splitpath = wrap(util.splitpath) | |
133 | |
134 def uninstall(): | |
135 # restore original functions. | |
136 os.path.join = unwrap(os.path.join) | |
137 os.path.split = unwrap(os.path.split) | |
138 os.path.splitext = unwrap(os.path.splitext) | |
139 os.path.splitunc = unwrap(os.path.splitunc) | |
140 os.path.normpath = unwrap(os.path.normpath) | |
141 os.path.normcase = unwrap(os.path.normcase) | |
142 os.makedirs = unwrap(os.makedirs) | |
143 util.endswithsep = unwrap(util.endswithsep) | |
144 util.splitpath = unwrap(util.splitpath) | |
145 | |
146 | 110 |
147 def reposetup(ui, repo): | 111 def reposetup(ui, repo): |
148 # TODO: decide use of config section for this extension | 112 # TODO: decide use of config section for this extension |
149 global _ui | 113 if not os.path.supports_unicode_filenames: |
150 _ui = ui | 114 ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) |
151 if not os.path.supports_unicode_filenames: | 115 return |
152 ui.warn(_("[win32mbcs] cannot activate on this platform.\n")) | |
153 return | |
154 # install features of this extension | |
155 install() | |
156 ui.debug(_("[win32mbcs] activeted with encoding: %s\n") % util._encoding) | |
157 | 116 |
158 # win32mbcs.py ends here | 117 # fake is only for relevant environment. |
118 if util._encoding.lower() in problematic_encodings.split(): | |
119 for f in funcs.split(): | |
120 wrapname(f) | |
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding) | |
122 |