annotate mercurial/encoding.py @ 9410:1c83938b6a8e

extensions: load and configure extensions in well-defined phases

Extensions are now loaded with a call-graph like this:

  dispatch._dispatch
  extensions.loadall
  extensions.load  # add foo module to extensions._extensions
  extensions.load  # add bar module to extensions._extensions
  foo.uisetup(ui)
  bar.uisetup(ui)
  foo.extsetup()
  bar.extsetup()
  commands.table.update(foo.cmdtable)
  commands.table.update(bar.cmdtable)
  hg.repository
  foo.reposetup(ui, repo)
  bar.reposetup(ui, repo)

The uisetup calls could easily be moved out to dispatch._dispatch, but have been kept in extensions.loadall since at least TortoiseHg calls extensions.loadall and expects it to call uisetup. The extensions.load function called uisetup. It now has an unused ui argument which has been kept for backwards compatibility.
author Martin Geisler <mg@lazybytes.net>
date Sat, 29 Aug 2009 00:29:16 +0200
parents b87a50b7125c
children 9e9f63d5c456
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
1 # encoding.py - character transcoding support for Mercurial
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
2 #
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
4 #
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
5 # This software may be used and distributed according to the terms of the
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
6 # GNU General Public License version 2, incorporated herein by reference.
7948
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
7
8312
b87a50b7125c separate import lines from mercurial and general python modules
Simon Heimberg <simohe@besonet.ch>
parents: 8226
diff changeset
8 import error
b87a50b7125c separate import lines from mercurial and general python modules
Simon Heimberg <simohe@besonet.ch>
parents: 8226
diff changeset
9 import sys, unicodedata, locale, os
7948
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
10
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
11 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
12
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
13 try:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
14 encoding = os.environ.get("HGENCODING")
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
15 if sys.platform == 'darwin' and not encoding:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
16 # On darwin, getpreferredencoding ignores the locale environment and
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
17 # always returns mac-roman. We override this if the environment is
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
18 # not C (has been customized by the user).
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
19 locale.setlocale(locale.LC_CTYPE, '')
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
20 encoding = locale.getlocale()[1]
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
21 if not encoding:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
22 encoding = locale.getpreferredencoding() or 'ascii'
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
23 encoding = _encodingfixup.get(encoding, encoding)
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
24 except locale.Error:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
25 encoding = 'ascii'
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
26 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
27 fallbackencoding = 'ISO-8859-1'
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
28
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
29 def tolocal(s):
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
30 """
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
31 Convert a string from internal UTF-8 to local encoding
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
32
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
33 All internal strings should be UTF-8 but some repos before the
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
34 implementation of locale support may contain latin1 or possibly
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
35 other character sets. We attempt to decode everything strictly
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
36 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
37 replace unknown characters.
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
38 """
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
39 for e in ('UTF-8', fallbackencoding):
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
40 try:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
41 u = s.decode(e) # attempt strict decoding
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
42 return u.encode(encoding, "replace")
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
43 except LookupError, k:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
44 raise error.Abort("%s, please check your locale settings" % k)
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
45 except UnicodeDecodeError:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
46 pass
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
47 u = s.decode("utf-8", "replace") # last ditch
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
48 return u.encode(encoding, "replace")
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
49
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
50 def fromlocal(s):
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
51 """
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
52 Convert a string from the local character encoding to UTF-8
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
53
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
54 We attempt to decode strings using the encoding mode set by
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
55 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
56 characters will cause an error message. Other modes include
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
57 'replace', which replaces unknown characters with a special
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
58 Unicode character, and 'ignore', which drops the character.
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
59 """
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
60 try:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
61 return s.decode(encoding, encodingmode).encode("utf-8")
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
62 except UnicodeDecodeError, inst:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
63 sub = s[max(0, inst.start-10):inst.start+10]
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
64 raise error.Abort("decoding near '%s': %s!" % (sub, inst))
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
65 except LookupError, k:
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
66 raise error.Abort("%s, please check your locale settings" % k)
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
67
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
68 def colwidth(s):
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
69 "Find the column width of a string in the local encoding for display"
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
70 d = s.decode(encoding, 'replace')
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
71 if hasattr(unicodedata, 'east_asian_width'):
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
72 w = unicodedata.east_asian_width
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
73 return sum([w(c) in 'WF' and 2 or 1 for c in d])
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
74 return len(d)
de377b1a9a84 move encoding bits from util to encoding
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
75