annotate mercurial/i18n.py @ 47387:75e1104f23a2

revlog: use dedicated code for reading sidedata. We are about to introduce a new, dedicated file to store sidedata. Before doing so, we make sidedata reading go through different code than reading data chunks. This will reduce some of the complexity of the next changesets. The reading is very simple right now and will need some improvement later to reuse some of the caching strategy we use for the data file. Differential Revision: https://phab.mercurial-scm.org/D10785
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Thu, 27 May 2021 04:09:30 +0200
parents d4ba4d51f85f
children 6000f5b25c9b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
1 # i18n.py - internationalization support for mercurial
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
2 #
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 46807
diff changeset
3 # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
4 #
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
5 # This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9538
diff changeset
6 # GNU General Public License version 2 or any later version.
1400
cf9a1233738a i18n first part: make '_' available for files who need it
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
diff changeset
7
25955
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
8 from __future__ import absolute_import
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
9
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
10 import gettext as gettextmod
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
11 import locale
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
12 import os
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
13 import sys
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
14
43089
c59eb1560c44 py3: manually import getattr where it is needed
Gregory Szorc <gregory.szorc@gmail.com>
parents: 43077
diff changeset
15 from .pycompat import getattr
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
16 from .utils import resourceutil
30050
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
17 from . import (
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
18 encoding,
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
19 pycompat,
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
20 )
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
21
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
22 if pycompat.TYPE_CHECKING:
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
23 from typing import (
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
24 Callable,
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
25 List,
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
26 )
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
27
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
28
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
29 # modelled after templater.templatepath:
14975
b64538363dbe i18n: use getattr instead of hasattr
Augie Fackler <durin42@gmail.com>
parents: 13849
diff changeset
30 if getattr(sys, 'frozen', None) is not None:
30669
10b17ed9b591 py3: replace sys.executable with pycompat.sysexecutable
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30639
diff changeset
31 module = pycompat.sysexecutable
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
32 else:
31074
2912b06905dc py3: use pycompat.fsencode() to convert __file__ to bytes
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30669
diff changeset
33 module = pycompat.fsencode(__file__)
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
34
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
35 _languages = None
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
36 if (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
37 pycompat.iswindows
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
38 and b'LANGUAGE' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
39 and b'LC_ALL' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
40 and b'LC_MESSAGES' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
41 and b'LANG' not in encoding.environ
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
42 ):
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
43 # Try to detect UI language by "User Interface Language Management" API
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
44 # if no locale variables are set. Note that locale.getdefaultlocale()
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
45 # uses GetLocaleInfo(), which may be different from UI language.
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
46 # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
47 try:
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
48 import ctypes
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
49
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
50 # pytype: disable=module-attr
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
51 langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
52 # pytype: enable=module-attr
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
53
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
54 _languages = [locale.windows_locale[langid]]
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
55 except (ImportError, AttributeError, KeyError):
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
56 # ctypes not found or unknown langid
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
57 pass
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
58
22638
0d0350cfc7ab i18n: use datapath for i18n like for templates and help
Mads Kiilerich <madski@unity3d.com>
parents: 21987
diff changeset
59
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
60 datapath = pycompat.fsdecode(resourceutil.datapath)
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
61 localedir = os.path.join(datapath, 'locale')
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
62 t = gettextmod.translation('hg', localedir, _languages, fallback=True)
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
63 try:
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
64 _ugettext = t.ugettext # pytype: disable=attribute-error
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
65 except AttributeError:
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
66 _ugettext = t.gettext
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
67
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
68
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
69 _msgcache = {} # encoding: {message: translation}
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
70
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
71
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
72 def gettext(message):
46807
2c0e35f6957a typing: mark the argument to mercurial.i18n.gettext() non-Optional
Matt Harbison <matt_harbison@yahoo.com>
parents: 46642
diff changeset
73 # type: (bytes) -> bytes
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
74 """Translate message.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
75
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
76 The message is looked up in the catalog to get a Unicode string,
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
77 which is encoded in the local encoding before being returned.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
78
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
79 Important: message is restricted to characters in the encoding
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
80 given by sys.getdefaultencoding() which is most likely 'ascii'.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
81 """
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
82 # If message is None, t.ugettext will return u'None' as the
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
83 # translation whereas our callers expect us to return None.
22638
0d0350cfc7ab i18n: use datapath for i18n like for templates and help
Mads Kiilerich <madski@unity3d.com>
parents: 21987
diff changeset
84 if message is None or not _ugettext:
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
85 return message
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
86
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
87 cache = _msgcache.setdefault(encoding.encoding, {})
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
88 if message not in cache:
38312
79dd61a4554f py3: replace `unicode` with pycompat.unicode
Pulkit Goyal <7895pulkit@gmail.com>
parents: 36835
diff changeset
89 if type(message) is pycompat.unicode:
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
90 # goofy unicode docstrings in test
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
91 paragraphs = message.split(u'\n\n') # type: List[pycompat.unicode]
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
92 else:
40254
dd83aafdb64a py3: get around unicode docstrings in test-encoding-textwrap.t and test-help.t
Yuya Nishihara <yuya@tcha.org>
parents: 38312
diff changeset
93 # should be ascii, but we have unicode docstrings in test, which
dd83aafdb64a py3: get around unicode docstrings in test-encoding-textwrap.t and test-help.t
Yuya Nishihara <yuya@tcha.org>
parents: 38312
diff changeset
94 # are converted to utf-8 bytes on Python 3.
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
95 paragraphs = [p.decode("utf-8") for p in message.split(b'\n\n')]
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
96 # Be careful not to translate the empty string -- it holds the
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
97 # meta data of the .po file.
29415
47fb4beb992b i18n: use unicode literal
Gregory Szorc <gregory.szorc@gmail.com>
parents: 28674
diff changeset
98 u = u'\n\n'.join([p and _ugettext(p) or u'' for p in paragraphs])
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
99 try:
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
100 # encoding.tolocal cannot be used since it will first try to
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
101 # decode the Unicode string. Calling u.decode(enc) really
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
102 # means u.encode(sys.getdefaultencoding()).decode(enc). Since
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
103 # the Python encoding defaults to 'ascii', this fails if the
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
104 # translated string use non-ASCII characters.
30050
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
105 encodingstr = pycompat.sysstr(encoding.encoding)
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
106 cache[message] = u.encode(encodingstr, "replace")
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
107 except LookupError:
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
108 # An unknown encoding results in a LookupError.
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
109 cache[message] = message
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
110 return cache[message]
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
111
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
112
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
113 def _plain():
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
114 if (
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
115 b'HGPLAIN' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
116 and b'HGPLAINEXCEPT' not in encoding.environ
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
117 ):
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
118 return False
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
119 exceptions = encoding.environ.get(b'HGPLAINEXCEPT', b'').strip().split(b',')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
120 return b'i18n' not in exceptions
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
121
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
122
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
123 if _plain():
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
124 _ = lambda message: message # type: Callable[[bytes], bytes]
10455
40dfd46d098f ui: add HGPLAIN environment variable for easier scripting
Brodie Rao <me+hg@dackz.net>
parents: 10263
diff changeset
125 else:
40dfd46d098f ui: add HGPLAIN environment variable for easier scripting
Brodie Rao <me+hg@dackz.net>
parents: 10263
diff changeset
126 _ = gettext