mercurial/i18n.py
author Raphaël Gomès <rgomes@octobus.net>
Mon, 29 Jul 2024 20:35:44 +0200
changeset 52175 bd43465af568
parent 51863 f4733654f144
permissions -rw-r--r--
fncache: add attribute to check whether we're using dotencode. This will make it easy to know if we can use the Rust implementation that doesn't support older forms of encoding.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     1
# i18n.py - internationalization support for mercurial
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     2
#
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 46807
diff changeset
     3
# Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
8226
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     4
#
8b2cd04a6e97 put license and copyright info into comment blocks
Martin Geisler <mg@lazybytes.net>
parents: 8225
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 9538
diff changeset
     6
# GNU General Public License version 2 or any later version.
1400
cf9a1233738a i18n first part: make '_' available for files who need it
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
diff changeset
     7
51863
f4733654f144 typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents: 51717
diff changeset
     8
from __future__ import annotations
25955
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
     9
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
    10
import gettext as gettextmod
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
    11
import locale
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
    12
import os
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
    13
import sys
2c07c6884394 i18n: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 23031
diff changeset
    14
51285
9d3721552b6c pytype: import typing directly
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 50929
diff changeset
    15
from typing import (
51717
f841de63a5aa typing: add type hints to `i18n._msgcache`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    16
    Dict,
51285
9d3721552b6c pytype: import typing directly
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 50929
diff changeset
    17
    List,
9d3721552b6c pytype: import typing directly
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 50929
diff changeset
    18
)
9d3721552b6c pytype: import typing directly
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 50929
diff changeset
    19
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    20
from .utils import resourceutil
30050
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
    21
from . import (
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
    22
    encoding,
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
    23
    pycompat,
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
    24
)
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
    25
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
    26
# modelled after templater.templatepath:
14975
b64538363dbe i18n: use getattr instead of hasattr
Augie Fackler <durin42@gmail.com>
parents: 13849
diff changeset
    27
if getattr(sys, 'frozen', None) is not None:
30669
10b17ed9b591 py3: replace sys.executable with pycompat.sysexecutable
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30639
diff changeset
    28
    module = pycompat.sysexecutable
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
    29
else:
31074
2912b06905dc py3: use pycompat.fsencode() to convert __file__ to bytes
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30669
diff changeset
    30
    module = pycompat.fsencode(__file__)
7650
85ae7aaf08e9 i18n: lookup .mo files in private locale/ directory
Martin Geisler <mg@daimi.au.dk>
parents: 3888
diff changeset
    31
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    32
_languages = None
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    33
if (
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    34
    pycompat.iswindows
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    35
    and b'LANGUAGE' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    36
    and b'LC_ALL' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    37
    and b'LC_MESSAGES' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    38
    and b'LANG' not in encoding.environ
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    39
):
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    40
    # Try to detect UI language by "User Interface Language Management" API
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    41
    # if no locale variables are set. Note that locale.getdefaultlocale()
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    42
    # uses GetLocaleInfo(), which may be different from UI language.
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    43
    # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    44
    try:
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    45
        import ctypes
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    46
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
    47
        # pytype: disable=module-attr
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    48
        langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
    49
        # pytype: enable=module-attr
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
    50
21987
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    51
        _languages = [locale.windows_locale[langid]]
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    52
    except (ImportError, AttributeError, KeyError):
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    53
        # ctypes not found or unknown langid
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    54
        pass
4953cd193e84 i18n: detect UI language without POSIX-style locale variable on Windows (BC)
Yuya Nishihara <yuya@tcha.org>
parents: 21746
diff changeset
    55
22638
0d0350cfc7ab i18n: use datapath for i18n like for templates and help
Mads Kiilerich <madski@unity3d.com>
parents: 21987
diff changeset
    56
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    57
datapath = pycompat.fsdecode(resourceutil.datapath)
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    58
localedir = os.path.join(datapath, 'locale')
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    59
t = gettextmod.translation('hg', localedir, _languages, fallback=True)
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    60
try:
46642
b9f40b743627 typing: add type annotations to mercurial/i18n.py
Matt Harbison <matt_harbison@yahoo.com>
parents: 43673
diff changeset
    61
    _ugettext = t.ugettext  # pytype: disable=attribute-error
43673
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    62
except AttributeError:
f0bee3b1b847 i18n: get datapath directly from resourceutil
Martin von Zweigbergk <martinvonz@google.com>
parents: 43506
diff changeset
    63
    _ugettext = t.gettext
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    64
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    65
51717
f841de63a5aa typing: add type hints to `i18n._msgcache`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    66
_msgcache: Dict[
f841de63a5aa typing: add type hints to `i18n._msgcache`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    67
    bytes, Dict[bytes, bytes]
f841de63a5aa typing: add type hints to `i18n._msgcache`
Matt Harbison <matt_harbison@yahoo.com>
parents: 51703
diff changeset
    68
] = {}  # encoding: {message: translation}
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    69
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
    70
51287
f15cb5111a1e pytype: move some type comment to proper annotation
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51285
diff changeset
    71
def gettext(message: bytes) -> bytes:
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    72
    """Translate message.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    73
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    74
    The message is looked up in the catalog to get a Unicode string,
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    75
    which is encoded in the local encoding before being returned.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    76
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    77
    Important: message is restricted to characters in the encoding
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    78
    given by sys.getdefaultencoding() which is most likely 'ascii'.
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    79
    """
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    80
    # If message is None, t.ugettext will return u'None' as the
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    81
    # translation whereas our callers expect us to return None.
22638
0d0350cfc7ab i18n: use datapath for i18n like for templates and help
Mads Kiilerich <madski@unity3d.com>
parents: 21987
diff changeset
    82
    if message is None or not _ugettext:
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    83
        return message
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
    84
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
    85
    cache = _msgcache.setdefault(encoding.encoding, {})
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
    86
    if message not in cache:
48934
06de08b36c82 py3: use str instead of pycompat.unicode
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48875
diff changeset
    87
        if type(message) is str:
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    88
            # goofy unicode docstrings in test
51288
8b2ea2246a5f pytype: convert type comment for inline variable too
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51287
diff changeset
    89
            paragraphs: List[str] = message.split(u'\n\n')
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    90
        else:
40254
dd83aafdb64a py3: get around unicode docstrings in test-encoding-textwrap.t and test-help.t
Yuya Nishihara <yuya@tcha.org>
parents: 38312
diff changeset
    91
            # should be ascii, but we have unicode docstrings in test, which
dd83aafdb64a py3: get around unicode docstrings in test-encoding-textwrap.t and test-help.t
Yuya Nishihara <yuya@tcha.org>
parents: 38312
diff changeset
    92
            # are converted to utf-8 bytes on Python 3.
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
    93
            paragraphs = [p.decode("utf-8") for p in message.split(b'\n\n')]
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    94
        # Be careful not to translate the empty string -- it holds the
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    95
        # meta data of the .po file.
29415
47fb4beb992b i18n: use unicode literal
Gregory Szorc <gregory.szorc@gmail.com>
parents: 28674
diff changeset
    96
        u = u'\n\n'.join([p and _ugettext(p) or u'' for p in paragraphs])
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    97
        try:
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    98
            # encoding.tolocal cannot be used since it will first try to
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
    99
            # decode the Unicode string. Calling u.decode(enc) really
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
   100
            # means u.encode(sys.getdefaultencoding()).decode(enc). Since
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
   101
            # the Python encoding defaults to 'ascii', this fails if the
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
   102
            # translated string uses non-ASCII characters.
30050
d229be12e256 py3: convert to unicode to pass into encode()
Pulkit Goyal <7895pulkit@gmail.com>
parents: 30035
diff changeset
   103
            encodingstr = pycompat.sysstr(encoding.encoding)
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
   104
            cache[message] = u.encode(encodingstr, "replace")
23031
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
   105
        except LookupError:
3c0983cc279e i18n: cache the result of every gettext call
Augie Fackler <raf@durin42.com>
parents: 22638
diff changeset
   106
            # An unknown encoding results in a LookupError.
34660
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
   107
            cache[message] = message
d00ec62d156f i18n: cache translated messages per encoding
Yuya Nishihara <yuya@tcha.org>
parents: 34645
diff changeset
   108
    return cache[message]
7651
5b5036ef847a i18n: encode output in user's local encoding
Martin Geisler <mg@daimi.au.dk>
parents: 7650
diff changeset
   109
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
   110
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
   111
def _plain():
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
   112
    if (
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   113
        b'HGPLAIN' not in encoding.environ
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   114
        and b'HGPLAINEXCEPT' not in encoding.environ
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
   115
    ):
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
   116
        return False
43077
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   117
    exceptions = encoding.environ.get(b'HGPLAINEXCEPT', b'').strip().split(b',')
687b865b95ad formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents: 43076
diff changeset
   118
    return b'i18n' not in exceptions
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
   119
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 40254
diff changeset
   120
13849
9f97de157aad HGPLAIN: allow exceptions to plain mode, like i18n, via HGPLAINEXCEPT
Brodie Rao <brodie@bitheap.org>
parents: 11403
diff changeset
   121
if _plain():
51287
f15cb5111a1e pytype: move some type comment to proper annotation
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51285
diff changeset
   122
f15cb5111a1e pytype: move some type comment to proper annotation
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51285
diff changeset
   123
    def _(message: bytes) -> bytes:
f15cb5111a1e pytype: move some type comment to proper annotation
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51285
diff changeset
   124
        return message
f15cb5111a1e pytype: move some type comment to proper annotation
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 51285
diff changeset
   125
10455
40dfd46d098f ui: add HGPLAIN environment variable for easier scripting
Brodie Rao <me+hg@dackz.net>
parents: 10263
diff changeset
   126
else:
40dfd46d098f ui: add HGPLAIN environment variable for easier scripting
Brodie Rao <me+hg@dackz.net>
parents: 10263
diff changeset
   127
    _ = gettext