changeset 8542:de150a942ec8

i18n: accurately generate hg.pot
author Martin Geisler <mg@lazybytes.net>
date Thu, 21 May 2009 23:43:13 +0200
parents 06ace50443f6
children 9f0d085469c5
files Makefile i18n/hggettext
diffstat 2 files changed, 120 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Thu May 21 22:53:26 2009 +0200
+++ b/Makefile	Thu May 21 23:43:13 2009 +0200
@@ -81,8 +81,8 @@
 
 i18n/hg.pot: $(PYTHON_FILES)
 	mkdir -p i18n
-	pygettext -d hg -p i18n --docstrings \
-	  mercurial/commands.py hgext/*.py hgext/*/__init__.py
+	$(PYTHON) i18n/hggettext mercurial/commands.py \
+	  hgext/*.py hgext/*/__init__.py > i18n/hg.pot
         # All strings marked for translation in Mercurial contain
         # ASCII characters only. But some files contain string
         # literals like this '\037\213'. xgettext thinks it has to
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/hggettext	Thu May 21 23:43:13 2009 +0200
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+#
+# hggettext - carefully extract docstrings for Mercurial
+#
+# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+# The normalize function is taken from pygettext which is distributed
+# with Python under the Python License, which is GPL compatible.
+
+"""Extract docstrings from Mercurial commands.
+
+Compared to pygettext, this script knows about the cmdtable and table
+dictionaries used by Mercurial, and will only extract docstrings from
+functions mentioned therein.
+
+Use xgettext like normal to extract strings marked as translatable and
+join the message catalogs to get the final catalog.
+"""
+
+from mercurial import demandimport; demandimport.enable()
+import sys, inspect
+
+
+def escape(s):
+    # The order is important, the backslash must be escaped first
+    # since the other replacements introduce new backslashes
+    # themselves.
+    s = s.replace('\\', '\\\\')
+    s = s.replace('\n', '\\n')
+    s = s.replace('\r', '\\r')
+    s = s.replace('\t', '\\t')
+    s = s.replace('"', '\\"')
+    return s
+
+
+def normalize(s):
+    # This converts the various Python string types into a format that
+    # is appropriate for .po files, namely much closer to C style.
+    lines = s.split('\n')
+    if len(lines) == 1:
+        s = '"' + escape(s) + '"'
+    else:
+        if not lines[-1]:
+            del lines[-1]
+            lines[-1] = lines[-1] + '\n'
+        lines = map(escape, lines)
+        lineterm = '\\n"\n"'
+        s = '""\n"' + lineterm.join(lines) + '"'
+    return s
+
+
+def poentry(path, lineno, s):
+    return ('#: %s:%d\n' % (path, lineno) +
+            'msgid %s\n' % normalize(s) +
+            'msgstr ""\n')
+
+
+def offset(src, doc, name, default):
+    """Compute offset or issue a warning on stderr."""
+    # Backslashes in doc appear doubled in src.
+    end = src.find(doc.replace('\\', '\\\\'))
+    if end == -1:
+        # This can happen if the docstring contains unnecessary escape
+        # sequences such as \" in a triple-quoted string. The problem
+        # is that \" is turned into " and so doc won't appear in src.
+        sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n"
+                         % (name, default))
+        return default
+    else:
+        return src.count('\n', 0, end)
+
+
+def importpath(path):
+    """Import a path like foo/bar/baz.py and return the baz module."""
+    if path.endswith('.py'):
+        path = path[:-3]
+    if path.endswith('/__init__'):
+        path = path[:-9]
+    path = path.replace('/', '.')
+    mod = __import__(path)
+    for comp in path.split('.')[1:]:
+        mod = getattr(mod, comp)
+    return mod
+
+
+def docstrings(path):
+    """Extract docstrings from path.
+
+    This respects the Mercurial cmdtable/table convention and will
+    only extract docstrings from functions mentioned in these tables.
+    """
+    mod = importpath(path)
+    if mod.__doc__:
+        src = open(path).read()
+        lineno = 1 + offset(src, mod.__doc__, path, 7)
+        print poentry(path, lineno, mod.__doc__)
+
+    cmdtable = getattr(mod, 'cmdtable', {})
+    if not cmdtable:
+        # Maybe we are processing mercurial.commands?
+        cmdtable = getattr(mod, 'table', {})
+
+    for entry in cmdtable.itervalues():
+        func = entry[0]
+        if func.__doc__:
+            src = inspect.getsource(func)
+            name = "%s.%s" % (path, func.__name__)
+            lineno = func.func_code.co_firstlineno
+            lineno += offset(src, func.__doc__, name, 1)
+            print poentry(path, lineno, func.__doc__)
+
+
+if __name__ == "__main__":
+    for path in sys.argv[1:]:
+        docstrings(path)