--- a/Makefile Thu May 21 22:53:26 2009 +0200
+++ b/Makefile Thu May 21 23:43:13 2009 +0200
@@ -81,8 +81,8 @@
i18n/hg.pot: $(PYTHON_FILES)
mkdir -p i18n
- pygettext -d hg -p i18n --docstrings \
- mercurial/commands.py hgext/*.py hgext/*/__init__.py
+ $(PYTHON) i18n/hggettext mercurial/commands.py \
+ hgext/*.py hgext/*/__init__.py > i18n/hg.pot
# All strings marked for translation in Mercurial contain
# ASCII characters only. But some files contain string
# literals like this '\037\213'. xgettext thinks it has to
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/hggettext Thu May 21 23:43:13 2009 +0200
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+#
+# hggettext - carefully extract docstrings for Mercurial
+#
+# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+# The normalize function is taken from pygettext which is distributed
+# with Python under the Python License, which is GPL compatible.
+
+"""Extract docstrings from Mercurial commands.
+
+Compared to pygettext, this script knows about the cmdtable and table
+dictionaries used by Mercurial, and will only extract docstrings from
+functions mentioned therein.
+
+Use xgettext like normal to extract strings marked as translatable and
+join the message catalogs to get the final catalog.
+"""
+
+from mercurial import demandimport; demandimport.enable()
+import sys, inspect
+
+
+def escape(s):
+ # The order is important, the backslash must be escaped first
+ # since the other replacements introduce new backslashes
+ # themselves.
+ s = s.replace('\\', '\\\\')
+ s = s.replace('\n', '\\n')
+ s = s.replace('\r', '\\r')
+ s = s.replace('\t', '\\t')
+ s = s.replace('"', '\\"')
+ return s
+
+
+def normalize(s):
+ # This converts the various Python string types into a format that
+ # is appropriate for .po files, namely much closer to C style.
+ lines = s.split('\n')
+ if len(lines) == 1:
+ s = '"' + escape(s) + '"'
+ else:
+ if not lines[-1]:
+ del lines[-1]
+ lines[-1] = lines[-1] + '\n'
+ lines = map(escape, lines)
+ lineterm = '\\n"\n"'
+ s = '""\n"' + lineterm.join(lines) + '"'
+ return s
+
+
+def poentry(path, lineno, s):
+ return ('#: %s:%d\n' % (path, lineno) +
+ 'msgid %s\n' % normalize(s) +
+ 'msgstr ""\n')
+
+
+def offset(src, doc, name, default):
+    """Compute offset or issue a warning on stderr."""
+ # Backslashes in doc appear doubled in src.
+ end = src.find(doc.replace('\\', '\\\\'))
+ if end == -1:
+ # This can happen if the docstring contains unnecessary escape
+ # sequences such as \" in a triple-quoted string. The problem
+        # is that \" is turned into " and so doc won't appear in src.
+ sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n"
+ % (name, default))
+ return default
+ else:
+ return src.count('\n', 0, end)
+
+
+def importpath(path):
+ """Import a path like foo/bar/baz.py and return the baz module."""
+ if path.endswith('.py'):
+ path = path[:-3]
+ if path.endswith('/__init__'):
+ path = path[:-9]
+ path = path.replace('/', '.')
+ mod = __import__(path)
+ for comp in path.split('.')[1:]:
+ mod = getattr(mod, comp)
+ return mod
+
+
+def docstrings(path):
+ """Extract docstrings from path.
+
+ This respects the Mercurial cmdtable/table convention and will
+ only extract docstrings from functions mentioned in these tables.
+ """
+ mod = importpath(path)
+ if mod.__doc__:
+ src = open(path).read()
+ lineno = 1 + offset(src, mod.__doc__, path, 7)
+ print poentry(path, lineno, mod.__doc__)
+
+ cmdtable = getattr(mod, 'cmdtable', {})
+ if not cmdtable:
+ # Maybe we are processing mercurial.commands?
+ cmdtable = getattr(mod, 'table', {})
+
+ for entry in cmdtable.itervalues():
+ func = entry[0]
+ if func.__doc__:
+ src = inspect.getsource(func)
+ name = "%s.%s" % (path, func.__name__)
+ lineno = func.func_code.co_firstlineno
+ lineno += offset(src, func.__doc__, name, 1)
+ print poentry(path, lineno, func.__doc__)
+
+
+if __name__ == "__main__":
+ for path in sys.argv[1:]:
+ docstrings(path)