Mercurial > hg
diff i18n/hggettext @ 8542:de150a942ec8
i18n: accurately generate hg.pot
author | Martin Geisler <mg@lazybytes.net> |
---|---|
date | Thu, 21 May 2009 23:43:13 +0200 |
parents | |
children | 1fc1c77d4863 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/i18n/hggettext Thu May 21 23:43:13 2009 +0200 @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# +# hggettext - carefully extract docstrings for Mercurial +# +# Copyright 2009 Matt Mackall <mpm@selenic.com> and others +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2, incorporated herein by reference. + +# The normalize function is taken from pygettext which is distributed +# with Python under the Python License, which is GPL compatible. + +"""Extract docstrings from Mercurial commands. + +Compared to pygettext, this script knows about the cmdtable and table +dictionaries used by Mercurial, and will only extract docstrings from +functions mentioned therein. + +Use xgettext like normal to extract strings marked as translatable and +join the message cataloges to get the final catalog. +""" + +from mercurial import demandimport; demandimport.enable() +import sys, inspect + + +def escape(s): + # The order is important, the backslash must be escaped first + # since the other replacements introduce new backslashes + # themselves. + s = s.replace('\\', '\\\\') + s = s.replace('\n', '\\n') + s = s.replace('\r', '\\r') + s = s.replace('\t', '\\t') + s = s.replace('"', '\\"') + return s + + +def normalize(s): + # This converts the various Python string types into a format that + # is appropriate for .po files, namely much closer to C style. + lines = s.split('\n') + if len(lines) == 1: + s = '"' + escape(s) + '"' + else: + if not lines[-1]: + del lines[-1] + lines[-1] = lines[-1] + '\n' + lines = map(escape, lines) + lineterm = '\\n"\n"' + s = '""\n"' + lineterm.join(lines) + '"' + return s + + +def poentry(path, lineno, s): + return ('#: %s:%d\n' % (path, lineno) + + 'msgid %s\n' % normalize(s) + + 'msgstr ""\n') + + +def offset(src, doc, name, default): + """Compute offset or issue a warning on stdout.""" + # Backslashes in doc appear doubled in src. + end = src.find(doc.replace('\\', '\\\\')) + if end == -1: + # This can happen if the docstring contains unnecessary escape + # sequences such as \" in a triple-quoted string. The problem + # is that \" is turned into " and so doc wont appear in src. + sys.stderr.write("warning: unknown offset in %s, assuming %d lines\n" + % (name, default)) + return default + else: + return src.count('\n', 0, end) + + +def importpath(path): + """Import a path like foo/bar/baz.py and return the baz module.""" + if path.endswith('.py'): + path = path[:-3] + if path.endswith('/__init__'): + path = path[:-9] + path = path.replace('/', '.') + mod = __import__(path) + for comp in path.split('.')[1:]: + mod = getattr(mod, comp) + return mod + + +def docstrings(path): + """Extract docstrings from path. + + This respects the Mercurial cmdtable/table convention and will + only extract docstrings from functions mentioned in these tables. + """ + mod = importpath(path) + if mod.__doc__: + src = open(path).read() + lineno = 1 + offset(src, mod.__doc__, path, 7) + print poentry(path, lineno, mod.__doc__) + + cmdtable = getattr(mod, 'cmdtable', {}) + if not cmdtable: + # Maybe we are processing mercurial.commands? + cmdtable = getattr(mod, 'table', {}) + + for entry in cmdtable.itervalues(): + func = entry[0] + if func.__doc__: + src = inspect.getsource(func) + name = "%s.%s" % (path, func.__name__) + lineno = func.func_code.co_firstlineno + lineno += offset(src, func.__doc__, name, 1) + print poentry(path, lineno, func.__doc__) + + +if __name__ == "__main__": + for path in sys.argv[1:]: + docstrings(path)