changeset 11391:9b404640d795

Merge with hg-i18n
author Martin Geisler <mg@lazybytes.net>
date Sat, 19 Jun 2010 17:06:11 +0200
parents 11cd65611f3f (diff) b2c0bc41165f (current diff)
children a87906461835
files
diffstat 12 files changed, 1967 insertions(+), 34 deletions(-)
--- a/.hgignore	Fri Jun 18 15:22:56 2010 +0200
+++ b/.hgignore	Sat Jun 19 17:06:11 2010 +0200
@@ -25,6 +25,7 @@
 MANIFEST
 patches
 mercurial/__version__.py
+mercurial.egg-info
 Output/Mercurial-*.exe
 .DS_Store
 tags
--- a/Makefile	Fri Jun 18 15:22:56 2010 +0200
+++ b/Makefile	Sat Jun 19 17:06:11 2010 +0200
@@ -100,6 +100,7 @@
 	  --copyright-holder "Matt Mackall <mpm@selenic.com> and others" \
 	  --from-code ISO-8859-1 --join --sort-by-file \
 	  -d hg -p i18n -o hg.pot
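+# post-process the catalog with posplit, which splits multi-paragraph
+# entries into one entry per paragraph (it relies on the i18n/polib.py
+# module vendored below)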
+	$(PYTHON) i18n/posplit i18n/hg.pot
 
 %.po: i18n/hg.pot
 	msgmerge --no-location --update $@ $^
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/polib.LICENSE	Sat Jun 19 17:06:11 2010 +0200
@@ -0,0 +1,19 @@
+copyright (c) 2006-2010 David JEAN LOUIS
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/polib.py	Sat Jun 19 17:06:11 2010 +0200
@@ -0,0 +1,1680 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# License: MIT (see LICENSE file provided)
+# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
+
+"""
+**polib** allows you to create, modify and manipulate gettext files (pot, po
+and mo files). You can load existing files, iterate through their entries,
+add or modify entries, comments or metadata, etc., or create new po files
+from scratch.
+
+**polib** provides a simple and pythonic API, exporting only three
+convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
+four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
+new files/entries.
+
+**Basic example**:
+
+>>> import polib
+>>> # load an existing po file
+>>> po = polib.pofile('tests/test_utf8.po')
+>>> for entry in po:
+...     # do something with entry...
+...     pass
+>>> # add an entry
+>>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
+>>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
+>>> po.append(entry)
+>>> # to save our modified po file:
+>>> # po.save()
+>>> # or you may want to compile the po file
+>>> # po.save_as_mofile('tests/test_utf8.mo')
+"""
+
+__author__    = 'David JEAN LOUIS <izimobil@gmail.com>'
+__version__   = '0.5.2'
+__all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+                 'detect_encoding', 'escape', 'unescape']
+
+import codecs
+import struct
+import textwrap
+import types
+import re
+
+default_encoding = 'utf-8'
+
+# function pofile() {{{
+
+def pofile(fpath, **kwargs):
+    """
+    Convenience function that parses the po/pot file *fpath* and returns
+    a POFile instance.
+
+    **Keyword arguments**:
+      - *fpath*: string, full or relative path to the po/pot file to parse
+      - *wrapwidth*: integer, the wrap width, only useful when the -w option
+        was passed to xgettext (optional, defaults to 78)
+      - *autodetect_encoding*: boolean, if set to False the function will
+        not try to detect the po file encoding (optional, defaults to True)
+      - *encoding*: string, an encoding, only relevant if autodetect_encoding
+        is set to False
+      - *check_for_duplicates*: whether to check for duplicate entries when
+        adding entries to the file (optional, defaults to False)
+
+    **Example**:
+
+    >>> import polib
+    >>> po = polib.pofile('tests/test_weird_occurrences.po',
+    ...     check_for_duplicates=True)
+    >>> po #doctest: +ELLIPSIS
+    <POFile instance at ...>
+    >>> import os, tempfile
+    >>> all_attrs = ('msgctxt', 'msgid', 'msgstr', 'msgid_plural', 
+    ...              'msgstr_plural', 'obsolete', 'comment', 'tcomment', 
+    ...              'occurrences', 'flags', 'previous_msgctxt', 
+    ...              'previous_msgid', 'previous_msgid_plural')
+    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
+    ...     orig_po = polib.pofile('tests/'+fname)
+    ...     tmpf = tempfile.NamedTemporaryFile().name
+    ...     orig_po.save(tmpf)
+    ...     try:
+    ...         new_po = polib.pofile(tmpf)
+    ...         for old, new in zip(orig_po, new_po):
+    ...             for attr in all_attrs:
+    ...                 if getattr(old, attr) != getattr(new, attr):
+    ...                     getattr(old, attr)
+    ...                     getattr(new, attr)
+    ...     finally:
+    ...         os.unlink(tmpf)
+    >>> po_file = polib.pofile('tests/test_save_as_mofile.po')
+    >>> tmpf = tempfile.NamedTemporaryFile().name
+    >>> po_file.save_as_mofile(tmpf)
+    >>> try:
+    ...     mo_file = polib.mofile(tmpf)
+    ...     for old, new in zip(po_file, mo_file):
+    ...         if po_file._encode(old.msgid) != mo_file._encode(new.msgid):
+    ...             'OLD: ', po_file._encode(old.msgid)
+    ...             'NEW: ', mo_file._encode(new.msgid)
+    ...         if po_file._encode(old.msgstr) != mo_file._encode(new.msgstr):
+    ...             'OLD: ', po_file._encode(old.msgstr)
+    ...             'NEW: ', mo_file._encode(new.msgstr)
+    ...             print new.msgstr
+    ... finally:
+    ...     os.unlink(tmpf)
+    """
+    if kwargs.get('autodetect_encoding', True):
+        enc = detect_encoding(fpath)
+    else:
+        enc = kwargs.get('encoding', default_encoding)
+    check_for_duplicates = kwargs.get('check_for_duplicates', False)
+    parser = _POFileParser(
+        fpath,
+        encoding=enc,
+        check_for_duplicates=check_for_duplicates
+    )
+    instance = parser.parse()
+    instance.wrapwidth = kwargs.get('wrapwidth', 78)
+    return instance
+
+# }}}
+# function mofile() {{{
+
+def mofile(fpath, **kwargs):
+    """
+    Convenience function that parses the mo file *fpath* and returns
+    a MOFile instance.
+
+    **Keyword arguments**:
+      - *fpath*: string, full or relative path to the mo file to parse
+      - *wrapwidth*: integer, the wrap width, only useful when the -w option
+        was passed to xgettext to generate the po file that was used to
+        format the mo file (optional, defaults to 78)
+      - *autodetect_encoding*: boolean, if set to False the function will
+        not try to detect the mo file encoding (optional, defaults to True)
+      - *encoding*: string, an encoding, only relevant if autodetect_encoding
+        is set to False
+      - *check_for_duplicates*: whether to check for duplicate entries when
+        adding entries to the file (optional, defaults to False)
+
+    **Example**:
+
+    >>> import polib
+    >>> mo = polib.mofile('tests/test_utf8.mo', check_for_duplicates=True)
+    >>> mo #doctest: +ELLIPSIS
+    <MOFile instance at ...>
+    >>> import os, tempfile
+    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
+    ...     orig_mo = polib.mofile('tests/'+fname)
+    ...     tmpf = tempfile.NamedTemporaryFile().name
+    ...     orig_mo.save(tmpf)
+    ...     try:
+    ...         new_mo = polib.mofile(tmpf)
+    ...         for old, new in zip(orig_mo, new_mo):
+    ...             if old.msgid != new.msgid:
+    ...                 old.msgstr
+    ...                 new.msgstr
+    ...     finally:
+    ...         os.unlink(tmpf)
+    """
+    if kwargs.get('autodetect_encoding', True):
+        enc = detect_encoding(fpath, True)
+    else:
+        enc = kwargs.get('encoding', default_encoding)
+    parser = _MOFileParser(
+        fpath,
+        encoding=enc,
+        check_for_duplicates=kwargs.get('check_for_duplicates', False)
+    )
+    instance = parser.parse()
+    instance.wrapwidth = kwargs.get('wrapwidth', 78)
+    return instance
+
+# }}}
+# function detect_encoding() {{{
+
+def detect_encoding(fpath, binary_mode=False):
+    """
+    Try to detect the encoding used by the file *fpath*. The function
+    returns the polib default *encoding* if it is unable to detect it.
+
+    **Arguments**:
+      - *fpath*: string, full or relative path to the po/mo file to parse
+      - *binary_mode*: boolean, set to True for mo files so that the file
+        is opened in binary mode (optional, defaults to False)
+
+    **Examples**:
+
+    >>> print(detect_encoding('tests/test_noencoding.po'))
+    utf-8
+    >>> print(detect_encoding('tests/test_utf8.po'))
+    UTF-8
+    >>> print(detect_encoding('tests/test_utf8.mo', True))
+    UTF-8
+    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
+    ISO_8859-15
+    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
+    ISO_8859-15
+    """
+    import re
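+    # matches the charset declaration of the header entry, e.g. a line like
+    #   "Content-Type: text/plain; charset=UTF-8\n"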
+    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
+    if binary_mode:
+        mode = 'rb'
+    else:
+        mode = 'r'
+    f = open(fpath, mode)
+    for l in f.readlines():
+        match = rx.search(l)
+        if match:
+            f.close()
+            return match.group(1).strip()
+    f.close()
+    return default_encoding
+
+# }}}
+# function escape() {{{
+
+def escape(st):
+    """
+    Escape special characters in the given string *st* and return the
+    escaped string.
+
+    **Examples**:
+
+    >>> escape('\\t and \\n and \\r and " and \\\\')
+    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
+    """
+    return st.replace('\\', r'\\')\
+             .replace('\t', r'\t')\
+             .replace('\r', r'\r')\
+             .replace('\n', r'\n')\
+             .replace('\"', r'\"')
+
+# }}}
+# function unescape() {{{
+
+def unescape(st):
+    """
+    Unescape special characters in the given string *st* and return the
+    unescaped string.
+
+    **Examples**:
+
+    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
+    '\\t and \\n and \\r and " and \\\\'
+    >>> unescape(r'\\n')
+    '\\n'
+    >>> unescape(r'\\\\n')
+    '\\\\n'
+    >>> unescape(r'\\\\n\\n')
+    '\\\\n\\n'
+    """
+    def unescape_repl(m):
+        m = m.group(1)
+        if m == 'n':
+            return '\n'
+        if m == 't':
+            return '\t'
+        if m == 'r':
+            return '\r'
+        if m == '\\':
+            return '\\'
+        return m # handles escaped double quote
+    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
+
+# }}}
+# class _BaseFile {{{
+
+class _BaseFile(list):
+    """
+    Common parent class for POFile and MOFile classes.
+    This class must **not** be instantiated directly.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Constructor.
+
+        **Keyword arguments**:
+          - *fpath*: string, path to po or mo file
+          - *wrapwidth*: integer, the wrap width, only useful when the -w
+            option was passed to xgettext to generate the po file that was
+            used to format the mo file, defaults to 78 (optional),
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
+        """
+        list.__init__(self)
+        # the path of the po/mo file
+        self.fpath = kwargs.get('fpath')
+        # the width at which lines should be wrapped
+        self.wrapwidth = kwargs.get('wrapwidth', 78)
+        # the file encoding
+        self.encoding = kwargs.get('encoding', default_encoding)
+        # whether to check for duplicate entries or not
+        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
+        # header
+        self.header = ''
+        # both po and mo files have metadata
+        self.metadata = {}
+        self.metadata_is_fuzzy = 0
+
+    def __str__(self):
+        """
+        String representation of the file.
+        """
+        ret = []
+        entries = [self.metadata_as_entry()] + \
+                  [e for e in self if not e.obsolete]
+        for entry in entries:
+            ret.append(entry.__str__(self.wrapwidth))
+        for entry in self.obsolete_entries():
+            ret.append(entry.__str__(self.wrapwidth))
+        return '\n'.join(ret)
+
+    def __contains__(self, entry):
+        """
+        Overridden method to implement the membership test (in and not in).
+        The method considers that an entry is in the file if it finds an
+        entry that has the same msgid (case-sensitive).
+
+        **Keyword argument**:
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> po = POFile()
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='barfoo', msgstr='spam')
+        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> e4 = POEntry(msgid='spameggs', msgstr='eggs')
+        >>> po.append(e1)
+        >>> po.append(e2)
+        >>> e1 in po
+        True
+        >>> e2 not in po
+        False
+        >>> e3 in po
+        True
+        >>> e4 in po
+        False
+        """
+        return self.find(entry.msgid, by='msgid') is not None
+
+    def append(self, entry):
+        """
+        Overridden method that checks for duplicate entries: if the user
+        tries to add an entry that already exists, the method raises a
+        ValueError exception.
+
+        **Keyword argument**:
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> po = POFile(check_for_duplicates=True)
+        >>> po.append(e1)
+        >>> try:
+        ...     po.append(e2)
+        ... except ValueError, e:
+        ...     unicode(e)
+        u'Entry "foobar" already exists'
+        """
+        if self.check_for_duplicates and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).append(entry)
+
+    def insert(self, index, entry):
+        """
+        Overridden method that checks for duplicate entries: if the user
+        tries to insert an entry that already exists, the method raises a
+        ValueError exception.
+
+        **Keyword arguments**:
+          - *index*: index at which the entry should be inserted
+          - *entry*: an instance of polib._BaseEntry
+
+        **Tests**:
+        >>> e1 = POEntry(msgid='foobar', msgstr='spam')
+        >>> e2 = POEntry(msgid='barfoo', msgstr='eggs')
+        >>> e3 = POEntry(msgid='foobar', msgstr='eggs')
+        >>> po = POFile(check_for_duplicates=True)
+        >>> po.insert(0, e1)
+        >>> po.insert(1, e2)
+        >>> try:
+        ...     po.insert(0, e3)
+        ... except ValueError, e:
+        ...     unicode(e)
+        u'Entry "foobar" already exists'
+        """
+        if self.check_for_duplicates and entry in self:
+            raise ValueError('Entry "%s" already exists' % entry.msgid)
+        super(_BaseFile, self).insert(index, entry)
+
+    def __repr__(self):
+        """Return the official string representation of the object."""
+        return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
+    def metadata_as_entry(self):
+        """
+        Return the metadata as an entry:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_fuzzy_header.po')
+        >>> unicode(po) == unicode(open('tests/test_fuzzy_header.po').read())
+        True
+        """
+        e = POEntry(msgid='')
+        mdata = self.ordered_metadata()
+        if mdata:
+            strs = []
+            e._multiline_str['msgstr'] = ''
+            for name, value in mdata:
+                # Strip whitespace off each line in a multi-line entry
+                strs.append('%s: %s' % (name, value))
+            e.msgstr = '\n'.join(strs) + '\n'
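+            # the __POLIB__NL__ sentinel keeps each "Name: value" line as its
+            # own quoted string when _str_field() renders the entry, instead
+            # of letting it be rewrapped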
+            e._multiline_str['msgstr'] = '__POLIB__NL__'.join(
+                    [s + '\n' for s in strs])
+        if self.metadata_is_fuzzy:
+            e.flags.append('fuzzy')
+        return e
+
+    def save(self, fpath=None, repr_method='__str__'):
+        """
+        Save the po file to *fpath*. If *fpath* is not provided, the file
+        the object was loaded from (self.fpath) is rewritten with the
+        modified data.
+
+        **Keyword arguments**:
+          - *fpath*: string, full or relative path to the file.
+          - *repr_method*: string, the method to use for output.
+        """
+        if self.fpath is None and fpath is None:
+            raise IOError('You must provide a file path to save() method')
+        contents = getattr(self, repr_method)()
+        if fpath is None:
+            fpath = self.fpath
+        if repr_method == 'to_binary':
+            fhandle = open(fpath, 'wb')
+        else:
+            fhandle = codecs.open(fpath, 'w', self.encoding)
+            if type(contents) != types.UnicodeType:
+                contents = contents.decode(self.encoding)
+        fhandle.write(contents)
+        fhandle.close()
+
+    def find(self, st, by='msgid'):
+        """
+        Find the entry whose msgid (or the property identified by the *by*
+        attribute) matches the string *st*.
+
+        **Keyword arguments**:
+          - *st*: string, the string to search for
+          - *by*: string, the comparison attribute
+
+        **Examples**:
+
+        >>> po = pofile('tests/test_utf8.po')
+        >>> entry = po.find('Thursday')
+        >>> entry.msgstr
+        u'Jueves'
+        >>> entry = po.find('Some unexistant msgid')
+        >>> entry is None
+        True
+        >>> entry = po.find('Jueves', 'msgstr')
+        >>> entry.msgid
+        u'Thursday'
+        """
+        for e in self:
+            if getattr(e, by) == st:
+                return e
+        return None
+
+    def ordered_metadata(self):
+        """
+        Convenience method that returns the metadata in a canonical order.
+        The return value is a list of (metadata_name, metadata_value) tuples.
+        """
+        # copy the dict first
+        metadata = self.metadata.copy()
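+        # known header fields come first, in the order gettext tools
+        # conventionally emit them; any remaining fields are appended
+        # afterwards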
+        data_order = [
+            'Project-Id-Version',
+            'Report-Msgid-Bugs-To',
+            'POT-Creation-Date',
+            'PO-Revision-Date',
+            'Last-Translator',
+            'Language-Team',
+            'MIME-Version',
+            'Content-Type',
+            'Content-Transfer-Encoding'
+        ]
+        ordered_data = []
+        for data in data_order:
+            try:
+                value = metadata.pop(data)
+                ordered_data.append((data, value))
+            except KeyError:
+                pass
+        # the rest of the metadata is not covered by any spec, so it is
+        # simply appended in alphabetical order
+        keys = sorted(metadata.keys())
+        for data in keys:
+            value = metadata[data]
+            ordered_data.append((data, value))
+        return ordered_data
+
+    def to_binary(self):
+        """
+        Return the mofile binary representation.
+        """
+        import array
+        import struct
+        import types
+        offsets = []
+        entries = self.translated_entries()
+        # the keys are sorted in the .mo file
+        def cmp(_self, other):
+            if _self.msgid > other.msgid:
+                return 1
+            elif _self.msgid < other.msgid:
+                return -1
+            else:
+                return 0
+        # add metadata entry
+        entries.sort(cmp)
+        mentry = self.metadata_as_entry()
+        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
+        entries = [mentry] + entries
+        entries_len = len(entries)
+        ids, strs = '', ''
+        for e in entries:
+            # For each string we need its size and file offset.  Each string
+            # is NUL terminated; the NUL is not counted in the size.
+            if e.msgid_plural:
+                indexes = e.msgstr_plural.keys()
+                indexes.sort()
+                msgstr = []
+                for index in indexes:
+                    msgstr.append(e.msgstr_plural[index])
+                msgid = self._encode(e.msgid + '\0' + e.msgid_plural)
+                msgstr = self._encode('\0'.join(msgstr))
+            else:
+                msgid = self._encode(e.msgid)
+                msgstr = self._encode(e.msgstr)
+            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
+            ids  += msgid  + '\0'
+            strs += msgstr + '\0'
+        # The header is 7 32-bit unsigned integers.
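+        # it is followed by two index tables (msgids, then msgstrs), each
+        # storing an 8-byte (length, offset) record per entry, hence the
+        # 16*entries_len below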
+        keystart = 7*4+16*entries_len
+        # and the values start after the keys
+        valuestart = keystart + len(ids)
+        koffsets = []
+        voffsets = []
+        # The string table first has the list of keys, then the list of values.
+        # Each entry has first the size of the string, then the file offset.
+        for o1, l1, o2, l2 in offsets:
+            koffsets += [l1, o1+keystart]
+            voffsets += [l2, o2+valuestart]
+        offsets = koffsets + voffsets
+        output  = struct.pack("IIIIIII",
+                             0x950412de,        # Magic number
+                             0,                 # Version
+                             entries_len,       # # of entries
+                             7*4,               # start of key index
+                             7*4+entries_len*8, # start of value index
+                             0, 0)              # size and offset of hash table
+        output += array.array("I", offsets).tostring()
+        output += ids
+        output += strs
+        return output
+
+    def _encode(self, mixed):
+        """
+        Encode the given argument with the file encoding if the type is unicode
+        and return the encoded string.
+        """
+        if type(mixed) == types.UnicodeType:
+            return mixed.encode(self.encoding)
+        return mixed
+
+# }}}
+# class POFile {{{
+
+class POFile(_BaseFile):
+    '''
+    Po (or Pot) file reader/writer.
+    POFile objects inherit the list interface and its methods.
+
+    **Example**:
+
+    >>> po = POFile()
+    >>> entry1 = POEntry(
+    ...     msgid="Some english text",
+    ...     msgstr="Un texte en anglais"
+    ... )
+    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
+    >>> entry1.comment = "Some useful comment"
+    >>> entry2 = POEntry(
+    ...     msgid="Peace in some languages",
+    ...     msgstr="Pace سلام שלום Hasîtî 和平"
+    ... )
+    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
+    >>> entry2.comment = "Another useful comment"
+    >>> entry3 = POEntry(
+    ...     msgid='Some entry with quotes " \\"',
+    ...     msgstr='Un message unicode avec des quotes " \\"'
+    ... )
+    >>> entry3.comment = "Test string quoting"
+    >>> po.append(entry1)
+    >>> po.append(entry2)
+    >>> po.append(entry3)
+    >>> po.header = "Some Header"
+    >>> print(po)
+    # Some Header
+    msgid ""
+    msgstr ""
+    <BLANKLINE>
+    #. Some useful comment
+    #: testfile:12 another_file:1
+    msgid "Some english text"
+    msgstr "Un texte en anglais"
+    <BLANKLINE>
+    #. Another useful comment
+    #: testfile:15 another_file:5
+    msgid "Peace in some languages"
+    msgstr "Pace سلام שלום Hasîtî 和平"
+    <BLANKLINE>
+    #. Test string quoting
+    msgid "Some entry with quotes \\" \\""
+    msgstr "Un message unicode avec des quotes \\" \\""
+    <BLANKLINE>
+    '''
+
+    def __str__(self):
+        """Return the string representation of the po file"""
+        ret, headers = '', self.header.split('\n')
+        for header in headers:
+            if header[:1] in [',', ':']:
+                ret += '#%s\n' % header
+            else:
+                ret += '# %s\n' % header
+        return ret + _BaseFile.__str__(self)
+
+    def save_as_mofile(self, fpath):
+        """
+        Save the binary representation of the file to *fpath*.
+
+        **Keyword arguments**:
+          - *fpath*: string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Convenience method that returns the percentage of translated
+        messages.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> po.percent_translated()
+        50
+        >>> po = POFile()
+        >>> po.percent_translated()
+        100
+        """
+        total = len([e for e in self if not e.obsolete])
+        if total == 0:
+            return 100
+        translated = len(self.translated_entries())
+        return int((100.00 / float(total)) * translated)
+
+    def translated_entries(self):
+        """
+        Convenience method that returns a list of translated entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.translated_entries())
+        6
+        """
+        return [e for e in self if e.translated()]
+
+    def untranslated_entries(self):
+        """
+        Convenience method that returns a list of untranslated entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.untranslated_entries())
+        4
+        """
+        return [e for e in self if not e.translated() and not e.obsolete \
+                and not 'fuzzy' in e.flags]
+
+    def fuzzy_entries(self):
+        """
+        Convenience method that returns the list of 'fuzzy' entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.fuzzy_entries())
+        2
+        """
+        return [e for e in self if 'fuzzy' in e.flags]
+
+    def obsolete_entries(self):
+        """
+        Convenience method that returns the list of obsolete entries.
+
+        **Example**:
+
+        >>> import polib
+        >>> po = polib.pofile('tests/test_pofile_helpers.po')
+        >>> len(po.obsolete_entries())
+        4
+        """
+        return [e for e in self if e.obsolete]
+
+    def merge(self, refpot):
+        """
+        XXX this may not work if the encodings differ; it needs thought
+        and a general refactoring of how polib handles encodings...
+
+        Convenience method that merges the current pofile with the pot file
+        provided. It behaves exactly like the gettext msgmerge utility:
+
+          - translator comments of this file are preserved, but extracted
+            comments and occurrences are discarded
+          - any translation or comment in the pot file is discarded, but
+            dot comments and file positions are preserved
+
+        **Keyword argument**:
+          - *refpot*: object POFile, the reference catalog.
+
+        **Example**:
+
+        >>> import polib
+        >>> refpot = polib.pofile('tests/test_merge.pot')
+        >>> po = polib.pofile('tests/test_merge_before.po')
+        >>> po.merge(refpot)
+        >>> expected_po = polib.pofile('tests/test_merge_after.po')
+        >>> unicode(po) == unicode(expected_po)
+        True
+        """
+        for entry in refpot:
+            e = self.find(entry.msgid)
+            if e is None:
+                e = POEntry()
+                self.append(e)
+            e.merge(entry)
+        # ok, now we must "obsolete" entries that are not in the refpot
+        # anymore
+        for entry in self:
+            if refpot.find(entry.msgid) is None:
+                entry.obsolete = True
+
+# }}}
+# class MOFile {{{
+
+class MOFile(_BaseFile):
+    '''
+    Mo file reader/writer.
+    MOFile objects inherit the list interface and its methods.
+
+    **Example**:
+
+    >>> mo = MOFile()
+    >>> entry1 = POEntry(
+    ...     msgid="Some english text",
+    ...     msgstr="Un texte en anglais"
+    ... )
+    >>> entry2 = POEntry(
+    ...     msgid="I need my dirty cheese",
+    ...     msgstr="Je veux mon sale fromage"
+    ... )
+    >>> entry3 = MOEntry(
+    ...     msgid='Some entry with quotes " \\"',
+    ...     msgstr='Un message unicode avec des quotes " \\"'
+    ... )
+    >>> mo.append(entry1)
+    >>> mo.append(entry2)
+    >>> mo.append(entry3)
+    >>> print(mo)
+    msgid ""
+    msgstr ""
+    <BLANKLINE>
+    msgid "Some english text"
+    msgstr "Un texte en anglais"
+    <BLANKLINE>
+    msgid "I need my dirty cheese"
+    msgstr "Je veux mon sale fromage"
+    <BLANKLINE>
+    msgid "Some entry with quotes \\" \\""
+    msgstr "Un message unicode avec des quotes \\" \\""
+    <BLANKLINE>
+    '''
+
+    def __init__(self, *args, **kwargs):
+        """
+        MOFile constructor. Mo files have two other properties:
+            - magic_number: the magic_number of the binary file,
+            - version: the version of the mo spec.
+        """
+        _BaseFile.__init__(self, *args, **kwargs)
+        self.magic_number = None
+        self.version = 0
+
+    def save_as_pofile(self, fpath):
+        """
+        Save the string representation of the file to *fpath*.
+
+        **Keyword argument**:
+          - *fpath*: string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath)
+
+    def save(self, fpath):
+        """
+        Save the binary representation of the file to *fpath*.
+
+        **Keyword argument**:
+          - *fpath*: string, full or relative path to the file.
+        """
+        _BaseFile.save(self, fpath, 'to_binary')
+
+    def percent_translated(self):
+        """
+        Convenience method to keep the same interface as POFile instances.
+        """
+        return 100
+
+    def translated_entries(self):
+        """
+        Convenience method to keep the same interface as POFile instances.
+        """
+        return self
+
+    def untranslated_entries(self):
+        """
+        Convenience method to keep the same interface as POFile instances.
+        """
+        return []
+
+    def fuzzy_entries(self):
+        """
+        Convenience method to keep the same interface as POFile instances.
+        """
+        return []
+
+    def obsolete_entries(self):
+        """
+        Convenience method to keep the same interface as POFile instances.
+        """
+        return []
+
+# }}}
+# class _BaseEntry {{{
+
+class _BaseEntry(object):
+    """
+    Base class for POEntry or MOEntry objects.
+    This class must *not* be instantiated directly.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Base Entry constructor."""
+        self.msgid = kwargs.get('msgid', '')
+        self.msgstr = kwargs.get('msgstr', '')
+        self.msgid_plural = kwargs.get('msgid_plural', '')
+        self.msgstr_plural = kwargs.get('msgstr_plural', {})
+        self.obsolete = kwargs.get('obsolete', False)
+        self.encoding = kwargs.get('encoding', default_encoding)
+        self.msgctxt = kwargs.get('msgctxt', None)
+        self._multiline_str = {}
+
+    def __repr__(self):
+        """Return the official string representation of the object."""
+        return '<%s instance at %x>' % (self.__class__.__name__, id(self))
+
+    def __str__(self, wrapwidth=78):
+        """
+        Common string representation of the POEntry and MOEntry
+        objects.
+        """
+        if self.obsolete:
+            delflag = '#~ '
+        else:
+            delflag = ''
+        ret = []
+        # write the msgctxt if any
+        if self.msgctxt is not None:
+            ret += self._str_field("msgctxt", delflag, "", self.msgctxt)
+        # write the msgid
+        ret += self._str_field("msgid", delflag, "", self.msgid)
+        # write the msgid_plural if any
+        if self.msgid_plural:
+            ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural)
+        if self.msgstr_plural:
+            # write the msgstr_plural if any
+            msgstrs = self.msgstr_plural
+            keys = list(msgstrs)
+            keys.sort()
+            for index in keys:
+                msgstr = msgstrs[index]
+                plural_index = '[%s]' % index
+                ret += self._str_field("msgstr", delflag, plural_index, msgstr)
+        else:
+            # otherwise write the msgstr
+            ret += self._str_field("msgstr", delflag, "", self.msgstr)
+        ret.append('')
+        return '\n'.join(ret)
+
+    def _str_field(self, fieldname, delflag, plural_index, field):
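+        # a pre-wrapped multiline value (pieces joined with the __POLIB__NL__
+        # sentinel, see metadata_as_entry() and handle_mc()) bypasses the
+        # normal splitlines()-based wrapping below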
+        if (fieldname + plural_index) in self._multiline_str:
+            field = self._multiline_str[fieldname + plural_index]
+            lines = [''] + field.split('__POLIB__NL__')
+        else:
+            lines = field.splitlines(True)
+            if len(lines) > 1:
+                lines = ['']+lines # start with initial empty line
+            else:
+                lines = [field] # needed for the empty string case
+        if fieldname.startswith('previous_'):
+            # quick and dirty trick to get the real field name
+            fieldname = fieldname[9:]
+
+        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
+                                escape(lines.pop(0)))]
+        for mstr in lines:
+            ret.append('%s"%s"' % (delflag, escape(mstr)))
+        return ret
+
+# }}}
+# class POEntry {{{
+
+class POEntry(_BaseEntry):
+    """
+    Represents a po file entry.
+
+    **Examples**:
+
+    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
+    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
+    >>> print(entry)
+    #: welcome.py:12 anotherfile.py:34
+    msgid "Welcome"
+    msgstr "Bienvenue"
+    <BLANKLINE>
+    >>> entry = POEntry()
+    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
+    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
+    >>> entry.flags.append('c-format')
+    >>> entry.previous_msgctxt = '@somecontext'
+    >>> entry.previous_msgid = 'I had eggs but no spam !'
+    >>> entry.previous_msgid_plural = 'I had eggs and %d spam !'
+    >>> entry.msgctxt = '@somenewcontext'
+    >>> entry.msgid = 'I have spam but no egg !'
+    >>> entry.msgid_plural = 'I have spam and %d eggs !'
+    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
+    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
+    >>> print(entry)
+    #. A plural translation. This is a very very very long line please do not
+    #. wrap, this is just for testing comment wrapping...
+    # A plural translation. This is a very very very long line please do not wrap,
+    # this is just for testing comment wrapping...
+    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
+    #: src/eggs.c:45
+    #, c-format
+    #| msgctxt "@somecontext"
+    #| msgid "I had eggs but no spam !"
+    #| msgid_plural "I had eggs and %d spam !"
+    msgctxt "@somenewcontext"
+    msgid "I have spam but no egg !"
+    msgid_plural "I have spam and %d eggs !"
+    msgstr[0] "J'ai du jambon mais aucun oeuf !"
+    msgstr[1] "J'ai du jambon et %d oeufs !"
+    <BLANKLINE>
+    """
+
+    def __init__(self, *args, **kwargs):
+        """POEntry constructor."""
+        _BaseEntry.__init__(self, *args, **kwargs)
+        self.comment = kwargs.get('comment', '')
+        self.tcomment = kwargs.get('tcomment', '')
+        self.occurrences = kwargs.get('occurrences', [])
+        self.flags = kwargs.get('flags', [])
+        self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
+        self.previous_msgid = kwargs.get('previous_msgid', None)
+        self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+
+    def __str__(self, wrapwidth=78):
+        """
+        Return the string representation of the entry.
+        """
+        if self.obsolete:
+            return _BaseEntry.__str__(self)
+        ret = []
+        # comment first, if any (with text wrapping as xgettext does)
+        if self.comment != '':
+            for comment in self.comment.split('\n'):
+                if wrapwidth > 0 and len(comment) > wrapwidth-3:
+                    ret += textwrap.wrap(comment, wrapwidth,
+                                         initial_indent='#. ',
+                                         subsequent_indent='#. ',
+                                         break_long_words=False)
+                else:
+                    ret.append('#. %s' % comment)
+        # translator comment, if any (with text wrapping as xgettext does)
+        if self.tcomment != '':
+            for tcomment in self.tcomment.split('\n'):
+                if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
+                    ret += textwrap.wrap(tcomment, wrapwidth,
+                                         initial_indent='# ',
+                                         subsequent_indent='# ',
+                                         break_long_words=False)
+                else:
+                    ret.append('# %s' % tcomment)
+        # occurrences (with text wrapping as xgettext does)
+        if self.occurrences:
+            filelist = []
+            for fpath, lineno in self.occurrences:
+                if lineno:
+                    filelist.append('%s:%s' % (fpath, lineno))
+                else:
+                    filelist.append(fpath)
+            filestr = ' '.join(filelist)
+            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
+                # XXX textwrap splits words that contain a hyphen, which is
+                # not what we want for filenames, so the dirty hack is to
+                # temporarily replace hyphens with a character that is very
+                # unlikely to appear in a filename, like "*"
+                lines = textwrap.wrap(filestr.replace('-', '*'),
+                                      wrapwidth,
+                                      initial_indent='#: ',
+                                      subsequent_indent='#: ',
+                                      break_long_words=False)
+                # end of the replace hack
+                for line in lines:
+                    ret.append(line.replace('*', '-'))
+            else:
+                ret.append('#: '+filestr)
+        # flags
+        if self.flags:
+            flags = []
+            for flag in self.flags:
+                flags.append(flag)
+            ret.append('#, %s' % ', '.join(flags))
+
+        # previous context and previous msgid/msgid_plural
+        if self.previous_msgctxt:
+            ret += self._str_field("previous_msgctxt", "#| ", "",
+                                   self.previous_msgctxt)
+        if self.previous_msgid:
+            ret += self._str_field("previous_msgid", "#| ", "", 
+                                   self.previous_msgid)
+        if self.previous_msgid_plural:
+            ret += self._str_field("previous_msgid_plural", "#| ", "", 
+                                   self.previous_msgid_plural)
+
+        ret.append(_BaseEntry.__str__(self))
+        return '\n'.join(ret)
+
+    def __cmp__(self, other):
+        '''
+        Called by comparison operations if rich comparison is not defined.
+
+        **Tests**:
+        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
+        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
+        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
+        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
+        >>> po = POFile()
+        >>> po.append(a)
+        >>> po.append(b)
+        >>> po.append(c1)
+        >>> po.append(c2)
+        >>> po.sort()
+        >>> print(po)
+        # 
+        msgid ""
+        msgstr ""
+        <BLANKLINE>
+        #: a.py:1 a.py:3
+        msgid "c2"
+        msgstr ""
+        <BLANKLINE>
+        #: a.py:1 b.py:1
+        msgid "c1"
+        msgstr ""
+        <BLANKLINE>
+        #: b.py:1 b.py:3
+        msgid "a"
+        msgstr ""
+        <BLANKLINE>
+        #: b.py:1 b.py:3
+        msgid "b"
+        msgstr ""
+        <BLANKLINE>
+        '''
+        def compare_occurrences(a, b):
+            """
+            Compare an entry occurrence with another one.
+            """
+            if a[0] != b[0]:
+                return a[0] < b[0]
+            if a[1] != b[1]:
+                return a[1] < b[1]
+            return 0
+
+        # First: Obsolete test
+        if self.obsolete != other.obsolete:
+            if self.obsolete:
+                return -1
+            else:
+                return 1
+        # Work on a copy to protect original
+        occ1 = self.occurrences[:]
+        occ2 = other.occurrences[:]
+        # Sorting using compare method
+        occ1.sort(compare_occurrences)
+        occ2.sort(compare_occurrences)
+        # Comparing sorted occurrences
+        pos = 0
+        for entry1 in occ1:
+            try:
+                entry2 = occ2[pos]
+            except IndexError:
+                return 1
+            pos = pos + 1
+            if entry1[0] != entry2[0]:
+                if entry1[0] > entry2[0]:
+                    return 1
+                else:
+                    return -1
+            if entry1[1] != entry2[1]:
+                if entry1[1] > entry2[1]:
+                    return 1
+                else:
+                    return -1
+        # Finally: Compare message ID
+        if self.msgid > other.msgid: return 1
+        else: return -1
+
+    def translated(self):
+        """
+        Return True if the entry has been translated, False otherwise.
+        """
+        if self.obsolete or 'fuzzy' in self.flags:
+            return False
+        if self.msgstr != '':
+            return True
+        if self.msgstr_plural:
+            for pos in self.msgstr_plural:
+                if self.msgstr_plural[pos] == '':
+                    return False
+            return True
+        return False
+
+    def merge(self, other):
+        """
+        Merge the current entry with the given pot entry.
+        """
+        self.msgid        = other.msgid
+        self.occurrences  = other.occurrences
+        self.comment      = other.comment
+        self.flags        = other.flags
+        self.msgid_plural = other.msgid_plural
+        if other.msgstr_plural:
+            for pos in other.msgstr_plural:
+                try:
+                    # keep existing translation at pos if any
+                    self.msgstr_plural[pos]
+                except KeyError:
+                    self.msgstr_plural[pos] = ''
+
+# }}}
+# class MOEntry {{{
+
+class MOEntry(_BaseEntry):
+    """
+    Represents a mo file entry.
+
+    **Examples**:
+
+    >>> entry = MOEntry()
+    >>> entry.msgid  = 'translate me !'
+    >>> entry.msgstr = 'traduisez moi !'
+    >>> print(entry)
+    msgid "translate me !"
+    msgstr "traduisez moi !"
+    <BLANKLINE>
+    """
+
+    def __str__(self, wrapwidth=78):
+        """
+        Return the string representation of the entry.
+        """
+        return _BaseEntry.__str__(self, wrapwidth)
+
+# }}}
+# class _POFileParser {{{
+
+class _POFileParser(object):
+    """
+    A finite state machine to parse the po file format efficiently
+    and correctly.
+    """
+
+    def __init__(self, fpath, *args, **kwargs):
+        """
+        Constructor.
+
+        **Arguments**:
+          - *fpath*: string, path to the po file
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
+        """
+        enc = kwargs.get('encoding', default_encoding)
+        check_dup = kwargs.get('check_for_duplicates', False)
+        try:
+            self.fhandle = codecs.open(fpath, 'rU', enc)
+        except LookupError:
+            enc = default_encoding
+            self.fhandle = codecs.open(fpath, 'rU', enc)
+        self.instance = POFile(
+            fpath=fpath,
+            encoding=enc,
+            check_for_duplicates=check_dup
+        )
+        self.transitions = {}
+        self.current_entry = POEntry()
+        self.current_state = 'ST'
+        self.current_token = None
+        # two memo flags used in handlers
+        self.msgstr_index = 0
+        self.entry_obsolete = 0
+        # Configure the state machine, by adding transitions.
+        # Signification of symbols:
+        #     * ST: Beginning of the file (start)
+        #     * HE: Header
+        #     * TC: a translation comment
+        #     * GC: a generated comment
+        #     * OC: a file/line occurrence
+        #     * FL: a flags line
+        #     * CT: a message context
+        #     * PC: a previous msgctxt
+        #     * PM: a previous msgid
+        #     * PP: a previous msgid_plural
+        #     * MI: a msgid
+        #     * MP: a msgid plural
+        #     * MS: a msgstr
+        #     * MX: a msgstr plural
+        #     * MC: a msgid or msgstr continuation line
+        all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
+               'MS', 'MP', 'MX', 'MI']
+
+        self.add('TC', ['ST', 'HE'],                                     'HE')
+        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
+                        'MP', 'MX', 'MI'],                               'TC')
+        self.add('GC', all,                                              'GC')
+        self.add('OC', all,                                              'OC')
+        self.add('FL', all,                                              'FL')
+        self.add('PC', all,                                              'PC')
+        self.add('PM', all,                                              'PM')
+        self.add('PP', all,                                              'PP')
+        self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
+                        'PP', 'MS', 'MX'],                               'CT')
+        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC', 
+                 'PM', 'PP', 'MS', 'MX'],                                'MI')
+        self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'],             'MP')
+        self.add('MS', ['MI', 'MP', 'TC'],                               'MS')
+        self.add('MX', ['MI', 'MX', 'MP', 'TC'],                         'MX')
+        self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
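+        # for example, the first add() call above registers the transition
+        # ('TC', 'ST') -> (handle_he, 'HE'): a translator comment seen at the
+        # very start of the file is treated as part of the header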
+
+    def parse(self):
+        """
+        Run the state machine, parse the file line by line and call process()
+        with the current matched symbol.
+        """
+        i = 1
+        for line in self.fhandle:
+            line = line.strip()
+            if line == '':
+                i = i+1
+                continue
+            if line[:3] == '#~ ':
+                line = line[3:]
+                self.entry_obsolete = 1
+            else:
+                self.entry_obsolete = 0
+            self.current_token = line
+            if line[:2] == '#:':
+                # we are on a occurrences line
+                self.process('OC', i)
+            elif line[:9] == 'msgctxt "':
+                # we are on a msgctxt
+                self.process('CT', i)
+            elif line[:7] == 'msgid "':
+                # we are on a msgid
+                self.process('MI', i)
+            elif line[:8] == 'msgstr "':
+                # we are on a msgstr
+                self.process('MS', i)
+            elif line[:1] == '"' or line[:4] == '#| "':
+                # we are on a continuation line or some metadata
+                self.process('MC', i)
+            elif line[:14] == 'msgid_plural "':
+                # we are on a msgid plural
+                self.process('MP', i)
+            elif line[:7] == 'msgstr[':
+                # we are on a msgstr plural
+                self.process('MX', i)
+            elif line[:3] == '#, ':
+                # we are on a flags line
+                self.process('FL', i)
+            elif line[:2] == '# ' or line == '#':
+                if line == '#': line = line + ' '
+                # we are on a translator comment line
+                self.process('TC', i)
+            elif line[:2] == '#.':
+                # we are on a generated comment line
+                self.process('GC', i)
+            elif line[:15] == '#| msgid_plural':
+                # we are on a previous msgid_plural
+                self.process('PP', i)
+            elif line[:8] == '#| msgid':
+                # we are on a previous msgid
+                self.process('PM', i)
+            elif line[:10] == '#| msgctxt':
+                # we are on a previous msgctxt
+                self.process('PC', i)
+            i = i+1
+
+        if self.current_entry:
+            # since entries are added when another entry is found, we must add
+            # the last entry here (only if there are lines)
+            self.instance.append(self.current_entry)
+        # before returning the instance, check if there's metadata and if
+        # so, extract it into a dict
+        firstentry = self.instance[0]
+        if firstentry.msgid == '': # metadata found
+            # remove the entry
+            firstentry = self.instance.pop(0)
+            self.instance.metadata_is_fuzzy = firstentry.flags
+            key = None
+            for msg in firstentry.msgstr.splitlines():
+                try:
+                    key, val = msg.split(':', 1)
+                    self.instance.metadata[key] = val.strip()
+                except:
+                    if key is not None:
+                        self.instance.metadata[key] += '\n'+ msg.strip()
+        # close opened file
+        self.fhandle.close()
+        return self.instance
+
+    def add(self, symbol, states, next_state):
+        """
+        Add a transition to the state machine.
+        Keyword arguments:
+
+        symbol     -- string, the matched token (two chars symbol)
+        states     -- list, a list of states (two chars symbols)
+        next_state -- the next state the fsm will have after the action
+        """
+        for state in states:
+            action = getattr(self, 'handle_%s' % next_state.lower())
+            self.transitions[(symbol, state)] = (action, next_state)
+
+    def process(self, symbol, linenum):
+        """
+        Process the transition corresponding to the current state and the
+        symbol provided.
+
+        Keyword arguments:
+        symbol  -- string, the matched token (two chars symbol)
+        linenum -- integer, the current line number of the parsed file
+        """
+        try:
+            (action, state) = self.transitions[(symbol, self.current_state)]
+            if action():
+                self.current_state = state
+        except Exception:
+            raise IOError('Syntax error in po file (line %s)' % linenum)
+
+    # state handlers
+
+    def handle_he(self):
+        """Handle a header comment."""
+        if self.instance.header != '':
+            self.instance.header += '\n'
+        self.instance.header += self.current_token[2:]
+        return 1
+
+    def handle_tc(self):
+        """Handle a translator comment."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        if self.current_entry.tcomment != '':
+            self.current_entry.tcomment += '\n'
+        self.current_entry.tcomment += self.current_token[2:]
+        return True
+
+    def handle_gc(self):
+        """Handle a generated comment."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        if self.current_entry.comment != '':
+            self.current_entry.comment += '\n'
+        self.current_entry.comment += self.current_token[3:]
+        return True
+
+    def handle_oc(self):
+        """Handle a file:num occurence."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        occurrences = self.current_token[3:].split()
+        for occurrence in occurrences:
+            if occurrence != '':
+                try:
+                    fil, line = occurrence.split(':')
+                    if not line.isdigit():
+                        fil  = fil + line
+                        line = ''
+                    self.current_entry.occurrences.append((fil, line))
+                except:
+                    self.current_entry.occurrences.append((occurrence, ''))
+        return True
+
+    def handle_fl(self):
+        """Handle a flags line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.flags += self.current_token[3:].split(', ')
+        return True
+
+    def handle_pp(self):
+        """Handle a previous msgid_plural line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgid_plural = \
+            unescape(self.current_token[17:-1])
+        return True
+
+    def handle_pm(self):
+        """Handle a previous msgid line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgid = \
+            unescape(self.current_token[10:-1])
+        return True
+
+    def handle_pc(self):
+        """Handle a previous msgctxt line."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.previous_msgctxt = \
+            unescape(self.current_token[12:-1])
+        return True
+
+    def handle_ct(self):
+        """Handle a msgctxt."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.msgctxt = unescape(self.current_token[9:-1])
+        return True
+
+    def handle_mi(self):
+        """Handle a msgid."""
+        if self.current_state in ['MC', 'MS', 'MX']:
+            self.instance.append(self.current_entry)
+            self.current_entry = POEntry()
+        self.current_entry.obsolete = self.entry_obsolete
+        self.current_entry.msgid = unescape(self.current_token[7:-1])
+        return True
+
+    def handle_mp(self):
+        """Handle a msgid plural."""
+        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
+        return True
+
+    def handle_ms(self):
+        """Handle a msgstr."""
+        self.current_entry.msgstr = unescape(self.current_token[8:-1])
+        return True
+
+    def handle_mx(self):
+        """Handle a msgstr plural."""
+        index, value = self.current_token[7], self.current_token[11:-1]
+        self.current_entry.msgstr_plural[index] = unescape(value)
+        self.msgstr_index = index
+        return True
+
+    def handle_mc(self):
+        """Handle a msgid or msgstr continuation line."""
+        token = unescape(self.current_token[1:-1])
+        if self.current_state == 'CT':
+            typ = 'msgctxt'
+            self.current_entry.msgctxt += token
+        elif self.current_state == 'MI':
+            typ = 'msgid'
+            self.current_entry.msgid += token
+        elif self.current_state == 'MP':
+            typ = 'msgid_plural'
+            self.current_entry.msgid_plural += token
+        elif self.current_state == 'MS':
+            typ = 'msgstr'
+            self.current_entry.msgstr += token
+        elif self.current_state == 'MX':
+            typ = 'msgstr[%s]' % self.msgstr_index
+            self.current_entry.msgstr_plural[self.msgstr_index] += token
+        elif self.current_state == 'PP':
+            typ = 'previous_msgid_plural'
+            token = token[3:]
+            self.current_entry.previous_msgid_plural += token
+        elif self.current_state == 'PM':
+            typ = 'previous_msgid'
+            token = token[3:]
+            self.current_entry.previous_msgid += token
+        elif self.current_state == 'PC':
+            typ = 'previous_msgctxt'
+            token = token[3:]
+            self.current_entry.previous_msgctxt += token
+        if typ not in self.current_entry._multiline_str:
+            self.current_entry._multiline_str[typ] = token
+        else:
+            self.current_entry._multiline_str[typ] += "__POLIB__NL__" + token
+        # don't change the current state
+        return False
+
+# }}}
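
The ``handle_*`` methods above recover each field by slicing a fixed-width
keyword prefix plus the surrounding quotes off the raw token before
unescaping it. A quick standalone illustration of those offsets (plain
Python, not part of polib)::

  # keyword + space + opening quote is skipped, the closing quote dropped
  print('msgid "Hello"'[7:-1])          # Hello    (handle_mi)
  print('msgstr "Bonjour"'[8:-1])       # Bonjour  (handle_ms)
  print('msgctxt "menu"'[9:-1])         # menu     (handle_ct)
  print('msgid_plural "Files"'[14:-1])  # Files    (handle_mp)
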
+# class _MOFileParser {{{
+
+class _MOFileParser(object):
+    """
+    A class to parse binary mo files.
+    """
+    BIG_ENDIAN    = 0xde120495
+    LITTLE_ENDIAN = 0x950412de
+
+    def __init__(self, fpath, *args, **kwargs):
+        """
+        Constructor.
+
+        **Arguments**:
+          - *fpath*: string, path to the mo file
+          - *encoding*: string, the encoding to use, defaults to
+            "default_encoding" global variable (optional),
+          - *check_for_duplicates*: whether to check for duplicate entries
+            when adding entries to the file, default: False (optional).
+        """
+        enc = kwargs.get('encoding', default_encoding)
+        check_dup = kwargs.get('check_for_duplicates', False)
+        self.fhandle = open(fpath, 'rb')
+        self.instance = MOFile(
+            fpath=fpath,
+            encoding=enc,
+            check_for_duplicates=check_dup
+        )
+
+    def parse_magicnumber(self):
+        """
+        Parse the magic number and raise an exception if not valid.
+        """
+
+    def parse(self):
+        """
+        Build the instance with the file handle provided in the
+        constructor.
+        """
+        magic_number = self._readbinary('<I', 4)
+        if magic_number == self.LITTLE_ENDIAN:
+            ii = '<II'
+        elif magic_number == self.BIG_ENDIAN:
+            ii = '>II'
+        else:
+            raise IOError('Invalid mo file, magic number is incorrect!')
+        self.instance.magic_number = magic_number
+        # parse the version number and the number of strings
+        self.instance.version, numofstrings = self._readbinary(ii, 8)
+        # original strings and translation strings hash table offset
+        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
+        # move to msgid hash table and read length and offset of msgids
+        self.fhandle.seek(msgids_hash_offset)
+        msgids_index = []
+        for i in range(numofstrings):
+            msgids_index.append(self._readbinary(ii, 8))
+        # move to msgstr hash table and read length and offset of msgstrs
+        self.fhandle.seek(msgstrs_hash_offset)
+        msgstrs_index = []
+        for i in range(numofstrings):
+            msgstrs_index.append(self._readbinary(ii, 8))
+        # build entries
+        for i in range(numofstrings):
+            self.fhandle.seek(msgids_index[i][1])
+            msgid = self.fhandle.read(msgids_index[i][0])
+            self.fhandle.seek(msgstrs_index[i][1])
+            msgstr = self.fhandle.read(msgstrs_index[i][0])
+            if i == 0: # metadata
+                raw_metadata, metadata = msgstr.split('\n'), {}
+                for line in raw_metadata:
+                    tokens = line.split(':', 1)
+                    if tokens[0] != '':
+                        try:
+                            metadata[tokens[0]] = tokens[1].strip()
+                        except IndexError:
+                            metadata[tokens[0]] = ''
+                self.instance.metadata = metadata
+                continue
+            # test if we have a plural entry
+            msgid_tokens = msgid.split('\0')
+            if len(msgid_tokens) > 1:
+                entry = MOEntry(
+                    msgid=msgid_tokens[0],
+                    msgid_plural=msgid_tokens[1],
+                    msgstr_plural=dict((k,v) for k,v in \
+                        enumerate(msgstr.split('\0')))
+                )
+            else:
+                entry = MOEntry(msgid=msgid, msgstr=msgstr)
+            self.instance.append(entry)
+        # close opened file
+        self.fhandle.close()
+        return self.instance
+
+    def _readbinary(self, fmt, numbytes):
+        """
+        Private method that unpacks n bytes of data using format <fmt>.
+        It returns a tuple, or a single value if the tuple length is 1.
+        """
+        bytes = self.fhandle.read(numbytes)
+        tup = struct.unpack(fmt, bytes)
+        if len(tup) == 1:
+            return tup[0]
+        return tup
+
+# }}}
+# __main__ {{{
+
+if __name__ == '__main__':
+    """
+    **Main function**::
+      - to **test** the module just run: *python polib.py [-v]*
+      - to **profile** the module: *python polib.py -p <some_pofile.po>*
+    """
+    import sys
+    if len(sys.argv) > 2 and sys.argv[1] == '-p':
+        def test(f):
+            if f.endswith('po'):
+                p = pofile(f)
+            else:
+                p = mofile(f)
+            s = unicode(p)
+        import profile
+        profile.run('test("'+sys.argv[2]+'")')
+    else:
+        import doctest
+        doctest.testmod()
+
+# }}}
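
For orientation, the binary layout that ``_MOFileParser.parse()`` walks can be
sketched independently of polib. The helper below is an illustration only (the
name ``dump_mo`` is invented); it assumes a small, well-formed catalogue and
mirrors the header, index-table and string reads performed above::

  import struct

  def dump_mo(path):
      fh = open(path, 'rb')
      # header: magic, version, number of strings, offsets of the two tables
      magic = struct.unpack('<I', fh.read(4))[0]
      if magic == 0x950412de:
          fmt = '<II'            # little endian
      elif magic == 0xde120495:
          fmt = '>II'            # big endian
      else:
          raise IOError('not a mo file')
      version, count = struct.unpack(fmt, fh.read(8))
      ids_off, strs_off = struct.unpack(fmt, fh.read(8))
      # each index entry is a (length, offset) pair
      fh.seek(ids_off)
      ids = [struct.unpack(fmt, fh.read(8)) for i in range(count)]
      fh.seek(strs_off)
      strs = [struct.unpack(fmt, fh.read(8)) for i in range(count)]
      for (ilen, ioff), (slen, soff) in zip(ids, strs):
          fh.seek(ioff)
          msgid = fh.read(ilen)
          fh.seek(soff)
          msgstr = fh.read(slen)
          print('%r -> %r' % (msgid, msgstr))
      fh.close()

The first entry printed (the one with an empty msgid) is the catalogue
metadata that ``parse()`` turns into ``instance.metadata``.
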
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/i18n/posplit	Sat Jun 19 17:06:11 2010 +0200
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# posplit - split messages into paragraphs in .po/.pot files
+#
+# license: MIT/X11/Expat
+#
+
+import sys
+import polib
+
+def addentry(po, entry, cache):
+    e = cache.get(entry.msgid)
+    if e:
+        e.occurrences.extend(entry.occurrences)
+    else:
+        po.append(entry)
+        cache[entry.msgid] = entry
+
+def mkentry(orig, delta, msgid, msgstr):
+    entry = polib.POEntry()
+    entry.merge(orig)
+    entry.msgid = msgid or orig.msgid
+    entry.msgstr = msgstr or orig.msgstr
+    entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences]
+    return entry
+
+if __name__ == "__main__":
+    po = polib.pofile(sys.argv[1])
+
+    cache = {}
+    entries = po[:]
+    po[:] = []
+    for entry in entries:
+        msgids = entry.msgid.split(u'\n\n')
+        if entry.msgstr:
+            msgstrs = entry.msgstr.split(u'\n\n')
+        else:
+            msgstrs = [u''] * len(msgids)
+
+        if len(msgids) != len(msgstrs):
+            # places the whole existing translation as a fuzzy
+            # translation for each paragraph, to give the
+            # translator a chance to recover part of the old
+            # translation - erasing extra paragraphs is
+            # probably better than retranslating everything from scratch
+            if 'fuzzy' not in entry.flags:
+                entry.flags.append('fuzzy')
+            msgstrs = [entry.msgstr] * len(msgids)
+
+        delta = 0
+        for msgid, msgstr in zip(msgids, msgstrs):
+            if msgid:
+                newentry = mkentry(entry, delta, msgid, msgstr)
+                addentry(po, newentry, cache)
+            delta += 2 + msgid.count('\n')
+    po.save()
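
The only subtle part of the loop above is the line-number bookkeeping:
``delta`` grows by the number of lines in the paragraph plus one for the blank
separator, i.e. ``2 + msgid.count('\n')``, so each split-off entry keeps an
occurrence pointing at the line where its paragraph actually starts. A toy
illustration, standalone and without polib::

  # a msgid reported at line 10 with paragraphs 'a\nb' and 'c' maps to
  # entries at lines 10 and 13: 'a' (10), 'b' (11), blank (12), 'c' (13)
  text = u'a\nb\n\nc'
  start = 10
  delta = 0
  for par in text.split(u'\n\n'):
      print('line %d: %r' % (start + delta, par))
      delta += 2 + par.count('\n')
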
--- a/mercurial/commands.py	Fri Jun 18 15:22:56 2010 +0200
+++ b/mercurial/commands.py	Sat Jun 19 17:06:11 2010 +0200
@@ -2451,7 +2451,8 @@
 
     If no revision range is specified, the default is tip:0 unless
     --follow is set, in which case the working directory parent is
-    used as the starting revision.
+    used as the starting revision. You can specify a revision set for
+    log; see :hg:`help revsets` for more information.
 
     See :hg:`help dates` for a list of formats valid for -d/--date.
 
--- a/mercurial/help.py	Fri Jun 18 15:22:56 2010 +0200
+++ b/mercurial/help.py	Sat Jun 19 17:06:11 2010 +0200
@@ -92,6 +92,7 @@
      loaddoc('revisions')),
     (['mrevs', 'multirevs'], _('Specifying Multiple Revisions'),
      loaddoc('multirevs')),
+    (['revsets'], _("Specifying Revision Sets"), loaddoc('revsets')),
     (['diffs'], _('Diff Formats'), loaddoc('diffs')),
     (['templating', 'templates'], _('Template Usage'),
      loaddoc('templates')),
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/help/revsets.txt	Sat Jun 19 17:06:11 2010 +0200
@@ -0,0 +1,166 @@
+Mercurial supports a functional language for selecting a set of
+revisions.
+
+The language supports a number of predicates which are joined by infix
+operators. Parentheses can be used for grouping.
+
+Identifiers such as branch names must be quoted with single or double
+quotes if they contain characters outside of ``[a-zA-Z0-9]`` or if
+they match one of the predefined predicates. Special characters can be
+used in quoted identifiers by escaping them, e.g., ``\n`` is interpreted
+as a newline.
+
+There is a single prefix operator:
+
+``not x``
+  Changesets not in x. Short form is ``! x``.
+
+These are the supported infix operators:
+
+``x::y``
+  A DAG range, meaning all changesets that are descendants of x and
+  ancestors of y, including x and y themselves. If the first endpoint
+  is left out, this is equivalent to ``ancestors(y)``; if the second
+  is left out, it is equivalent to ``descendants(x)``.
+
+  An alternative syntax is ``x..y``.
+
+``x:y``
+  All changesets with revision numbers between x and y, both
+  inclusive. Either endpoint can be left out; they default to 0 and
+  tip.
+
+``x and y``
+  The intersection of changesets in x and y. Short form is ``x & y``.
+
+``x or y``
+  The union of changesets in x and y. There are two alternative short
+  forms: ``x | y`` and ``x + y``.
+
+``x - y``
+  Changesets in x but not in y.
+
+The following predicates are supported:
+
+``adds(pattern)``
+  Changesets that add a file matching pattern.
+
+``all()``
+  All changesets, the same as ``0:tip``.
+
+``ancestor(single, single)``
+  Greatest common ancestor of the two changesets.
+
+``ancestors(set)``
+  Changesets that are ancestors of a changeset in set.
+
+``author(string)``
+  Alias for ``user(string)``.
+
+``branch(set)``
+  The branch names are found for changesets in set, and the result is
+  all changesets belonging to one of those branches.
+
+``children(set)``
+  Child changesets of changesets in set.
+
+``closed()``
+  Changeset is closed.
+
+``contains(pattern)``
+  Revision contains pattern.
+
+``date(interval)``
+  Changesets within the interval, see :hg:`help dates`.
+
+``descendants(set)``
+  Changesets which are descendants of changesets in set.
+
+``file(pattern)``
+  Changesets affecting files matching pattern.
+
+``follow()``
+  An alias for ``::.`` (ancestors of the working copy's first parent).
+
+``grep(regex)``
+  Like ``keyword(string)`` but accepts a regex.
+
+``head()``
+  Changeset is a head.
+
+``heads(set)``
+  Members of set with no children in set.
+
+``keyword(string)``
+  Search commit message, user name, and names of changed files for
+  string.
+
+``limit(set, n)``
+  First n members of set.
+
+``max(set)``
+  Changeset with highest revision number in set.
+
+``merge()``
+  Changeset is a merge changeset.
+
+``modifies(pattern)``
+  Changesets which modify files matching pattern.
+
+``outgoing([path])``
+  Changesets missing in path.
+
+``p1(set)``
+  First parent of changesets in set.
+
+``p2(set)``
+  Second parent of changesets in set.
+
+``parents(set)``
+  The set of all parents for all changesets in set.
+
+``removes(pattern)``
+  Changesets which remove files matching pattern.
+
+``reverse(set)``
+  Reverse order of set.
+
+``roots(set)``
+  Changesets with no parent changeset in set.
+
+``sort(set[, [-]key...])``
+  Sort set by keys. The default sort order is ascending; specify a key
+  as ``-key`` to sort in descending order.
+
+  The keys can be:
+
+  - ``rev`` for the revision number,
+  - ``branch`` for the branch name,
+  - ``desc`` for the commit message (description),
+  - ``user`` for user name (``author`` can be used as an alias),
+  - ``date`` for the commit date.
+
+``tagged()``
+  Changeset is tagged.
+
+``user(string)``
+  User name is string.
+
+Command line equivalents for :hg:`log`::
+
+  -f    ->  ::.
+  -d x  ->  date(x)
+  -k x  ->  keyword(x)
+  -m    ->  merge()
+  -u x  ->  user(x)
+  -b x  ->  branch(x)
+  -P x  ->  !::x
+  -l x  ->  limit(expr, x)
+
+Some sample queries::
+
+  hg log -r 'branch(default)'
+  hg log -r 'branch(default) and 1.5:: and not merge()'
+  hg log -r '1.3::1.5 and keyword(bug) and file("hgext/*")'
+  hg log -r 'sort(date("May 2008"), user)'
+  hg log -r '(keyword(bug) or keyword(issue)) and not ancestors(tagged())'
--- a/mercurial/i18n.py	Fri Jun 18 15:22:56 2010 +0200
+++ b/mercurial/i18n.py	Sat Jun 19 17:06:11 2010 +0200
@@ -36,7 +36,7 @@
     if message is None:
         return message
 
-    u = t.ugettext(message)
+    u = u'\n\n'.join([t.ugettext(m) for m in message.split('\n\n')])
     try:
         # encoding.tolocal cannot be used since it will first try to
         # decode the Unicode string. Calling u.decode(enc) really
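
This one-line change is the runtime half of posplit: messages are looked up in
the catalogue paragraph by paragraph, so a paragraph that has no translation
falls back to English without discarding the translated paragraphs around it.
A toy illustration of the behaviour (the dictionary stands in for a real
gettext translation object)::

  catalogue = {u'First paragraph.': u'Premier paragraphe.'}

  def translate(message):
      if message is None:
          return message
      # look up each paragraph separately, falling back to the original text
      return u'\n\n'.join(catalogue.get(m, m) for m in message.split(u'\n\n'))

  print(translate(u'First paragraph.\n\nSecond paragraph.'))
  # Premier paragraphe.
  #
  # Second paragraph.
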
--- a/mercurial/revset.py	Fri Jun 18 15:22:56 2010 +0200
+++ b/mercurial/revset.py	Sat Jun 19 17:06:11 2010 +0200
@@ -8,6 +8,7 @@
 import re
 import parser, util, error, discovery
 import match as _match
+from i18n import _
 
 elements = {
     "(": (20, ("group", 1, ")"), ("func", 1, ")")),
@@ -60,7 +61,7 @@
                     break
                 pos += 1
             else:
-                raise error.ParseError("unterminated string", s)
+                raise error.ParseError(_("unterminated string"), s)
         elif c.isalnum() or c in '.': # gather up a symbol/keyword
             s = pos
             pos += 1
@@ -79,7 +80,7 @@
                 yield ('symbol', sym, s)
             pos -= 1
         else:
-            raise error.ParseError("syntax error", pos)
+            raise error.ParseError(_("syntax error"), pos)
         pos += 1
     yield ('end', None, pos)
 
@@ -105,14 +106,14 @@
 
 def getset(repo, subset, x):
     if not x:
-        raise error.ParseError("missing argument")
+        raise error.ParseError(_("missing argument"))
     return methods[x[0]](repo, subset, *x[1:])
 
 # operator methods
 
 def negate(repo, subset, x):
     return getset(repo, subset,
-                  ('string', '-' + getstring(x, "can't negate that")))
+                  ('string', '-' + getstring(x, _("can't negate that"))))
 
 def stringset(repo, subset, x):
     x = repo[x].rev()
@@ -124,7 +125,7 @@
 
 def symbolset(repo, subset, x):
     if x in symbols:
-        raise error.ParseError("can't use %s here" % x)
+        raise error.ParseError(_("can't use %s here") % x)
     return stringset(repo, subset, x)
 
 def rangeset(repo, subset, x, y):
@@ -147,12 +148,12 @@
     return [r for r in subset if r not in s]
 
 def listset(repo, subset, a, b):
-    raise error.ParseError("can't use a list in this context")
+    raise error.ParseError(_("can't use a list in this context"))
 
 def func(repo, subset, a, b):
     if a[0] == 'symbol' and a[1] in symbols:
         return symbols[a[1]](repo, subset, b)
-    raise error.ParseError("not a function: %s" % a[1])
+    raise error.ParseError(_("not a function: %s") % a[1])
 
 # functions
 
@@ -186,11 +187,11 @@
     return []
 
 def limit(repo, subset, x):
-    l = getargs(x, 2, 2, "limit wants two args")
+    l = getargs(x, 2, 2, _("limit wants two arguments"))
     try:
-        lim = int(getstring(l[1], "limit wants a number"))
+        lim = int(getstring(l[1], _("limit wants a number")))
     except ValueError:
-        raise error.ParseError("limit expects a number")
+        raise error.ParseError(_("limit expects a number"))
     return getset(repo, subset, l[0])[:lim]
 
 def children(repo, subset, x):
@@ -212,11 +213,11 @@
     return [r for r in subset if r in s or repo[r].branch() in b]
 
 def ancestor(repo, subset, x):
-    l = getargs(x, 2, 2, "ancestor wants two args")
+    l = getargs(x, 2, 2, _("ancestor wants two arguments"))
     a = getset(repo, subset, l[0])
     b = getset(repo, subset, l[1])
     if len(a) > 1 or len(b) > 1:
-        raise error.ParseError("ancestor args must be single revisions")
+        raise error.ParseError(_("ancestor arguments must be single revisions"))
     return [repo[a[0]].ancestor(repo[b[0]]).rev()]
 
 def ancestors(repo, subset, x):
@@ -230,18 +231,18 @@
     return [r for r in subset if r in s]
 
 def follow(repo, subset, x):
-    getargs(x, 0, 0, "follow takes no arguments")
+    getargs(x, 0, 0, _("follow takes no arguments"))
     p = repo['.'].rev()
     s = set(repo.changelog.ancestors(p)) | set([p])
     return [r for r in subset if r in s]
 
 def date(repo, subset, x):
-    ds = getstring(x, 'date wants a string')
+    ds = getstring(x, _("date wants a string"))
     dm = util.matchdate(ds)
     return [r for r in subset if dm(repo[r].date()[0])]
 
 def keyword(repo, subset, x):
-    kw = getstring(x, "keyword wants a string").lower()
+    kw = getstring(x, _("keyword wants a string")).lower()
     l = []
     for r in subset:
         c = repo[r]
@@ -251,7 +252,7 @@
     return l
 
 def grep(repo, subset, x):
-    gr = re.compile(getstring(x, "grep wants a string"))
+    gr = re.compile(getstring(x, _("grep wants a string")))
     l = []
     for r in subset:
         c = repo[r]
@@ -262,11 +263,11 @@
     return l
 
 def author(repo, subset, x):
-    n = getstring(x, "author wants a string").lower()
+    n = getstring(x, _("author wants a string")).lower()
     return [r for r in subset if n in repo[r].user().lower()]
 
 def hasfile(repo, subset, x):
-    pat = getstring(x, "file wants a pattern")
+    pat = getstring(x, _("file wants a pattern"))
     m = _match.match(repo.root, repo.getcwd(), [pat])
     s = []
     for r in subset:
@@ -277,7 +278,7 @@
     return s
 
 def contains(repo, subset, x):
-    pat = getstring(x, "file wants a pattern")
+    pat = getstring(x, _("file wants a pattern"))
     m = _match.match(repo.root, repo.getcwd(), [pat])
     s = []
     if m.files() == [pat]:
@@ -321,28 +322,28 @@
     return s
 
 def modifies(repo, subset, x):
-    pat = getstring(x, "modifies wants a pattern")
+    pat = getstring(x, _("modifies wants a pattern"))
     return checkstatus(repo, subset, pat, 0)
 
 def adds(repo, subset, x):
-    pat = getstring(x, "adds wants a pattern")
+    pat = getstring(x, _("adds wants a pattern"))
     return checkstatus(repo, subset, pat, 1)
 
 def removes(repo, subset, x):
-    pat = getstring(x, "removes wants a pattern")
+    pat = getstring(x, _("removes wants a pattern"))
     return checkstatus(repo, subset, pat, 2)
 
 def merge(repo, subset, x):
-    getargs(x, 0, 0, "merge takes no arguments")
+    getargs(x, 0, 0, _("merge takes no arguments"))
     cl = repo.changelog
     return [r for r in subset if cl.parentrevs(r)[1] != -1]
 
 def closed(repo, subset, x):
-    getargs(x, 0, 0, "closed takes no arguments")
+    getargs(x, 0, 0, _("closed takes no arguments"))
     return [r for r in subset if repo[r].extra().get('close')]
 
 def head(repo, subset, x):
-    getargs(x, 0, 0, "head takes no arguments")
+    getargs(x, 0, 0, _("head takes no arguments"))
     hs = set()
     for b, ls in repo.branchmap().iteritems():
         hs.update(repo[h].rev() for h in ls)
@@ -354,10 +355,10 @@
     return l
 
 def sort(repo, subset, x):
-    l = getargs(x, 1, 2, "sort wants one or two arguments")
+    l = getargs(x, 1, 2, _("sort wants one or two arguments"))
     keys = "rev"
     if len(l) == 2:
-        keys = getstring(l[1], "sort spec must be a string")
+        keys = getstring(l[1], _("sort spec must be a string"))
 
     s = l[0]
     keys = keys.split()
@@ -389,14 +390,14 @@
             elif k == '-date':
                 e.append(-c.date()[0])
             else:
-                raise error.ParseError("unknown sort key %r" % k)
+                raise error.ParseError(_("unknown sort key %r") % k)
         e.append(r)
         l.append(e)
     l.sort()
     return [e[-1] for e in l]
 
 def getall(repo, subset, x):
-    getargs(x, 0, 0, "all takes no arguments")
+    getargs(x, 0, 0, _("all takes no arguments"))
     return subset
 
 def heads(repo, subset, x):
@@ -411,7 +412,7 @@
 
 def outgoing(repo, subset, x):
     import hg # avoid start-up nasties
-    l = getargs(x, 0, 1, "outgoing wants a repo path")
+    l = getargs(x, 0, 1, _("outgoing wants a repository path"))
     dest = l[1:] or ''
     dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
     dest, branches = hg.parseurl(dest)
@@ -425,7 +426,7 @@
     return [r for r in subset if r in o]
 
 def tagged(repo, subset, x):
-    getargs(x, 0, 0, "tagged takes no arguments")
+    getargs(x, 0, 0, _("tagged takes no arguments"))
     cl = repo.changelog
     s = set([cl.rev(n) for t, n in repo.tagslist() if t != 'tip'])
     return [r for r in subset if r in s]
@@ -523,7 +524,7 @@
         wb, tb = optimize(x[2], small)
         return wa + wb, (op, ta, tb)
     elif op == 'func':
-        f = getstring(x[1], "not a symbol")
+        f = getstring(x[1], _("not a symbol"))
         wa, ta = optimize(x[2], small)
         if f in "grep date user author keyword branch file":
             w = 10 # slow
@@ -545,6 +546,8 @@
 parse = parser.parser(tokenize, elements).parse
 
 def match(spec):
+    if not spec:
+        raise error.ParseError(_("empty query"))
     tree = parse(spec)
     weight, tree = optimize(tree, True)
     def mfunc(repo, subset):
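
The ``sort()`` function touched above uses a decorate-sort-undecorate pass:
each revision is decorated with its sort keys (numeric keys negated for
``-key`` descending order), the decorated list is sorted, and the revision is
read back from the last element. A standalone sketch of the same pattern with
invented record data::

  # sort (user, rev) records by user ascending, then rev descending
  records = [('bob', 3), ('ann', 1), ('bob', 2)]

  def sortby(records, keys):
      decorated = []
      for i, (user, rev) in enumerate(records):
          e = []
          for k in keys:
              if k == 'user':
                  e.append(user)
              elif k == 'rev':
                  e.append(rev)
              elif k == '-rev':
                  e.append(-rev)   # negate for descending numeric order
          e.append(i)              # trailing index lets us undecorate
          decorated.append(e)
      decorated.sort()
      return [records[e[-1]] for e in decorated]

  print(sortby(records, ['user', '-rev']))
  # [('ann', 1), ('bob', 3), ('bob', 2)]
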
--- a/tests/test-globalopts.out	Fri Jun 18 15:22:56 2010 +0200
+++ b/tests/test-globalopts.out	Sat Jun 19 17:06:11 2010 +0200
@@ -208,6 +208,7 @@
  environment  Environment Variables
  revisions    Specifying Single Revisions
  multirevs    Specifying Multiple Revisions
+ revsets      Specifying Revision Sets
  diffs        Diff Formats
  templating   Template Usage
  urls         URL Paths
@@ -279,6 +280,7 @@
  environment  Environment Variables
  revisions    Specifying Single Revisions
  multirevs    Specifying Multiple Revisions
+ revsets      Specifying Revision Sets
  diffs        Diff Formats
  templating   Template Usage
  urls         URL Paths
--- a/tests/test-help.out	Fri Jun 18 15:22:56 2010 +0200
+++ b/tests/test-help.out	Sat Jun 19 17:06:11 2010 +0200
@@ -101,6 +101,7 @@
  environment  Environment Variables
  revisions    Specifying Single Revisions
  multirevs    Specifying Multiple Revisions
+ revsets      Specifying Revision Sets
  diffs        Diff Formats
  templating   Template Usage
  urls         URL Paths
@@ -168,6 +169,7 @@
  environment  Environment Variables
  revisions    Specifying Single Revisions
  multirevs    Specifying Multiple Revisions
+ revsets      Specifying Revision Sets
  diffs        Diff Formats
  templating   Template Usage
  urls         URL Paths
@@ -619,6 +621,7 @@
  environment  Environment Variables
  revisions    Specifying Single Revisions
  multirevs    Specifying Multiple Revisions
+ revsets      Specifying Revision Sets
  diffs        Diff Formats
  templating   Template Usage
  urls         URL Paths