--- a/i18n/polib.py Fri Oct 12 10:01:09 2018 -0400
+++ b/i18n/polib.py Fri Oct 12 11:44:27 2018 -0400
@@ -1,5 +1,5 @@
-# -*- coding: utf-8 -*-
# no-check-code
+# -*- coding: utf-8 -*-
#
# License: MIT (see LICENSE file provided)
# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
@@ -15,10 +15,10 @@
from __future__ import absolute_import
-__author__ = 'David Jean Louis <izimobil@gmail.com>'
-__version__ = '0.6.4'
-__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
- 'detect_encoding', 'escape', 'unescape', 'detect_encoding',]
+__author__ = 'David Jean Louis <izimobil@gmail.com>'
+__version__ = '1.0.7'
+__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
+ 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
import array
import codecs
@@ -27,14 +27,47 @@
import struct
import sys
import textwrap
-import types
+
+try:
+ import io
+except ImportError:
+ # replacement of io.open() for python < 2.6
+ # we use codecs instead
+ class io(object):
+ @staticmethod
+ def open(fpath, mode='r', encoding=None):
+ return codecs.open(fpath, mode, encoding)
# the default encoding to use when encoding cannot be detected
default_encoding = 'utf-8'
+# python 2/3 compatibility helpers {{{
+
+
+if sys.version_info[:2] < (3, 0):
+ PY3 = False
+ text_type = unicode
+
+ def b(s):
+ return s
+
+ def u(s):
+ return unicode(s, "unicode_escape")
+
+else:
+ PY3 = True
+ text_type = str
+
+ def b(s):
+ return s.encode("latin-1")
+
+ def u(s):
+ return s
+# }}}
# _pofile_or_mofile {{{
+
def _pofile_or_mofile(f, type, **kwargs):
"""
Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
@@ -50,15 +83,34 @@
parser = kls(
f,
encoding=enc,
- check_for_duplicates=kwargs.get('check_for_duplicates', False)
+ check_for_duplicates=kwargs.get('check_for_duplicates', False),
+ klass=kwargs.get('klass')
)
instance = parser.parse()
instance.wrapwidth = kwargs.get('wrapwidth', 78)
return instance
+# }}}
+# _is_file {{{
+
+def _is_file(filename_or_contents):
+ """
+ Safely returns the value of os.path.exists(filename_or_contents).
+
+ Arguments:
+
+ ``filename_or_contents``
+ either a filename, or a string holding the contents of some file.
+ In the latter case, this function will always return False.
+ """
+ try:
+ return os.path.exists(filename_or_contents)
+ except (ValueError, UnicodeEncodeError):
+ return False
# }}}
# function pofile() {{{
+
def pofile(pofile, **kwargs):
"""
Convenience function that parses the po or pot file ``pofile`` and returns
@@ -80,12 +132,17 @@
``check_for_duplicates``
whether to check for duplicate entries when adding entries to the
file (optional, default: ``False``).
+
+ ``klass``
+ class which is used to instantiate the return value (optional,
+ default: ``None``, the return value will be a :class:`~polib.POFile`
+ instance).
"""
return _pofile_or_mofile(pofile, 'pofile', **kwargs)
-
# }}}
# function mofile() {{{
+
def mofile(mofile, **kwargs):
"""
Convenience function that parses the mo file ``mofile`` and returns a
@@ -108,12 +165,17 @@
``check_for_duplicates``
whether to check for duplicate entries when adding entries to the
file (optional, default: ``False``).
+
+ ``klass``
+ class which is used to instantiate the return value (optional,
+ default: ``None``, the return value will be a :class:`~polib.MOFile`
+ instance).
"""
return _pofile_or_mofile(mofile, 'mofile', **kwargs)
-
# }}}
# function detect_encoding() {{{
+
def detect_encoding(file, binary_mode=False):
"""
Try to detect the encoding used by the ``file``. The ``file`` argument can
@@ -129,7 +191,9 @@
``binary_mode``
boolean, set this to True if ``file`` is a mo file.
"""
- rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
+ PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
+ rxt = re.compile(u(PATTERN))
+ rxb = re.compile(b(PATTERN))
def charset_exists(charset):
"""Check whether ``charset`` is valid or not."""
@@ -139,31 +203,36 @@
return False
return True
- if not os.path.exists(file):
- match = rx.search(file)
+ if not _is_file(file):
+ match = rxt.search(file)
if match:
enc = match.group(1).strip()
if charset_exists(enc):
return enc
else:
- if binary_mode:
+ # For PY3, always treat as binary
+ if binary_mode or PY3:
mode = 'rb'
+ rx = rxb
else:
mode = 'r'
+ rx = rxt
f = open(file, mode)
for l in f.readlines():
match = rx.search(l)
if match:
f.close()
enc = match.group(1).strip()
+ if not isinstance(enc, text_type):
+ enc = enc.decode('utf-8')
if charset_exists(enc):
return enc
f.close()
return default_encoding
-
# }}}
# function escape() {{{
+
def escape(st):
"""
Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -174,10 +243,10 @@
.replace('\r', r'\r')\
.replace('\n', r'\n')\
.replace('\"', r'\"')
-
# }}}
# function unescape() {{{
+
def unescape(st):
"""
Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
@@ -193,12 +262,12 @@
return '\r'
if m == '\\':
return '\\'
- return m # handles escaped double quote
+ return m # handles escaped double quote
return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
-
# }}}
# class _BaseFile {{{
+
class _BaseFile(list):
"""
Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
@@ -227,7 +296,7 @@
list.__init__(self)
# the opened file handle
pofile = kwargs.get('pofile', None)
- if pofile and os.path.exists(pofile):
+ if pofile and _is_file(pofile):
self.fpath = pofile
else:
self.fpath = kwargs.get('fpath')
@@ -254,38 +323,45 @@
ret.append(entry.__unicode__(self.wrapwidth))
for entry in self.obsolete_entries():
ret.append(entry.__unicode__(self.wrapwidth))
- ret = '\n'.join(ret)
+ ret = u('\n').join(ret)
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
+ assert isinstance(ret, text_type)
+ #if type(ret) != text_type:
+ # return unicode(ret, self.encoding)
return ret
- def __str__(self):
- """
- Returns the string representation of the file.
- """
- return unicode(self).encode(self.encoding)
+ if PY3:
+ def __str__(self):
+ return self.__unicode__()
+ else:
+ def __str__(self):
+ """
+ Returns the string representation of the file.
+ """
+ return unicode(self).encode(self.encoding)
def __contains__(self, entry):
"""
- Overriden ``list`` method to implement the membership test (in and
+ Overridden ``list`` method to implement the membership test (in and
not in).
The method considers that an entry is in the file if it finds an entry
- that has the same msgid (the test is **case sensitive**).
+ that has the same msgid (the test is **case sensitive**) and the same
+ msgctxt (or none for both entries).
Argument:
``entry``
an instance of :class:`~polib._BaseEntry`.
"""
- return self.find(entry.msgid, by='msgid') is not None
+ return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \
+ is not None
def __eq__(self, other):
- return unicode(self) == unicode(other)
+ return str(self) == str(other)
def append(self, entry):
"""
- Overriden method to check for duplicates entries, if a user tries to
+ Overridden method to check for duplicates entries, if a user tries to
add an entry that is already in the file, the method will raise a
``ValueError`` exception.
@@ -300,7 +376,7 @@
def insert(self, index, entry):
"""
- Overriden method to check for duplicates entries, if a user tries to
+ Overridden method to check for duplicates entries, if a user tries to
add an entry that is already in the file, the method will raise a
``ValueError`` exception.
@@ -332,7 +408,7 @@
e.flags.append('fuzzy')
return e
- def save(self, fpath=None, repr_method='__str__'):
+ def save(self, fpath=None, repr_method='__unicode__'):
"""
Saves the po file to ``fpath``.
If it is an existing file and no ``fpath`` is provided, then the
@@ -354,8 +430,8 @@
if repr_method == 'to_binary':
fhandle = open(fpath, 'wb')
else:
- fhandle = codecs.open(fpath, 'w', self.encoding)
- if type(contents) != types.UnicodeType:
+ fhandle = io.open(fpath, 'w', encoding=self.encoding)
+ if not isinstance(contents, text_type):
contents = contents.decode(self.encoding)
fhandle.write(contents)
fhandle.close()
@@ -381,7 +457,7 @@
boolean, whether to also search in entries that are obsolete.
``msgctxt``
- string, allows to specify a specific message context for the
+ string, allows specifying a specific message context for the
search.
"""
if include_obsolete_entries:
@@ -390,7 +466,7 @@
entries = [e for e in self if not e.obsolete]
for e in entries:
if getattr(e, by) == st:
- if msgctxt and e.msgctxt != msgctxt:
+ if msgctxt is not False and e.msgctxt != msgctxt:
continue
return e
return None
@@ -412,7 +488,9 @@
'Language-Team',
'MIME-Version',
'Content-Type',
- 'Content-Transfer-Encoding'
+ 'Content-Transfer-Encoding',
+ 'Language',
+ 'Plural-Forms'
]
ordered_data = []
for data in data_order:
@@ -423,9 +501,7 @@
pass
# the rest of the metadata will be alphabetically ordered since there
# are no specs for this AFAIK
- keys = metadata.keys()
- keys.sort()
- for data in keys:
+ for data in sorted(metadata.keys()):
value = metadata[data]
ordered_data.append((data, value))
return ordered_data
@@ -436,18 +512,12 @@
"""
offsets = []
entries = self.translated_entries()
+
# the keys are sorted in the .mo file
def cmp(_self, other):
# msgfmt compares entries with msgctxt if it exists
- if _self.msgctxt:
- self_msgid = _self.msgctxt
- else:
- self_msgid = _self.msgid
-
- if other.msgctxt:
- other_msgid = other.msgctxt
- else:
- other_msgid = other.msgid
+ self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid
+ other_msgid = other.msgctxt and other.msgctxt or other.msgid
if self_msgid > other_msgid:
return 1
elif self_msgid < other_msgid:
@@ -455,25 +525,23 @@
else:
return 0
# add metadata entry
- entries.sort(cmp)
+ entries.sort(key=lambda o: o.msgctxt or o.msgid)
mentry = self.metadata_as_entry()
#mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
entries = [mentry] + entries
entries_len = len(entries)
- ids, strs = '', ''
+ ids, strs = b(''), b('')
for e in entries:
# For each string, we need size and file offset. Each string is
# NUL terminated; the NUL does not count into the size.
- msgid = ''
+ msgid = b('')
if e.msgctxt:
# Contexts are stored by storing the concatenation of the
# context, a <EOT> byte, and the original string
msgid = self._encode(e.msgctxt + '\4')
if e.msgid_plural:
- indexes = e.msgstr_plural.keys()
- indexes.sort()
msgstr = []
- for index in indexes:
+ for index in sorted(e.msgstr_plural.keys()):
msgstr.append(e.msgstr_plural[index])
msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
msgstr = self._encode('\0'.join(msgstr))
@@ -481,11 +549,11 @@
msgid += self._encode(e.msgid)
msgstr = self._encode(e.msgstr)
offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
- ids += msgid + '\0'
- strs += msgstr + '\0'
+ ids += msgid + b('\0')
+ strs += msgstr + b('\0')
# The header is 7 32-bit unsigned integers.
- keystart = 7*4+16*entries_len
+ keystart = 7 * 4 + 16 * entries_len
# and the values start after the keys
valuestart = keystart + len(ids)
koffsets = []
@@ -493,26 +561,30 @@
# The string table first has the list of keys, then the list of values.
# Each entry has first the size of the string, then the file offset.
for o1, l1, o2, l2 in offsets:
- koffsets += [l1, o1+keystart]
- voffsets += [l2, o2+valuestart]
+ koffsets += [l1, o1 + keystart]
+ voffsets += [l2, o2 + valuestart]
offsets = koffsets + voffsets
- # check endianness for magic number
- if struct.pack('@h', 1) == struct.pack('<h', 1):
- magic_number = MOFile.LITTLE_ENDIAN
- else:
- magic_number = MOFile.BIG_ENDIAN
output = struct.pack(
"Iiiiiii",
- magic_number, # Magic number
- 0, # Version
- entries_len, # # of entries
- 7*4, # start of key index
- 7*4+entries_len*8, # start of value index
- 0, keystart # size and offset of hash table
- # Important: we don't use hash tables
+ # Magic number
+ MOFile.MAGIC,
+ # Version
+ 0,
+ # number of entries
+ entries_len,
+ # start of key index
+ 7 * 4,
+ # start of value index
+ 7 * 4 + entries_len * 8,
+ # size and offset of hash table, we don't use hash tables
+ 0, keystart
+
)
- output += array.array("i", offsets).tostring()
+ if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
+ output += array.array("i", offsets).tobytes()
+ else:
+ output += array.array("i", offsets).tostring()
output += ids
output += strs
return output
@@ -522,13 +594,13 @@
Encodes the given ``mixed`` argument with the file encoding if and
only if it's an unicode string and returns the encoded string.
"""
- if type(mixed) == types.UnicodeType:
- return mixed.encode(self.encoding)
+ if isinstance(mixed, text_type):
+ mixed = mixed.encode(self.encoding)
return mixed
-
# }}}
# class POFile {{{
+
class POFile(_BaseFile):
"""
Po (or Pot) file reader/writer.
@@ -542,13 +614,15 @@
"""
ret, headers = '', self.header.split('\n')
for header in headers:
- if header[:1] in [',', ':']:
+ if not len(header):
+ ret += "#\n"
+ elif header[:1] in [',', ':']:
ret += '#%s\n' % header
else:
ret += '# %s\n' % header
- if type(ret) != types.UnicodeType:
- ret = unicode(ret, self.encoding)
+ if not isinstance(ret, text_type):
+ ret = ret.decode(self.encoding)
return ret + _BaseFile.__unicode__(self)
@@ -572,7 +646,7 @@
if total == 0:
return 100
translated = len(self.translated_entries())
- return int((100.00 / float(total)) * translated)
+ return int(translated * 100 / float(total))
def translated_entries(self):
"""
@@ -584,7 +658,7 @@
"""
Convenience method that returns the list of untranslated entries.
"""
- return [e for e in self if not e.translated() and not e.obsolete \
+ return [e for e in self if not e.translated() and not e.obsolete
and not 'fuzzy' in e.flags]
def fuzzy_entries(self):
@@ -615,28 +689,32 @@
``refpot``
object POFile, the reference catalog.
"""
+ # Store entries in dict/set for faster access
+ self_entries = dict((entry.msgid, entry) for entry in self)
+ refpot_msgids = set(entry.msgid for entry in refpot)
+ # Merge entries that are in the refpot
for entry in refpot:
- e = self.find(entry.msgid, include_obsolete_entries=True)
+ e = self_entries.get(entry.msgid)
if e is None:
e = POEntry()
self.append(e)
e.merge(entry)
# ok, now we must "obsolete" entries that are not in the refpot anymore
for entry in self:
- if refpot.find(entry.msgid) is None:
+ if entry.msgid not in refpot_msgids:
entry.obsolete = True
-
# }}}
# class MOFile {{{
+
class MOFile(_BaseFile):
"""
Mo file reader/writer.
This class inherits the :class:`~polib._BaseFile` class and, by
extension, the python ``list`` type.
"""
- BIG_ENDIAN = 0xde120495
- LITTLE_ENDIAN = 0x950412de
+ MAGIC = 0x950412de
+ MAGIC_SWAPPED = 0xde120495
def __init__(self, *args, **kwargs):
"""
@@ -698,10 +776,10 @@
Convenience method to keep the same interface with POFile instances.
"""
return []
-
# }}}
# class _BaseEntry {{{
+
class _BaseEntry(object):
"""
Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
@@ -753,12 +831,14 @@
ret = []
# write the msgctxt if any
if self.msgctxt is not None:
- ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
+ ret += self._str_field("msgctxt", delflag, "", self.msgctxt,
+ wrapwidth)
# write the msgid
ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
# write the msgid_plural if any
if self.msgid_plural:
- ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth)
+ ret += self._str_field("msgid_plural", delflag, "",
+ self.msgid_plural, wrapwidth)
if self.msgstr_plural:
# write the msgstr_plural if any
msgstrs = self.msgstr_plural
@@ -767,30 +847,34 @@
for index in keys:
msgstr = msgstrs[index]
plural_index = '[%s]' % index
- ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth)
+ ret += self._str_field("msgstr", delflag, plural_index, msgstr,
+ wrapwidth)
else:
# otherwise write the msgstr
- ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
+ ret += self._str_field("msgstr", delflag, "", self.msgstr,
+ wrapwidth)
ret.append('')
- ret = '\n'.join(ret)
-
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
+ ret = u('\n').join(ret)
return ret
- def __str__(self):
- """
- Returns the string representation of the entry.
- """
- return unicode(self).encode(self.encoding)
+ if PY3:
+ def __str__(self):
+ return self.__unicode__()
+ else:
+ def __str__(self):
+ """
+ Returns the string representation of the entry.
+ """
+ return unicode(self).encode(self.encoding)
def __eq__(self, other):
- return unicode(self) == unicode(other)
+ return str(self) == str(other)
- def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
+ def _str_field(self, fieldname, delflag, plural_index, field,
+ wrapwidth=78):
lines = field.splitlines(True)
if len(lines) > 1:
- lines = [''] + lines # start with initial empty line
+ lines = [''] + lines # start with initial empty line
else:
escaped_field = escape(field)
specialchars_count = 0
@@ -804,9 +888,9 @@
real_wrapwidth = wrapwidth - flength + specialchars_count
if wrapwidth > 0 and len(field) > real_wrapwidth:
# Wrap the line but take field name into account
- lines = [''] + [unescape(item) for item in textwrap.wrap(
+ lines = [''] + [unescape(item) for item in wrap(
escaped_field,
- wrapwidth - 2, # 2 for quotes ""
+ wrapwidth - 2, # 2 for quotes ""
drop_whitespace=False,
break_long_words=False
)]
@@ -818,13 +902,13 @@
ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
escape(lines.pop(0)))]
- for mstr in lines:
- ret.append('%s"%s"' % (delflag, escape(mstr)))
+ for line in lines:
+ ret.append('%s"%s"' % (delflag, escape(line)))
return ret
-
# }}}
# class POEntry {{{
+
class POEntry(_BaseEntry):
"""
Represents a po file entry.
@@ -854,6 +938,9 @@
``previous_msgid_plural``
string, the entry previous msgid_plural.
+
+ ``linenum``
+ integer, the line number of the entry
"""
_BaseEntry.__init__(self, *args, **kwargs)
self.comment = kwargs.get('comment', '')
@@ -863,6 +950,7 @@
self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
self.previous_msgid = kwargs.get('previous_msgid', None)
self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
+ self.linenum = kwargs.get('linenum', None)
def __unicode__(self, wrapwidth=78):
"""
@@ -879,7 +967,7 @@
if val:
for comment in val.split('\n'):
if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth:
- ret += textwrap.wrap(
+ ret += wrap(
comment,
wrapwidth,
initial_indent=c[1],
@@ -903,7 +991,7 @@
# what we want for filenames, so the dirty hack is to
# temporally replace hyphens with a char that a file cannot
# contain, like "*"
- ret += [l.replace('*', '-') for l in textwrap.wrap(
+ ret += [l.replace('*', '-') for l in wrap(
filestr.replace('-', '*'),
wrapwidth,
initial_indent='#: ',
@@ -918,32 +1006,25 @@
ret.append('#, %s' % ', '.join(self.flags))
# previous context and previous msgid/msgid_plural
- fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
+ fields = ['previous_msgctxt', 'previous_msgid',
+ 'previous_msgid_plural']
for f in fields:
val = getattr(self, f)
if val:
ret += self._str_field(f, "#| ", "", val, wrapwidth)
ret.append(_BaseEntry.__unicode__(self, wrapwidth))
- ret = '\n'.join(ret)
+ ret = u('\n').join(ret)
- if type(ret) != types.UnicodeType:
- return unicode(ret, self.encoding)
+ assert isinstance(ret, text_type)
+ #if type(ret) != types.UnicodeType:
+ # return unicode(ret, self.encoding)
return ret
def __cmp__(self, other):
"""
Called by comparison operations if rich comparison is not defined.
"""
- def compare_occurrences(a, b):
- """
- Compare an entry occurrence with another one.
- """
- if a[0] != b[0]:
- return a[0] < b[0]
- if a[1] != b[1]:
- return a[1] < b[1]
- return 0
# First: Obsolete test
if self.obsolete != other.obsolete:
@@ -952,12 +1033,8 @@
else:
return 1
# Work on a copy to protect original
- occ1 = self.occurrences[:]
- occ2 = other.occurrences[:]
- # Sorting using compare method
- occ1.sort(compare_occurrences)
- occ2.sort(compare_occurrences)
- # Comparing sorted occurrences
+ occ1 = sorted(self.occurrences[:])
+ occ2 = sorted(other.occurrences[:])
pos = 0
for entry1 in occ1:
try:
@@ -975,9 +1052,41 @@
return 1
else:
return -1
+ # Compare msgid_plural if set
+ if self.msgid_plural:
+ if not other.msgid_plural:
+ return 1
+ for pos in self.msgid_plural:
+ if pos not in other.msgid_plural:
+ return 1
+ if self.msgid_plural[pos] > other.msgid_plural[pos]:
+ return 1
+ if self.msgid_plural[pos] < other.msgid_plural[pos]:
+ return -1
# Finally: Compare message ID
- if self.msgid > other.msgid: return 1
- else: return -1
+ if self.msgid > other.msgid:
+ return 1
+ elif self.msgid < other.msgid:
+ return -1
+ return 0
+
+ def __gt__(self, other):
+ return self.__cmp__(other) > 0
+
+ def __lt__(self, other):
+ return self.__cmp__(other) < 0
+
+ def __ge__(self, other):
+ return self.__cmp__(other) >= 0
+
+ def __le__(self, other):
+ return self.__cmp__(other) <= 0
+
+ def __eq__(self, other):
+ return self.__cmp__(other) == 0
+
+ def __ne__(self, other):
+ return self.__cmp__(other) != 0
def translated(self):
"""
@@ -1020,18 +1129,49 @@
except KeyError:
self.msgstr_plural[pos] = ''
+ def __hash__(self):
+ return hash((self.msgid, self.msgstr))
# }}}
# class MOEntry {{{
+
class MOEntry(_BaseEntry):
"""
Represents a mo file entry.
"""
- pass
+ def __init__(self, *args, **kwargs):
+ """
+ Constructor, accepts the following keyword arguments,
+ for consistency with :class:`~polib.POEntry`:
+
+ ``comment``
+ ``tcomment``
+ ``occurrences``
+ ``flags``
+ ``previous_msgctxt``
+ ``previous_msgid``
+ ``previous_msgid_plural``
+
+ Note: even though these keyword arguments are accepted,
+ they hold no real meaning in the context of MO files
+ and are simply ignored.
+ """
+ _BaseEntry.__init__(self, *args, **kwargs)
+ self.comment = ''
+ self.tcomment = ''
+ self.occurrences = []
+ self.flags = []
+ self.previous_msgctxt = None
+ self.previous_msgid = None
+ self.previous_msgid_plural = None
+
+ def __hash__(self):
+ return hash((self.msgid, self.msgstr))
# }}}
# class _POFileParser {{{
+
class _POFileParser(object):
"""
A finite state machine to parse efficiently and correctly po
@@ -1056,23 +1196,27 @@
file (optional, default: ``False``).
"""
enc = kwargs.get('encoding', default_encoding)
- if os.path.exists(pofile):
+ if _is_file(pofile):
try:
- self.fhandle = codecs.open(pofile, 'rU', enc)
+ self.fhandle = io.open(pofile, 'rt', encoding=enc)
except LookupError:
enc = default_encoding
- self.fhandle = codecs.open(pofile, 'rU', enc)
+ self.fhandle = io.open(pofile, 'rt', encoding=enc)
else:
self.fhandle = pofile.splitlines()
- self.instance = POFile(
+ klass = kwargs.get('klass')
+ if klass is None:
+ klass = POFile
+ self.instance = klass(
pofile=pofile,
encoding=enc,
check_for_duplicates=kwargs.get('check_for_duplicates', False)
)
self.transitions = {}
- self.current_entry = POEntry()
- self.current_state = 'ST'
+ self.current_line = 0
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_state = 'st'
self.current_token = None
# two memo flags used in handlers
self.msgstr_index = 0
@@ -1083,7 +1227,7 @@
# * HE: Header
# * TC: a translation comment
# * GC: a generated comment
- # * OC: a file/line occurence
+ # * OC: a file/line occurrence
# * FL: a flags line
# * CT: a message context
# * PC: a previous msgctxt
@@ -1094,48 +1238,47 @@
# * MS: a msgstr
# * MX: a msgstr plural
# * MC: a msgid or msgstr continuation line
- all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC',
- 'MS', 'MP', 'MX', 'MI']
+ all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
+ 'ms', 'mp', 'mx', 'mi']
- self.add('TC', ['ST', 'HE'], 'HE')
- self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS',
- 'MP', 'MX', 'MI'], 'TC')
- self.add('GC', all, 'GC')
- self.add('OC', all, 'OC')
- self.add('FL', all, 'FL')
- self.add('PC', all, 'PC')
- self.add('PM', all, 'PM')
- self.add('PP', all, 'PP')
- self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM',
- 'PP', 'MS', 'MX'], 'CT')
- self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC',
- 'PM', 'PP', 'MS', 'MX'], 'MI')
- self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP')
- self.add('MS', ['MI', 'MP', 'TC'], 'MS')
- self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
- self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC')
+ self.add('tc', ['st', 'he'], 'he')
+ self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
+ 'mp', 'mx', 'mi'], 'tc')
+ self.add('gc', all, 'gc')
+ self.add('oc', all, 'oc')
+ self.add('fl', all, 'fl')
+ self.add('pc', all, 'pc')
+ self.add('pm', all, 'pm')
+ self.add('pp', all, 'pp')
+ self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
+ 'pp', 'ms', 'mx'], 'ct')
+ self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
+ 'pm', 'pp', 'ms', 'mx'], 'mi')
+ self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
+ self.add('ms', ['mi', 'mp', 'tc'], 'ms')
+ self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
+ self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
def parse(self):
"""
Run the state machine, parse the file line by line and call process()
with the current matched symbol.
"""
- i = 0
keywords = {
- 'msgctxt': 'CT',
- 'msgid': 'MI',
- 'msgstr': 'MS',
- 'msgid_plural': 'MP',
+ 'msgctxt': 'ct',
+ 'msgid': 'mi',
+ 'msgstr': 'ms',
+ 'msgid_plural': 'mp',
}
prev_keywords = {
- 'msgid_plural': 'PP',
- 'msgid': 'PM',
- 'msgctxt': 'PC',
+ 'msgid_plural': 'pp',
+ 'msgid': 'pm',
+ 'msgctxt': 'pc',
}
-
+ tokens = []
for line in self.fhandle:
- i += 1
+ self.current_line += 1
line = line.strip()
if line == '':
continue
@@ -1143,6 +1286,9 @@
tokens = line.split(None, 2)
nb_tokens = len(tokens)
+ if tokens[0] == '#~|':
+ continue
+
if tokens[0] == '#~' and nb_tokens > 1:
line = line[3:].strip()
tokens = tokens[1:]
@@ -1155,41 +1301,56 @@
# msgid, msgid_plural, msgctxt & msgstr.
if tokens[0] in keywords and nb_tokens > 1:
line = line[len(tokens[0]):].lstrip()
+ if re.search(r'([^\\]|^)"', line[1:-1]):
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unescaped double quote found' %
+ (self.instance.fpath, self.current_line))
self.current_token = line
- self.process(keywords[tokens[0]], i)
+ self.process(keywords[tokens[0]])
continue
self.current_token = line
- if tokens[0] == '#:' and nb_tokens > 1:
+ if tokens[0] == '#:':
+ if nb_tokens <= 1:
+ continue
# we are on a occurrences line
- self.process('OC', i)
+ self.process('oc')
elif line[:1] == '"':
# we are on a continuation line
- self.process('MC', i)
+ if re.search(r'([^\\]|^)"', line[1:-1]):
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unescaped double quote found' %
+ (self.instance.fpath, self.current_line))
+ self.process('mc')
elif line[:7] == 'msgstr[':
# we are on a msgstr plural
- self.process('MX', i)
+ self.process('mx')
- elif tokens[0] == '#,' and nb_tokens > 1:
+ elif tokens[0] == '#,':
+ if nb_tokens <= 1:
+ continue
# we are on a flags line
- self.process('FL', i)
+ self.process('fl')
- elif tokens[0] == '#':
- if line == '#': line += ' '
+ elif tokens[0] == '#' or tokens[0].startswith('##'):
+ if line == '#':
+ line += ' '
# we are on a translator comment line
- self.process('TC', i)
+ self.process('tc')
- elif tokens[0] == '#.' and nb_tokens > 1:
+ elif tokens[0] == '#.':
+ if nb_tokens <= 1:
+ continue
# we are on a generated comment line
- self.process('GC', i)
+ self.process('gc')
elif tokens[0] == '#|':
- if nb_tokens < 2:
- self.process('??', i)
- continue
+ if nb_tokens <= 1:
+ raise IOError('Syntax error in po file %s (line %s)' %
+ (self.instance.fpath, self.current_line))
# Remove the marker and any whitespace right after that.
line = line[2:].lstrip()
@@ -1197,48 +1358,57 @@
if tokens[1].startswith('"'):
# Continuation of previous metadata.
- self.process('MC', i)
+ self.process('mc')
continue
if nb_tokens == 2:
# Invalid continuation line.
- self.process('??', i)
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'invalid continuation line' %
+ (self.instance.fpath, self.current_line))
# we are on a "previous translation" comment line,
if tokens[1] not in prev_keywords:
# Unknown keyword in previous translation comment.
- self.process('??', i)
+ raise IOError('Syntax error in po file %s (line %s): '
+ 'unknown keyword %s' %
+ (self.instance.fpath, self.current_line,
+ tokens[1]))
# Remove the keyword and any whitespace
# between it and the starting quote.
line = line[len(tokens[1]):].lstrip()
self.current_token = line
- self.process(prev_keywords[tokens[1]], i)
+ self.process(prev_keywords[tokens[1]])
else:
- self.process('??', i)
+ raise IOError('Syntax error in po file %s (line %s)' %
+ (self.instance.fpath, self.current_line))
- if self.current_entry:
+ if self.current_entry and len(tokens) > 0 and \
+ not tokens[0].startswith('#'):
# since entries are added when another entry is found, we must add
- # the last entry here (only if there are lines)
+ # the last entry here (only if there are lines). Trailing comments
+ # are ignored
self.instance.append(self.current_entry)
+
# before returning the instance, check if there's metadata and if
# so extract it in a dict
- firstentry = self.instance[0]
- if firstentry.msgid == '': # metadata found
+ metadataentry = self.instance.find('')
+ if metadataentry: # metadata found
# remove the entry
- firstentry = self.instance.pop(0)
- self.instance.metadata_is_fuzzy = firstentry.flags
+ self.instance.remove(metadataentry)
+ self.instance.metadata_is_fuzzy = metadataentry.flags
key = None
- for msg in firstentry.msgstr.splitlines():
+ for msg in metadataentry.msgstr.splitlines():
try:
key, val = msg.split(':', 1)
self.instance.metadata[key] = val.strip()
- except:
+ except (ValueError, KeyError):
if key is not None:
- self.instance.metadata[key] += '\n'+ msg.strip()
+ self.instance.metadata[key] += '\n' + msg.strip()
# close opened file
- if isinstance(self.fhandle, file):
+ if not isinstance(self.fhandle, list): # must be file
self.fhandle.close()
return self.instance
@@ -1258,10 +1428,10 @@
the next state the fsm will have after the action.
"""
for state in states:
- action = getattr(self, 'handle_%s' % next_state.lower())
+ action = getattr(self, 'handle_%s' % next_state)
self.transitions[(symbol, state)] = (action, next_state)
- def process(self, symbol, linenum):
+ def process(self, symbol):
"""
Process the transition corresponding to the current state and the
symbol provided.
@@ -1278,8 +1448,9 @@
(action, state) = self.transitions[(symbol, self.current_state)]
if action():
self.current_state = state
- except Exception as exc:
- raise IOError('Syntax error in po file (line %s)' % linenum)
+ except Exception:
+ raise IOError('Syntax error in po file (line %s)' %
+ self.current_line)
# state handlers
@@ -1292,90 +1463,94 @@
def handle_tc(self):
"""Handle a translator comment."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
if self.current_entry.tcomment != '':
self.current_entry.tcomment += '\n'
- self.current_entry.tcomment += self.current_token[2:]
+ tcomment = self.current_token.lstrip('#')
+ if tcomment.startswith(' '):
+ tcomment = tcomment[1:]
+ self.current_entry.tcomment += tcomment
return True
def handle_gc(self):
"""Handle a generated comment."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
if self.current_entry.comment != '':
self.current_entry.comment += '\n'
self.current_entry.comment += self.current_token[3:]
return True
def handle_oc(self):
- """Handle a file:num occurence."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ """Handle a file:num occurrence."""
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
occurrences = self.current_token[3:].split()
for occurrence in occurrences:
if occurrence != '':
try:
fil, line = occurrence.split(':')
if not line.isdigit():
- fil = fil + line
+ fil = fil + line
line = ''
self.current_entry.occurrences.append((fil, line))
- except:
+ except (ValueError, AttributeError):
self.current_entry.occurrences.append((occurrence, ''))
return True
def handle_fl(self):
"""Handle a flags line."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
- self.current_entry.flags += self.current_token[3:].split(', ')
+ self.current_entry = POEntry(linenum=self.current_line)
+ self.current_entry.flags += [c.strip() for c in
+ self.current_token[3:].split(',')]
return True
def handle_pp(self):
"""Handle a previous msgid_plural line."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
self.current_entry.previous_msgid_plural = \
unescape(self.current_token[1:-1])
return True
def handle_pm(self):
"""Handle a previous msgid line."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
self.current_entry.previous_msgid = \
unescape(self.current_token[1:-1])
return True
def handle_pc(self):
"""Handle a previous msgctxt line."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
self.current_entry.previous_msgctxt = \
unescape(self.current_token[1:-1])
return True
def handle_ct(self):
"""Handle a msgctxt."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
self.current_entry.msgctxt = unescape(self.current_token[1:-1])
return True
def handle_mi(self):
"""Handle a msgid."""
- if self.current_state in ['MC', 'MS', 'MX']:
+ if self.current_state in ['mc', 'ms', 'mx']:
self.instance.append(self.current_entry)
- self.current_entry = POEntry()
+ self.current_entry = POEntry(linenum=self.current_line)
self.current_entry.obsolete = self.entry_obsolete
self.current_entry.msgid = unescape(self.current_token[1:-1])
return True
@@ -1392,47 +1567,37 @@
def handle_mx(self):
"""Handle a msgstr plural."""
- index, value = self.current_token[7], self.current_token[11:-1]
- self.current_entry.msgstr_plural[index] = unescape(value)
- self.msgstr_index = index
+ index = self.current_token[7]
+ value = self.current_token[self.current_token.find('"') + 1:-1]
+ self.current_entry.msgstr_plural[int(index)] = unescape(value)
+ self.msgstr_index = int(index)
return True
def handle_mc(self):
"""Handle a msgid or msgstr continuation line."""
token = unescape(self.current_token[1:-1])
- if self.current_state == 'CT':
- typ = 'msgctxt'
+ if self.current_state == 'ct':
self.current_entry.msgctxt += token
- elif self.current_state == 'MI':
- typ = 'msgid'
+ elif self.current_state == 'mi':
self.current_entry.msgid += token
- elif self.current_state == 'MP':
- typ = 'msgid_plural'
+ elif self.current_state == 'mp':
self.current_entry.msgid_plural += token
- elif self.current_state == 'MS':
- typ = 'msgstr'
+ elif self.current_state == 'ms':
self.current_entry.msgstr += token
- elif self.current_state == 'MX':
- typ = 'msgstr[%s]' % self.msgstr_index
+ elif self.current_state == 'mx':
self.current_entry.msgstr_plural[self.msgstr_index] += token
- elif self.current_state == 'PP':
- typ = 'previous_msgid_plural'
- token = token[3:]
+ elif self.current_state == 'pp':
self.current_entry.previous_msgid_plural += token
- elif self.current_state == 'PM':
- typ = 'previous_msgid'
- token = token[3:]
+ elif self.current_state == 'pm':
self.current_entry.previous_msgid += token
- elif self.current_state == 'PC':
- typ = 'previous_msgctxt'
- token = token[3:]
+ elif self.current_state == 'pc':
self.current_entry.previous_msgctxt += token
# don't change the current state
return False
-
# }}}
# class _MOFileParser {{{
+
class _MOFileParser(object):
"""
A class to parse binary mo files.
@@ -1456,12 +1621,24 @@
file (optional, default: ``False``).
"""
self.fhandle = open(mofile, 'rb')
- self.instance = MOFile(
+
+ klass = kwargs.get('klass')
+ if klass is None:
+ klass = MOFile
+ self.instance = klass(
fpath=mofile,
encoding=kwargs.get('encoding', default_encoding),
check_for_duplicates=kwargs.get('check_for_duplicates', False)
)
+ def __del__(self):
+ """
+ Make sure the file is closed, this prevents warnings on unclosed file
+ when running tests with python >= 3.2.
+ """
+ if self.fhandle:
+ self.fhandle.close()
+
def parse(self):
"""
Build the instance with the file handle provided in the
@@ -1469,15 +1646,20 @@
"""
# parse magic number
magic_number = self._readbinary('<I', 4)
- if magic_number == MOFile.LITTLE_ENDIAN:
+ if magic_number == MOFile.MAGIC:
ii = '<II'
- elif magic_number == MOFile.BIG_ENDIAN:
+ elif magic_number == MOFile.MAGIC_SWAPPED:
ii = '>II'
else:
raise IOError('Invalid mo file, magic number is incorrect !')
self.instance.magic_number = magic_number
# parse the version number and the number of strings
- self.instance.version, numofstrings = self._readbinary(ii, 8)
+ version, numofstrings = self._readbinary(ii, 8)
+ # from MO file format specs: "A program seeing an unexpected major
+ # revision number should stop reading the MO file entirely"
+ if version not in (0, 1):
+ raise IOError('Invalid mo file, unexpected major revision number')
+ self.instance.version = version
# original strings and translation strings hash table offset
msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
# move to msgid hash table and read length and offset of msgids
@@ -1491,29 +1673,34 @@
for i in range(numofstrings):
msgstrs_index.append(self._readbinary(ii, 8))
# build entries
+ encoding = self.instance.encoding
for i in range(numofstrings):
self.fhandle.seek(msgids_index[i][1])
msgid = self.fhandle.read(msgids_index[i][0])
+
self.fhandle.seek(msgstrs_index[i][1])
msgstr = self.fhandle.read(msgstrs_index[i][0])
- if i == 0: # metadata
- raw_metadata, metadata = msgstr.split('\n'), {}
+ if i == 0 and not msgid: # metadata
+ raw_metadata, metadata = msgstr.split(b('\n')), {}
for line in raw_metadata:
- tokens = line.split(':', 1)
- if tokens[0] != '':
+ tokens = line.split(b(':'), 1)
+ if tokens[0] != b(''):
try:
- metadata[tokens[0]] = tokens[1].strip()
+ k = tokens[0].decode(encoding)
+ v = tokens[1].decode(encoding)
+ metadata[k] = v.strip()
except IndexError:
- metadata[tokens[0]] = ''
+ metadata[k] = u('')
self.instance.metadata = metadata
continue
# test if we have a plural entry
- msgid_tokens = msgid.split('\0')
+ msgid_tokens = msgid.split(b('\0'))
if len(msgid_tokens) > 1:
entry = self._build_entry(
msgid=msgid_tokens[0],
msgid_plural=msgid_tokens[1],
- msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0')))
+ msgstr_plural=dict((k, v) for k, v in
+ enumerate(msgstr.split(b('\0'))))
)
else:
entry = self._build_entry(msgid=msgid, msgstr=msgstr)
@@ -1524,19 +1711,22 @@
def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
msgstr_plural=None):
- msgctxt_msgid = msgid.split('\x04')
+ msgctxt_msgid = msgid.split(b('\x04'))
+ encoding = self.instance.encoding
if len(msgctxt_msgid) > 1:
kwargs = {
- 'msgctxt': msgctxt_msgid[0],
- 'msgid' : msgctxt_msgid[1],
+ 'msgctxt': msgctxt_msgid[0].decode(encoding),
+ 'msgid': msgctxt_msgid[1].decode(encoding),
}
else:
- kwargs = {'msgid': msgid}
+ kwargs = {'msgid': msgid.decode(encoding)}
if msgstr:
- kwargs['msgstr'] = msgstr
+ kwargs['msgstr'] = msgstr.decode(encoding)
if msgid_plural:
- kwargs['msgid_plural'] = msgid_plural
+ kwargs['msgid_plural'] = msgid_plural.decode(encoding)
if msgstr_plural:
+ for k in msgstr_plural:
+ msgstr_plural[k] = msgstr_plural[k].decode(encoding)
kwargs['msgstr_plural'] = msgstr_plural
return MOEntry(**kwargs)
@@ -1550,5 +1740,99 @@
if len(tup) == 1:
return tup[0]
return tup
+# }}}
+# class TextWrapper {{{
+
+
+class TextWrapper(textwrap.TextWrapper):
+ """
+    Subclass of textwrap.TextWrapper that backports the
+ drop_whitespace option.
+ """
+ def __init__(self, *args, **kwargs):
+ drop_whitespace = kwargs.pop('drop_whitespace', True)
+ textwrap.TextWrapper.__init__(self, *args, **kwargs)
+ self.drop_whitespace = drop_whitespace
+
+ def _wrap_chunks(self, chunks):
+ """_wrap_chunks(chunks : [string]) -> [string]
+
+ Wrap a sequence of text chunks and return a list of lines of
+ length 'self.width' or less. (If 'break_long_words' is false,
+ some lines may be longer than this.) Chunks correspond roughly
+ to words and the whitespace between them: each chunk is
+ indivisible (modulo 'break_long_words'), but a line break can
+ come between any two chunks. Chunks should not have internal
+    whitespace; i.e. a chunk is either all whitespace or a "word".
+ Whitespace chunks will be removed from the beginning and end of
+ lines, but apart from that whitespace is preserved.
+ """
+ lines = []
+ if self.width <= 0:
+ raise ValueError("invalid width %r (must be > 0)" % self.width)
+
+ # Arrange in reverse order so items can be efficiently popped
+        # from a stack of chunks.
+ chunks.reverse()
+
+ while chunks:
+
+ # Start the list of chunks that will make up the current line.
+ # cur_len is just the length of all the chunks in cur_line.
+ cur_line = []
+ cur_len = 0
+
+ # Figure out which static string will prefix this line.
+ if lines:
+ indent = self.subsequent_indent
+ else:
+ indent = self.initial_indent
+
+ # Maximum width for this line.
+ width = self.width - len(indent)
+
+ # First chunk on line is whitespace -- drop it, unless this
+ # is the very beginning of the text (ie. no lines started yet).
+ if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+ del chunks[-1]
+
+ while chunks:
+ l = len(chunks[-1])
+
+ # Can at least squeeze this chunk onto the current line.
+ if cur_len + l <= width:
+ cur_line.append(chunks.pop())
+ cur_len += l
+
+ # Nope, this line is full.
+ else:
+ break
+
+ # The current line is full, and the next chunk is too big to
+ # fit on *any* line (not just this one).
+ if chunks and len(chunks[-1]) > width:
+ self._handle_long_word(chunks, cur_line, cur_len, width)
+
+ # If the last chunk on this line is all whitespace, drop it.
+ if self.drop_whitespace and cur_line and not cur_line[-1].strip():
+ del cur_line[-1]
+
+ # Convert current line back to a string and store it in list
+ # of all lines (return value).
+ if cur_line:
+ lines.append(indent + ''.join(cur_line))
+
+ return lines
+# }}}
+# function wrap() {{{
+
+
+def wrap(text, width=70, **kwargs):
+ """
+ Wrap a single paragraph of text, returning a list of wrapped lines.
+ """
+ if sys.version_info < (2, 6):
+ return TextWrapper(width=width, **kwargs).wrap(text)
+ return textwrap.wrap(text, width=width, **kwargs)
# }}}