Mercurial > hg
comparison i18n/polib.py @ 40185:19fc5a986669
polib: update to latest release 1.0.7 (upstream rev d75ce6dbbc2a)
# no-check-commit third-party code that doesn't match our style
Differential Revision: https://phab.mercurial-scm.org/D5001
author | Augie Fackler <augie@google.com> |
---|---|
date | Fri, 12 Oct 2018 11:44:27 -0400 |
parents | a7310a477966 |
children | 2372284d9457 |
comparison
equal
deleted
inserted
replaced
40184:c3b7d9c54edd | 40185:19fc5a986669 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 # no-check-code | 1 # no-check-code |
2 # -* coding: utf-8 -*- | |
3 # | 3 # |
4 # License: MIT (see LICENSE file provided) | 4 # License: MIT (see LICENSE file provided) |
5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: | 5 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: |
6 | 6 |
7 """ | 7 """ |
13 :func:`~polib.mofile` convenience functions. | 13 :func:`~polib.mofile` convenience functions. |
14 """ | 14 """ |
15 | 15 |
16 from __future__ import absolute_import | 16 from __future__ import absolute_import |
17 | 17 |
18 __author__ = 'David Jean Louis <izimobil@gmail.com>' | 18 __author__ = 'David Jean Louis <izimobil@gmail.com>' |
19 __version__ = '0.6.4' | 19 __version__ = '1.0.7' |
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', | 20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', |
21 'detect_encoding', 'escape', 'unescape', 'detect_encoding',] | 21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ] |
22 | 22 |
23 import array | 23 import array |
24 import codecs | 24 import codecs |
25 import os | 25 import os |
26 import re | 26 import re |
27 import struct | 27 import struct |
28 import sys | 28 import sys |
29 import textwrap | 29 import textwrap |
30 import types | 30 |
31 try: | |
32 import io | |
33 except ImportError: | |
34 # replacement of io.open() for python < 2.6 | |
35 # we use codecs instead | |
36 class io(object): | |
37 @staticmethod | |
38 def open(fpath, mode='r', encoding=None): | |
39 return codecs.open(fpath, mode, encoding) | |
31 | 40 |
32 | 41 |
33 # the default encoding to use when encoding cannot be detected | 42 # the default encoding to use when encoding cannot be detected |
34 default_encoding = 'utf-8' | 43 default_encoding = 'utf-8' |
35 | 44 |
45 # python 2/3 compatibility helpers {{{ | |
46 | |
47 | |
48 if sys.version_info[:2] < (3, 0): | |
49 PY3 = False | |
50 text_type = unicode | |
51 | |
52 def b(s): | |
53 return s | |
54 | |
55 def u(s): | |
56 return unicode(s, "unicode_escape") | |
57 | |
58 else: | |
59 PY3 = True | |
60 text_type = str | |
61 | |
62 def b(s): | |
63 return s.encode("latin-1") | |
64 | |
65 def u(s): | |
66 return s | |
67 # }}} | |
36 # _pofile_or_mofile {{{ | 68 # _pofile_or_mofile {{{ |
69 | |
37 | 70 |
38 def _pofile_or_mofile(f, type, **kwargs): | 71 def _pofile_or_mofile(f, type, **kwargs): |
39 """ | 72 """ |
40 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to | 73 Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to |
41 honor the DRY concept. | 74 honor the DRY concept. |
48 # parse the file | 81 # parse the file |
49 kls = type == 'pofile' and _POFileParser or _MOFileParser | 82 kls = type == 'pofile' and _POFileParser or _MOFileParser |
50 parser = kls( | 83 parser = kls( |
51 f, | 84 f, |
52 encoding=enc, | 85 encoding=enc, |
53 check_for_duplicates=kwargs.get('check_for_duplicates', False) | 86 check_for_duplicates=kwargs.get('check_for_duplicates', False), |
87 klass=kwargs.get('klass') | |
54 ) | 88 ) |
55 instance = parser.parse() | 89 instance = parser.parse() |
56 instance.wrapwidth = kwargs.get('wrapwidth', 78) | 90 instance.wrapwidth = kwargs.get('wrapwidth', 78) |
57 return instance | 91 return instance |
58 | 92 # }}} |
93 # _is_file {{{ | |
94 | |
95 | |
96 def _is_file(filename_or_contents): | |
97 """ | |
98 Safely returns the value of os.path.exists(filename_or_contents). | |
99 | |
100 Arguments: | |
101 | |
102 ``filename_or_contents`` | |
103 either a filename, or a string holding the contents of some file. | |
104 In the latter case, this function will always return False. | |
105 """ | |
106 try: | |
107 return os.path.exists(filename_or_contents) | |
108 except (ValueError, UnicodeEncodeError): | |
109 return False | |
59 # }}} | 110 # }}} |
60 # function pofile() {{{ | 111 # function pofile() {{{ |
112 | |
61 | 113 |
62 def pofile(pofile, **kwargs): | 114 def pofile(pofile, **kwargs): |
63 """ | 115 """ |
64 Convenience function that parses the po or pot file ``pofile`` and returns | 116 Convenience function that parses the po or pot file ``pofile`` and returns |
65 a :class:`~polib.POFile` instance. | 117 a :class:`~polib.POFile` instance. |
78 encoding will be auto-detected). | 130 encoding will be auto-detected). |
79 | 131 |
80 ``check_for_duplicates`` | 132 ``check_for_duplicates`` |
81 whether to check for duplicate entries when adding entries to the | 133 whether to check for duplicate entries when adding entries to the |
82 file (optional, default: ``False``). | 134 file (optional, default: ``False``). |
135 | |
136 ``klass`` | |
137 class which is used to instantiate the return value (optional, | |
138 default: ``None``, the return value will be a :class:`~polib.POFile` | |
139 instance). | |
83 """ | 140 """ |
84 return _pofile_or_mofile(pofile, 'pofile', **kwargs) | 141 return _pofile_or_mofile(pofile, 'pofile', **kwargs) |
85 | |
86 # }}} | 142 # }}} |
87 # function mofile() {{{ | 143 # function mofile() {{{ |
144 | |
88 | 145 |
89 def mofile(mofile, **kwargs): | 146 def mofile(mofile, **kwargs): |
90 """ | 147 """ |
91 Convenience function that parses the mo file ``mofile`` and returns a | 148 Convenience function that parses the mo file ``mofile`` and returns a |
92 :class:`~polib.MOFile` instance. | 149 :class:`~polib.MOFile` instance. |
106 encoding will be auto-detected). | 163 encoding will be auto-detected). |
107 | 164 |
108 ``check_for_duplicates`` | 165 ``check_for_duplicates`` |
109 whether to check for duplicate entries when adding entries to the | 166 whether to check for duplicate entries when adding entries to the |
110 file (optional, default: ``False``). | 167 file (optional, default: ``False``). |
168 | |
169 ``klass`` | |
170 class which is used to instantiate the return value (optional, | |
171 default: ``None``, the return value will be a :class:`~polib.MOFile` | |
172 instance). | |
111 """ | 173 """ |
112 return _pofile_or_mofile(mofile, 'mofile', **kwargs) | 174 return _pofile_or_mofile(mofile, 'mofile', **kwargs) |
113 | |
114 # }}} | 175 # }}} |
115 # function detect_encoding() {{{ | 176 # function detect_encoding() {{{ |
177 | |
116 | 178 |
117 def detect_encoding(file, binary_mode=False): | 179 def detect_encoding(file, binary_mode=False): |
118 """ | 180 """ |
119 Try to detect the encoding used by the ``file``. The ``file`` argument can | 181 Try to detect the encoding used by the ``file``. The ``file`` argument can |
120 be a PO or MO file path or a string containing the contents of the file. | 182 be a PO or MO file path or a string containing the contents of the file. |
127 string, full or relative path to the po/mo file or its content. | 189 string, full or relative path to the po/mo file or its content. |
128 | 190 |
129 ``binary_mode`` | 191 ``binary_mode`` |
130 boolean, set this to True if ``file`` is a mo file. | 192 boolean, set this to True if ``file`` is a mo file. |
131 """ | 193 """ |
132 rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)') | 194 PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)' |
195 rxt = re.compile(u(PATTERN)) | |
196 rxb = re.compile(b(PATTERN)) | |
133 | 197 |
134 def charset_exists(charset): | 198 def charset_exists(charset): |
135 """Check whether ``charset`` is valid or not.""" | 199 """Check whether ``charset`` is valid or not.""" |
136 try: | 200 try: |
137 codecs.lookup(charset) | 201 codecs.lookup(charset) |
138 except LookupError: | 202 except LookupError: |
139 return False | 203 return False |
140 return True | 204 return True |
141 | 205 |
142 if not os.path.exists(file): | 206 if not _is_file(file): |
143 match = rx.search(file) | 207 match = rxt.search(file) |
144 if match: | 208 if match: |
145 enc = match.group(1).strip() | 209 enc = match.group(1).strip() |
146 if charset_exists(enc): | 210 if charset_exists(enc): |
147 return enc | 211 return enc |
148 else: | 212 else: |
149 if binary_mode: | 213 # For PY3, always treat as binary |
214 if binary_mode or PY3: | |
150 mode = 'rb' | 215 mode = 'rb' |
216 rx = rxb | |
151 else: | 217 else: |
152 mode = 'r' | 218 mode = 'r' |
219 rx = rxt | |
153 f = open(file, mode) | 220 f = open(file, mode) |
154 for l in f.readlines(): | 221 for l in f.readlines(): |
155 match = rx.search(l) | 222 match = rx.search(l) |
156 if match: | 223 if match: |
157 f.close() | 224 f.close() |
158 enc = match.group(1).strip() | 225 enc = match.group(1).strip() |
226 if not isinstance(enc, text_type): | |
227 enc = enc.decode('utf-8') | |
159 if charset_exists(enc): | 228 if charset_exists(enc): |
160 return enc | 229 return enc |
161 f.close() | 230 f.close() |
162 return default_encoding | 231 return default_encoding |
163 | |
164 # }}} | 232 # }}} |
165 # function escape() {{{ | 233 # function escape() {{{ |
234 | |
166 | 235 |
167 def escape(st): | 236 def escape(st): |
168 """ | 237 """ |
169 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in | 238 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in |
170 the given string ``st`` and returns it. | 239 the given string ``st`` and returns it. |
172 return st.replace('\\', r'\\')\ | 241 return st.replace('\\', r'\\')\ |
173 .replace('\t', r'\t')\ | 242 .replace('\t', r'\t')\ |
174 .replace('\r', r'\r')\ | 243 .replace('\r', r'\r')\ |
175 .replace('\n', r'\n')\ | 244 .replace('\n', r'\n')\ |
176 .replace('\"', r'\"') | 245 .replace('\"', r'\"') |
177 | |
178 # }}} | 246 # }}} |
179 # function unescape() {{{ | 247 # function unescape() {{{ |
248 | |
180 | 249 |
181 def unescape(st): | 250 def unescape(st): |
182 """ | 251 """ |
183 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in | 252 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in |
184 the given string ``st`` and returns it. | 253 the given string ``st`` and returns it. |
191 return '\t' | 260 return '\t' |
192 if m == 'r': | 261 if m == 'r': |
193 return '\r' | 262 return '\r' |
194 if m == '\\': | 263 if m == '\\': |
195 return '\\' | 264 return '\\' |
196 return m # handles escaped double quote | 265 return m # handles escaped double quote |
197 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st) | 266 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st) |
198 | |
199 # }}} | 267 # }}} |
200 # class _BaseFile {{{ | 268 # class _BaseFile {{{ |
269 | |
201 | 270 |
202 class _BaseFile(list): | 271 class _BaseFile(list): |
203 """ | 272 """ |
204 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile` | 273 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile` |
205 classes. This class should **not** be instantiated directly. | 274 classes. This class should **not** be instantiated directly. |
225 file, (optional, default: ``False``). | 294 file, (optional, default: ``False``). |
226 """ | 295 """ |
227 list.__init__(self) | 296 list.__init__(self) |
228 # the opened file handle | 297 # the opened file handle |
229 pofile = kwargs.get('pofile', None) | 298 pofile = kwargs.get('pofile', None) |
230 if pofile and os.path.exists(pofile): | 299 if pofile and _is_file(pofile): |
231 self.fpath = pofile | 300 self.fpath = pofile |
232 else: | 301 else: |
233 self.fpath = kwargs.get('fpath') | 302 self.fpath = kwargs.get('fpath') |
234 # the width at which lines should be wrapped | 303 # the width at which lines should be wrapped |
235 self.wrapwidth = kwargs.get('wrapwidth', 78) | 304 self.wrapwidth = kwargs.get('wrapwidth', 78) |
252 [e for e in self if not e.obsolete] | 321 [e for e in self if not e.obsolete] |
253 for entry in entries: | 322 for entry in entries: |
254 ret.append(entry.__unicode__(self.wrapwidth)) | 323 ret.append(entry.__unicode__(self.wrapwidth)) |
255 for entry in self.obsolete_entries(): | 324 for entry in self.obsolete_entries(): |
256 ret.append(entry.__unicode__(self.wrapwidth)) | 325 ret.append(entry.__unicode__(self.wrapwidth)) |
257 ret = '\n'.join(ret) | 326 ret = u('\n').join(ret) |
258 | 327 |
259 if type(ret) != types.UnicodeType: | 328 assert isinstance(ret, text_type) |
260 return unicode(ret, self.encoding) | 329 #if type(ret) != text_type: |
330 # return unicode(ret, self.encoding) | |
261 return ret | 331 return ret |
262 | 332 |
263 def __str__(self): | 333 if PY3: |
264 """ | 334 def __str__(self): |
265 Returns the string representation of the file. | 335 return self.__unicode__() |
266 """ | 336 else: |
267 return unicode(self).encode(self.encoding) | 337 def __str__(self): |
338 """ | |
339 Returns the string representation of the file. | |
340 """ | |
341 return unicode(self).encode(self.encoding) | |
268 | 342 |
269 def __contains__(self, entry): | 343 def __contains__(self, entry): |
270 """ | 344 """ |
271 Overriden ``list`` method to implement the membership test (in and | 345 Overridden ``list`` method to implement the membership test (in and |
272 not in). | 346 not in). |
273 The method considers that an entry is in the file if it finds an entry | 347 The method considers that an entry is in the file if it finds an entry |
274 that has the same msgid (the test is **case sensitive**). | 348 that has the same msgid (the test is **case sensitive**) and the same |
349 msgctxt (or none for both entries). | |
275 | 350 |
276 Argument: | 351 Argument: |
277 | 352 |
278 ``entry`` | 353 ``entry`` |
279 an instance of :class:`~polib._BaseEntry`. | 354 an instance of :class:`~polib._BaseEntry`. |
280 """ | 355 """ |
281 return self.find(entry.msgid, by='msgid') is not None | 356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \ |
357 is not None | |
282 | 358 |
283 def __eq__(self, other): | 359 def __eq__(self, other): |
284 return unicode(self) == unicode(other) | 360 return str(self) == str(other) |
285 | 361 |
286 def append(self, entry): | 362 def append(self, entry): |
287 """ | 363 """ |
288 Overriden method to check for duplicates entries, if a user tries to | 364 Overridden method to check for duplicates entries, if a user tries to |
289 add an entry that is already in the file, the method will raise a | 365 add an entry that is already in the file, the method will raise a |
290 ``ValueError`` exception. | 366 ``ValueError`` exception. |
291 | 367 |
292 Argument: | 368 Argument: |
293 | 369 |
298 raise ValueError('Entry "%s" already exists' % entry.msgid) | 374 raise ValueError('Entry "%s" already exists' % entry.msgid) |
299 super(_BaseFile, self).append(entry) | 375 super(_BaseFile, self).append(entry) |
300 | 376 |
301 def insert(self, index, entry): | 377 def insert(self, index, entry): |
302 """ | 378 """ |
303 Overriden method to check for duplicates entries, if a user tries to | 379 Overridden method to check for duplicates entries, if a user tries to |
304 add an entry that is already in the file, the method will raise a | 380 add an entry that is already in the file, the method will raise a |
305 ``ValueError`` exception. | 381 ``ValueError`` exception. |
306 | 382 |
307 Arguments: | 383 Arguments: |
308 | 384 |
330 e.msgstr = '\n'.join(strs) + '\n' | 406 e.msgstr = '\n'.join(strs) + '\n' |
331 if self.metadata_is_fuzzy: | 407 if self.metadata_is_fuzzy: |
332 e.flags.append('fuzzy') | 408 e.flags.append('fuzzy') |
333 return e | 409 return e |
334 | 410 |
335 def save(self, fpath=None, repr_method='__str__'): | 411 def save(self, fpath=None, repr_method='__unicode__'): |
336 """ | 412 """ |
337 Saves the po file to ``fpath``. | 413 Saves the po file to ``fpath``. |
338 If it is an existing file and no ``fpath`` is provided, then the | 414 If it is an existing file and no ``fpath`` is provided, then the |
339 existing file is rewritten with the modified data. | 415 existing file is rewritten with the modified data. |
340 | 416 |
352 if fpath is None: | 428 if fpath is None: |
353 fpath = self.fpath | 429 fpath = self.fpath |
354 if repr_method == 'to_binary': | 430 if repr_method == 'to_binary': |
355 fhandle = open(fpath, 'wb') | 431 fhandle = open(fpath, 'wb') |
356 else: | 432 else: |
357 fhandle = codecs.open(fpath, 'w', self.encoding) | 433 fhandle = io.open(fpath, 'w', encoding=self.encoding) |
358 if type(contents) != types.UnicodeType: | 434 if not isinstance(contents, text_type): |
359 contents = contents.decode(self.encoding) | 435 contents = contents.decode(self.encoding) |
360 fhandle.write(contents) | 436 fhandle.write(contents) |
361 fhandle.close() | 437 fhandle.close() |
362 # set the file path if not set | 438 # set the file path if not set |
363 if self.fpath is None and fpath: | 439 if self.fpath is None and fpath: |
379 | 455 |
380 ``include_obsolete_entries`` | 456 ``include_obsolete_entries`` |
381 boolean, whether to also search in entries that are obsolete. | 457 boolean, whether to also search in entries that are obsolete. |
382 | 458 |
383 ``msgctxt`` | 459 ``msgctxt`` |
384 string, allows to specify a specific message context for the | 460 string, allows specifying a specific message context for the |
385 search. | 461 search. |
386 """ | 462 """ |
387 if include_obsolete_entries: | 463 if include_obsolete_entries: |
388 entries = self[:] | 464 entries = self[:] |
389 else: | 465 else: |
390 entries = [e for e in self if not e.obsolete] | 466 entries = [e for e in self if not e.obsolete] |
391 for e in entries: | 467 for e in entries: |
392 if getattr(e, by) == st: | 468 if getattr(e, by) == st: |
393 if msgctxt and e.msgctxt != msgctxt: | 469 if msgctxt is not False and e.msgctxt != msgctxt: |
394 continue | 470 continue |
395 return e | 471 return e |
396 return None | 472 return None |
397 | 473 |
398 def ordered_metadata(self): | 474 def ordered_metadata(self): |
410 'PO-Revision-Date', | 486 'PO-Revision-Date', |
411 'Last-Translator', | 487 'Last-Translator', |
412 'Language-Team', | 488 'Language-Team', |
413 'MIME-Version', | 489 'MIME-Version', |
414 'Content-Type', | 490 'Content-Type', |
415 'Content-Transfer-Encoding' | 491 'Content-Transfer-Encoding', |
492 'Language', | |
493 'Plural-Forms' | |
416 ] | 494 ] |
417 ordered_data = [] | 495 ordered_data = [] |
418 for data in data_order: | 496 for data in data_order: |
419 try: | 497 try: |
420 value = metadata.pop(data) | 498 value = metadata.pop(data) |
421 ordered_data.append((data, value)) | 499 ordered_data.append((data, value)) |
422 except KeyError: | 500 except KeyError: |
423 pass | 501 pass |
424 # the rest of the metadata will be alphabetically ordered since there | 502 # the rest of the metadata will be alphabetically ordered since there |
425 # are no specs for this AFAIK | 503 # are no specs for this AFAIK |
426 keys = metadata.keys() | 504 for data in sorted(metadata.keys()): |
427 keys.sort() | |
428 for data in keys: | |
429 value = metadata[data] | 505 value = metadata[data] |
430 ordered_data.append((data, value)) | 506 ordered_data.append((data, value)) |
431 return ordered_data | 507 return ordered_data |
432 | 508 |
433 def to_binary(self): | 509 def to_binary(self): |
434 """ | 510 """ |
435 Return the binary representation of the file. | 511 Return the binary representation of the file. |
436 """ | 512 """ |
437 offsets = [] | 513 offsets = [] |
438 entries = self.translated_entries() | 514 entries = self.translated_entries() |
515 | |
439 # the keys are sorted in the .mo file | 516 # the keys are sorted in the .mo file |
440 def cmp(_self, other): | 517 def cmp(_self, other): |
441 # msgfmt compares entries with msgctxt if it exists | 518 # msgfmt compares entries with msgctxt if it exists |
442 if _self.msgctxt: | 519 self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid |
443 self_msgid = _self.msgctxt | 520 other_msgid = other.msgctxt and other.msgctxt or other.msgid |
444 else: | |
445 self_msgid = _self.msgid | |
446 | |
447 if other.msgctxt: | |
448 other_msgid = other.msgctxt | |
449 else: | |
450 other_msgid = other.msgid | |
451 if self_msgid > other_msgid: | 521 if self_msgid > other_msgid: |
452 return 1 | 522 return 1 |
453 elif self_msgid < other_msgid: | 523 elif self_msgid < other_msgid: |
454 return -1 | 524 return -1 |
455 else: | 525 else: |
456 return 0 | 526 return 0 |
457 # add metadata entry | 527 # add metadata entry |
458 entries.sort(cmp) | 528 entries.sort(key=lambda o: o.msgctxt or o.msgid) |
459 mentry = self.metadata_as_entry() | 529 mentry = self.metadata_as_entry() |
460 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() | 530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() |
461 entries = [mentry] + entries | 531 entries = [mentry] + entries |
462 entries_len = len(entries) | 532 entries_len = len(entries) |
463 ids, strs = '', '' | 533 ids, strs = b(''), b('') |
464 for e in entries: | 534 for e in entries: |
465 # For each string, we need size and file offset. Each string is | 535 # For each string, we need size and file offset. Each string is |
466 # NUL terminated; the NUL does not count into the size. | 536 # NUL terminated; the NUL does not count into the size. |
467 msgid = '' | 537 msgid = b('') |
468 if e.msgctxt: | 538 if e.msgctxt: |
469 # Contexts are stored by storing the concatenation of the | 539 # Contexts are stored by storing the concatenation of the |
470 # context, a <EOT> byte, and the original string | 540 # context, a <EOT> byte, and the original string |
471 msgid = self._encode(e.msgctxt + '\4') | 541 msgid = self._encode(e.msgctxt + '\4') |
472 if e.msgid_plural: | 542 if e.msgid_plural: |
473 indexes = e.msgstr_plural.keys() | |
474 indexes.sort() | |
475 msgstr = [] | 543 msgstr = [] |
476 for index in indexes: | 544 for index in sorted(e.msgstr_plural.keys()): |
477 msgstr.append(e.msgstr_plural[index]) | 545 msgstr.append(e.msgstr_plural[index]) |
478 msgid += self._encode(e.msgid + '\0' + e.msgid_plural) | 546 msgid += self._encode(e.msgid + '\0' + e.msgid_plural) |
479 msgstr = self._encode('\0'.join(msgstr)) | 547 msgstr = self._encode('\0'.join(msgstr)) |
480 else: | 548 else: |
481 msgid += self._encode(e.msgid) | 549 msgid += self._encode(e.msgid) |
482 msgstr = self._encode(e.msgstr) | 550 msgstr = self._encode(e.msgstr) |
483 offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) | 551 offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) |
484 ids += msgid + '\0' | 552 ids += msgid + b('\0') |
485 strs += msgstr + '\0' | 553 strs += msgstr + b('\0') |
486 | 554 |
487 # The header is 7 32-bit unsigned integers. | 555 # The header is 7 32-bit unsigned integers. |
488 keystart = 7*4+16*entries_len | 556 keystart = 7 * 4 + 16 * entries_len |
489 # and the values start after the keys | 557 # and the values start after the keys |
490 valuestart = keystart + len(ids) | 558 valuestart = keystart + len(ids) |
491 koffsets = [] | 559 koffsets = [] |
492 voffsets = [] | 560 voffsets = [] |
493 # The string table first has the list of keys, then the list of values. | 561 # The string table first has the list of keys, then the list of values. |
494 # Each entry has first the size of the string, then the file offset. | 562 # Each entry has first the size of the string, then the file offset. |
495 for o1, l1, o2, l2 in offsets: | 563 for o1, l1, o2, l2 in offsets: |
496 koffsets += [l1, o1+keystart] | 564 koffsets += [l1, o1 + keystart] |
497 voffsets += [l2, o2+valuestart] | 565 voffsets += [l2, o2 + valuestart] |
498 offsets = koffsets + voffsets | 566 offsets = koffsets + voffsets |
499 # check endianness for magic number | |
500 if struct.pack('@h', 1) == struct.pack('<h', 1): | |
501 magic_number = MOFile.LITTLE_ENDIAN | |
502 else: | |
503 magic_number = MOFile.BIG_ENDIAN | |
504 | 567 |
505 output = struct.pack( | 568 output = struct.pack( |
506 "Iiiiiii", | 569 "Iiiiiii", |
507 magic_number, # Magic number | 570 # Magic number |
508 0, # Version | 571 MOFile.MAGIC, |
509 entries_len, # # of entries | 572 # Version |
510 7*4, # start of key index | 573 0, |
511 7*4+entries_len*8, # start of value index | 574 # number of entries |
512 0, keystart # size and offset of hash table | 575 entries_len, |
513 # Important: we don't use hash tables | 576 # start of key index |
577 7 * 4, | |
578 # start of value index | |
579 7 * 4 + entries_len * 8, | |
580 # size and offset of hash table, we don't use hash tables | |
581 0, keystart | |
582 | |
514 ) | 583 ) |
515 output += array.array("i", offsets).tostring() | 584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior |
585 output += array.array("i", offsets).tobytes() | |
586 else: | |
587 output += array.array("i", offsets).tostring() | |
516 output += ids | 588 output += ids |
517 output += strs | 589 output += strs |
518 return output | 590 return output |
519 | 591 |
520 def _encode(self, mixed): | 592 def _encode(self, mixed): |
521 """ | 593 """ |
522 Encodes the given ``mixed`` argument with the file encoding if and | 594 Encodes the given ``mixed`` argument with the file encoding if and |
523 only if it's an unicode string and returns the encoded string. | 595 only if it's an unicode string and returns the encoded string. |
524 """ | 596 """ |
525 if type(mixed) == types.UnicodeType: | 597 if isinstance(mixed, text_type): |
526 return mixed.encode(self.encoding) | 598 mixed = mixed.encode(self.encoding) |
527 return mixed | 599 return mixed |
528 | |
529 # }}} | 600 # }}} |
530 # class POFile {{{ | 601 # class POFile {{{ |
602 | |
531 | 603 |
532 class POFile(_BaseFile): | 604 class POFile(_BaseFile): |
533 """ | 605 """ |
534 Po (or Pot) file reader/writer. | 606 Po (or Pot) file reader/writer. |
535 This class inherits the :class:`~polib._BaseFile` class and, by extension, | 607 This class inherits the :class:`~polib._BaseFile` class and, by extension, |
540 """ | 612 """ |
541 Returns the unicode representation of the po file. | 613 Returns the unicode representation of the po file. |
542 """ | 614 """ |
543 ret, headers = '', self.header.split('\n') | 615 ret, headers = '', self.header.split('\n') |
544 for header in headers: | 616 for header in headers: |
545 if header[:1] in [',', ':']: | 617 if not len(header): |
618 ret += "#\n" | |
619 elif header[:1] in [',', ':']: | |
546 ret += '#%s\n' % header | 620 ret += '#%s\n' % header |
547 else: | 621 else: |
548 ret += '# %s\n' % header | 622 ret += '# %s\n' % header |
549 | 623 |
550 if type(ret) != types.UnicodeType: | 624 if not isinstance(ret, text_type): |
551 ret = unicode(ret, self.encoding) | 625 ret = ret.decode(self.encoding) |
552 | 626 |
553 return ret + _BaseFile.__unicode__(self) | 627 return ret + _BaseFile.__unicode__(self) |
554 | 628 |
555 def save_as_mofile(self, fpath): | 629 def save_as_mofile(self, fpath): |
556 """ | 630 """ |
570 """ | 644 """ |
571 total = len([e for e in self if not e.obsolete]) | 645 total = len([e for e in self if not e.obsolete]) |
572 if total == 0: | 646 if total == 0: |
573 return 100 | 647 return 100 |
574 translated = len(self.translated_entries()) | 648 translated = len(self.translated_entries()) |
575 return int((100.00 / float(total)) * translated) | 649 return int(translated * 100 / float(total)) |
576 | 650 |
577 def translated_entries(self): | 651 def translated_entries(self): |
578 """ | 652 """ |
579 Convenience method that returns the list of translated entries. | 653 Convenience method that returns the list of translated entries. |
580 """ | 654 """ |
582 | 656 |
583 def untranslated_entries(self): | 657 def untranslated_entries(self): |
584 """ | 658 """ |
585 Convenience method that returns the list of untranslated entries. | 659 Convenience method that returns the list of untranslated entries. |
586 """ | 660 """ |
587 return [e for e in self if not e.translated() and not e.obsolete \ | 661 return [e for e in self if not e.translated() and not e.obsolete |
588 and not 'fuzzy' in e.flags] | 662 and not 'fuzzy' in e.flags] |
589 | 663 |
590 def fuzzy_entries(self): | 664 def fuzzy_entries(self): |
591 """ | 665 """ |
592 Convenience method that returns the list of fuzzy entries. | 666 Convenience method that returns the list of fuzzy entries. |
613 Keyword argument: | 687 Keyword argument: |
614 | 688 |
615 ``refpot`` | 689 ``refpot`` |
616 object POFile, the reference catalog. | 690 object POFile, the reference catalog. |
617 """ | 691 """ |
692 # Store entries in dict/set for faster access | |
693 self_entries = dict((entry.msgid, entry) for entry in self) | |
694 refpot_msgids = set(entry.msgid for entry in refpot) | |
695 # Merge entries that are in the refpot | |
618 for entry in refpot: | 696 for entry in refpot: |
619 e = self.find(entry.msgid, include_obsolete_entries=True) | 697 e = self_entries.get(entry.msgid) |
620 if e is None: | 698 if e is None: |
621 e = POEntry() | 699 e = POEntry() |
622 self.append(e) | 700 self.append(e) |
623 e.merge(entry) | 701 e.merge(entry) |
624 # ok, now we must "obsolete" entries that are not in the refpot anymore | 702 # ok, now we must "obsolete" entries that are not in the refpot anymore |
625 for entry in self: | 703 for entry in self: |
626 if refpot.find(entry.msgid) is None: | 704 if entry.msgid not in refpot_msgids: |
627 entry.obsolete = True | 705 entry.obsolete = True |
628 | |
629 # }}} | 706 # }}} |
630 # class MOFile {{{ | 707 # class MOFile {{{ |
708 | |
631 | 709 |
632 class MOFile(_BaseFile): | 710 class MOFile(_BaseFile): |
633 """ | 711 """ |
634 Mo file reader/writer. | 712 Mo file reader/writer. |
635 This class inherits the :class:`~polib._BaseFile` class and, by | 713 This class inherits the :class:`~polib._BaseFile` class and, by |
636 extension, the python ``list`` type. | 714 extension, the python ``list`` type. |
637 """ | 715 """ |
638 BIG_ENDIAN = 0xde120495 | 716 MAGIC = 0x950412de |
639 LITTLE_ENDIAN = 0x950412de | 717 MAGIC_SWAPPED = 0xde120495 |
640 | 718 |
641 def __init__(self, *args, **kwargs): | 719 def __init__(self, *args, **kwargs): |
642 """ | 720 """ |
643 Constructor, accepts all keywords arguments accepted by | 721 Constructor, accepts all keywords arguments accepted by |
644 :class:`~polib._BaseFile` class. | 722 :class:`~polib._BaseFile` class. |
696 def obsolete_entries(self): | 774 def obsolete_entries(self): |
697 """ | 775 """ |
698 Convenience method to keep the same interface with POFile instances. | 776 Convenience method to keep the same interface with POFile instances. |
699 """ | 777 """ |
700 return [] | 778 return [] |
701 | |
702 # }}} | 779 # }}} |
703 # class _BaseEntry {{{ | 780 # class _BaseEntry {{{ |
781 | |
704 | 782 |
705 class _BaseEntry(object): | 783 class _BaseEntry(object): |
706 """ | 784 """ |
707 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes. | 785 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes. |
708 This class should **not** be instantiated directly. | 786 This class should **not** be instantiated directly. |
751 else: | 829 else: |
752 delflag = '' | 830 delflag = '' |
753 ret = [] | 831 ret = [] |
754 # write the msgctxt if any | 832 # write the msgctxt if any |
755 if self.msgctxt is not None: | 833 if self.msgctxt is not None: |
756 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth) | 834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, |
835 wrapwidth) | |
757 # write the msgid | 836 # write the msgid |
758 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth) | 837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth) |
759 # write the msgid_plural if any | 838 # write the msgid_plural if any |
760 if self.msgid_plural: | 839 if self.msgid_plural: |
761 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural, wrapwidth) | 840 ret += self._str_field("msgid_plural", delflag, "", |
841 self.msgid_plural, wrapwidth) | |
762 if self.msgstr_plural: | 842 if self.msgstr_plural: |
763 # write the msgstr_plural if any | 843 # write the msgstr_plural if any |
764 msgstrs = self.msgstr_plural | 844 msgstrs = self.msgstr_plural |
765 keys = list(msgstrs) | 845 keys = list(msgstrs) |
766 keys.sort() | 846 keys.sort() |
767 for index in keys: | 847 for index in keys: |
768 msgstr = msgstrs[index] | 848 msgstr = msgstrs[index] |
769 plural_index = '[%s]' % index | 849 plural_index = '[%s]' % index |
770 ret += self._str_field("msgstr", delflag, plural_index, msgstr, wrapwidth) | 850 ret += self._str_field("msgstr", delflag, plural_index, msgstr, |
851 wrapwidth) | |
771 else: | 852 else: |
772 # otherwise write the msgstr | 853 # otherwise write the msgstr |
773 ret += self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth) | 854 ret += self._str_field("msgstr", delflag, "", self.msgstr, |
855 wrapwidth) | |
774 ret.append('') | 856 ret.append('') |
775 ret = '\n'.join(ret) | 857 ret = u('\n').join(ret) |
776 | |
777 if type(ret) != types.UnicodeType: | |
778 return unicode(ret, self.encoding) | |
779 return ret | 858 return ret |
780 | 859 |
781 def __str__(self): | 860 if PY3: |
782 """ | 861 def __str__(self): |
783 Returns the string representation of the entry. | 862 return self.__unicode__() |
784 """ | 863 else: |
785 return unicode(self).encode(self.encoding) | 864 def __str__(self): |
865 """ | |
866 Returns the string representation of the entry. | |
867 """ | |
868 return unicode(self).encode(self.encoding) | |
786 | 869 |
787 def __eq__(self, other): | 870 def __eq__(self, other): |
788 return unicode(self) == unicode(other) | 871 return str(self) == str(other) |
789 | 872 |
790 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78): | 873 def _str_field(self, fieldname, delflag, plural_index, field, |
874 wrapwidth=78): | |
791 lines = field.splitlines(True) | 875 lines = field.splitlines(True) |
792 if len(lines) > 1: | 876 if len(lines) > 1: |
793 lines = [''] + lines # start with initial empty line | 877 lines = [''] + lines # start with initial empty line |
794 else: | 878 else: |
795 escaped_field = escape(field) | 879 escaped_field = escape(field) |
796 specialchars_count = 0 | 880 specialchars_count = 0 |
797 for c in ['\\', '\n', '\r', '\t', '"']: | 881 for c in ['\\', '\n', '\r', '\t', '"']: |
798 specialchars_count += field.count(c) | 882 specialchars_count += field.count(c) |
802 if plural_index: | 886 if plural_index: |
803 flength += len(plural_index) | 887 flength += len(plural_index) |
804 real_wrapwidth = wrapwidth - flength + specialchars_count | 888 real_wrapwidth = wrapwidth - flength + specialchars_count |
805 if wrapwidth > 0 and len(field) > real_wrapwidth: | 889 if wrapwidth > 0 and len(field) > real_wrapwidth: |
806 # Wrap the line but take field name into account | 890 # Wrap the line but take field name into account |
807 lines = [''] + [unescape(item) for item in textwrap.wrap( | 891 lines = [''] + [unescape(item) for item in wrap( |
808 escaped_field, | 892 escaped_field, |
809 wrapwidth - 2, # 2 for quotes "" | 893 wrapwidth - 2, # 2 for quotes "" |
810 drop_whitespace=False, | 894 drop_whitespace=False, |
811 break_long_words=False | 895 break_long_words=False |
812 )] | 896 )] |
813 else: | 897 else: |
814 lines = [field] | 898 lines = [field] |
816 # quick and dirty trick to get the real field name | 900 # quick and dirty trick to get the real field name |
817 fieldname = fieldname[9:] | 901 fieldname = fieldname[9:] |
818 | 902 |
819 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, | 903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, |
820 escape(lines.pop(0)))] | 904 escape(lines.pop(0)))] |
821 for mstr in lines: | 905 for line in lines: |
822 ret.append('%s"%s"' % (delflag, escape(mstr))) | 906 ret.append('%s"%s"' % (delflag, escape(line))) |
823 return ret | 907 return ret |
824 | |
825 # }}} | 908 # }}} |
826 # class POEntry {{{ | 909 # class POEntry {{{ |
827 | 910 |
911 | |
828 class POEntry(_BaseEntry): | 912 class POEntry(_BaseEntry): |
829 """ | 913 """ |
830 Represents a po file entry. | 914 Represents a po file entry. |
831 """ | 915 """ |
832 | 916 |
852 ``previous_msgid`` | 936 ``previous_msgid`` |
853 string, the entry previous msgid. | 937 string, the entry previous msgid. |
854 | 938 |
855 ``previous_msgid_plural`` | 939 ``previous_msgid_plural`` |
856 string, the entry previous msgid_plural. | 940 string, the entry previous msgid_plural. |
941 | |
942 ``linenum`` | |
943 integer, the line number of the entry | |
857 """ | 944 """ |
858 _BaseEntry.__init__(self, *args, **kwargs) | 945 _BaseEntry.__init__(self, *args, **kwargs) |
859 self.comment = kwargs.get('comment', '') | 946 self.comment = kwargs.get('comment', '') |
860 self.tcomment = kwargs.get('tcomment', '') | 947 self.tcomment = kwargs.get('tcomment', '') |
861 self.occurrences = kwargs.get('occurrences', []) | 948 self.occurrences = kwargs.get('occurrences', []) |
862 self.flags = kwargs.get('flags', []) | 949 self.flags = kwargs.get('flags', []) |
863 self.previous_msgctxt = kwargs.get('previous_msgctxt', None) | 950 self.previous_msgctxt = kwargs.get('previous_msgctxt', None) |
864 self.previous_msgid = kwargs.get('previous_msgid', None) | 951 self.previous_msgid = kwargs.get('previous_msgid', None) |
865 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None) | 952 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None) |
953 self.linenum = kwargs.get('linenum', None) | |
866 | 954 |
867 def __unicode__(self, wrapwidth=78): | 955 def __unicode__(self, wrapwidth=78): |
868 """ | 956 """ |
869 Returns the unicode representation of the entry. | 957 Returns the unicode representation of the entry. |
870 """ | 958 """ |
877 for c in comments: | 965 for c in comments: |
878 val = getattr(self, c[0]) | 966 val = getattr(self, c[0]) |
879 if val: | 967 if val: |
880 for comment in val.split('\n'): | 968 for comment in val.split('\n'): |
881 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth: | 969 if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth: |
882 ret += textwrap.wrap( | 970 ret += wrap( |
883 comment, | 971 comment, |
884 wrapwidth, | 972 wrapwidth, |
885 initial_indent=c[1], | 973 initial_indent=c[1], |
886 subsequent_indent=c[1], | 974 subsequent_indent=c[1], |
887 break_long_words=False | 975 break_long_words=False |
901 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: | 989 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: |
902 # textwrap split words that contain hyphen, this is not | 990 # textwrap split words that contain hyphen, this is not |
903 # what we want for filenames, so the dirty hack is to | 991 # what we want for filenames, so the dirty hack is to |
904 # temporally replace hyphens with a char that a file cannot | 992 # temporally replace hyphens with a char that a file cannot |
905 # contain, like "*" | 993 # contain, like "*" |
906 ret += [l.replace('*', '-') for l in textwrap.wrap( | 994 ret += [l.replace('*', '-') for l in wrap( |
907 filestr.replace('-', '*'), | 995 filestr.replace('-', '*'), |
908 wrapwidth, | 996 wrapwidth, |
909 initial_indent='#: ', | 997 initial_indent='#: ', |
910 subsequent_indent='#: ', | 998 subsequent_indent='#: ', |
911 break_long_words=False | 999 break_long_words=False |
916 # flags (TODO: wrapping ?) | 1004 # flags (TODO: wrapping ?) |
917 if self.flags: | 1005 if self.flags: |
918 ret.append('#, %s' % ', '.join(self.flags)) | 1006 ret.append('#, %s' % ', '.join(self.flags)) |
919 | 1007 |
920 # previous context and previous msgid/msgid_plural | 1008 # previous context and previous msgid/msgid_plural |
921 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural'] | 1009 fields = ['previous_msgctxt', 'previous_msgid', |
1010 'previous_msgid_plural'] | |
922 for f in fields: | 1011 for f in fields: |
923 val = getattr(self, f) | 1012 val = getattr(self, f) |
924 if val: | 1013 if val: |
925 ret += self._str_field(f, "#| ", "", val, wrapwidth) | 1014 ret += self._str_field(f, "#| ", "", val, wrapwidth) |
926 | 1015 |
927 ret.append(_BaseEntry.__unicode__(self, wrapwidth)) | 1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth)) |
928 ret = '\n'.join(ret) | 1017 ret = u('\n').join(ret) |
929 | 1018 |
930 if type(ret) != types.UnicodeType: | 1019 assert isinstance(ret, text_type) |
931 return unicode(ret, self.encoding) | 1020 #if type(ret) != types.UnicodeType: |
1021 # return unicode(ret, self.encoding) | |
932 return ret | 1022 return ret |
933 | 1023 |
934 def __cmp__(self, other): | 1024 def __cmp__(self, other): |
935 """ | 1025 """ |
936 Called by comparison operations if rich comparison is not defined. | 1026 Called by comparison operations if rich comparison is not defined. |
937 """ | 1027 """ |
938 def compare_occurrences(a, b): | |
939 """ | |
940 Compare an entry occurrence with another one. | |
941 """ | |
942 if a[0] != b[0]: | |
943 return a[0] < b[0] | |
944 if a[1] != b[1]: | |
945 return a[1] < b[1] | |
946 return 0 | |
947 | 1028 |
948 # First: Obsolete test | 1029 # First: Obsolete test |
949 if self.obsolete != other.obsolete: | 1030 if self.obsolete != other.obsolete: |
950 if self.obsolete: | 1031 if self.obsolete: |
951 return -1 | 1032 return -1 |
952 else: | 1033 else: |
953 return 1 | 1034 return 1 |
954 # Work on a copy to protect original | 1035 # Work on a copy to protect original |
955 occ1 = self.occurrences[:] | 1036 occ1 = sorted(self.occurrences[:]) |
956 occ2 = other.occurrences[:] | 1037 occ2 = sorted(other.occurrences[:]) |
957 # Sorting using compare method | |
958 occ1.sort(compare_occurrences) | |
959 occ2.sort(compare_occurrences) | |
960 # Comparing sorted occurrences | |
961 pos = 0 | 1038 pos = 0 |
962 for entry1 in occ1: | 1039 for entry1 in occ1: |
963 try: | 1040 try: |
964 entry2 = occ2[pos] | 1041 entry2 = occ2[pos] |
965 except IndexError: | 1042 except IndexError: |
973 if entry1[1] != entry2[1]: | 1050 if entry1[1] != entry2[1]: |
974 if entry1[1] > entry2[1]: | 1051 if entry1[1] > entry2[1]: |
975 return 1 | 1052 return 1 |
976 else: | 1053 else: |
977 return -1 | 1054 return -1 |
1055 # Compare msgid_plural if set | |
1056 if self.msgid_plural: | |
1057 if not other.msgid_plural: | |
1058 return 1 | |
1059 for pos in self.msgid_plural: | |
1060 if pos not in other.msgid_plural: | |
1061 return 1 | |
1062 if self.msgid_plural[pos] > other.msgid_plural[pos]: | |
1063 return 1 | |
1064 if self.msgid_plural[pos] < other.msgid_plural[pos]: | |
1065 return -1 | |
978 # Finally: Compare message ID | 1066 # Finally: Compare message ID |
979 if self.msgid > other.msgid: return 1 | 1067 if self.msgid > other.msgid: |
980 else: return -1 | 1068 return 1 |
1069 elif self.msgid < other.msgid: | |
1070 return -1 | |
1071 return 0 | |
1072 | |
1073 def __gt__(self, other): | |
1074 return self.__cmp__(other) > 0 | |
1075 | |
1076 def __lt__(self, other): | |
1077 return self.__cmp__(other) < 0 | |
1078 | |
1079 def __ge__(self, other): | |
1080 return self.__cmp__(other) >= 0 | |
1081 | |
1082 def __le__(self, other): | |
1083 return self.__cmp__(other) <= 0 | |
1084 | |
1085 def __eq__(self, other): | |
1086 return self.__cmp__(other) == 0 | |
1087 | |
1088 def __ne__(self, other): | |
1089 return self.__cmp__(other) != 0 | |
981 | 1090 |
982 def translated(self): | 1091 def translated(self): |
983 """ | 1092 """ |
984 Returns ``True`` if the entry has been translated or ``False`` | 1093 Returns ``True`` if the entry has been translated or ``False`` |
985 otherwise. | 1094 otherwise. |
1018 # keep existing translation at pos if any | 1127 # keep existing translation at pos if any |
1019 self.msgstr_plural[pos] | 1128 self.msgstr_plural[pos] |
1020 except KeyError: | 1129 except KeyError: |
1021 self.msgstr_plural[pos] = '' | 1130 self.msgstr_plural[pos] = '' |
1022 | 1131 |
1132 def __hash__(self): | |
1133 return hash((self.msgid, self.msgstr)) | |
1023 # }}} | 1134 # }}} |
1024 # class MOEntry {{{ | 1135 # class MOEntry {{{ |
1025 | 1136 |
1137 | |
1026 class MOEntry(_BaseEntry): | 1138 class MOEntry(_BaseEntry): |
1027 """ | 1139 """ |
1028 Represents a mo file entry. | 1140 Represents a mo file entry. |
1029 """ | 1141 """ |
1030 pass | 1142 def __init__(self, *args, **kwargs): |
1143 """ | |
1144 Constructor, accepts the following keyword arguments, | |
1145 for consistency with :class:`~polib.POEntry`: | |
1146 | |
1147 ``comment`` | |
1148 ``tcomment`` | |
1149 ``occurrences`` | |
1150 ``flags`` | |
1151 ``previous_msgctxt`` | |
1152 ``previous_msgid`` | |
1153 ``previous_msgid_plural`` | |
1154 | |
1155 Note: even though these keyword arguments are accepted, | |
1156 they hold no real meaning in the context of MO files | |
1157 and are simply ignored. | |
1158 """ | |
1159 _BaseEntry.__init__(self, *args, **kwargs) | |
1160 self.comment = '' | |
1161 self.tcomment = '' | |
1162 self.occurrences = [] | |
1163 self.flags = [] | |
1164 self.previous_msgctxt = None | |
1165 self.previous_msgid = None | |
1166 self.previous_msgid_plural = None | |
1167 | |
1168 def __hash__(self): | |
1169 return hash((self.msgid, self.msgstr)) | |
1031 | 1170 |
1032 # }}} | 1171 # }}} |
1033 # class _POFileParser {{{ | 1172 # class _POFileParser {{{ |
1173 | |
1034 | 1174 |
1035 class _POFileParser(object): | 1175 class _POFileParser(object): |
1036 """ | 1176 """ |
1037 A finite state machine to parse efficiently and correctly po | 1177 A finite state machine to parse efficiently and correctly po |
1038 file format. | 1178 file format. |
1054 ``check_for_duplicates`` | 1194 ``check_for_duplicates`` |
1055 whether to check for duplicate entries when adding entries to the | 1195 whether to check for duplicate entries when adding entries to the |
1056 file (optional, default: ``False``). | 1196 file (optional, default: ``False``). |
1057 """ | 1197 """ |
1058 enc = kwargs.get('encoding', default_encoding) | 1198 enc = kwargs.get('encoding', default_encoding) |
1059 if os.path.exists(pofile): | 1199 if _is_file(pofile): |
1060 try: | 1200 try: |
1061 self.fhandle = codecs.open(pofile, 'rU', enc) | 1201 self.fhandle = io.open(pofile, 'rt', encoding=enc) |
1062 except LookupError: | 1202 except LookupError: |
1063 enc = default_encoding | 1203 enc = default_encoding |
1064 self.fhandle = codecs.open(pofile, 'rU', enc) | 1204 self.fhandle = io.open(pofile, 'rt', encoding=enc) |
1065 else: | 1205 else: |
1066 self.fhandle = pofile.splitlines() | 1206 self.fhandle = pofile.splitlines() |
1067 | 1207 |
1068 self.instance = POFile( | 1208 klass = kwargs.get('klass') |
1209 if klass is None: | |
1210 klass = POFile | |
1211 self.instance = klass( | |
1069 pofile=pofile, | 1212 pofile=pofile, |
1070 encoding=enc, | 1213 encoding=enc, |
1071 check_for_duplicates=kwargs.get('check_for_duplicates', False) | 1214 check_for_duplicates=kwargs.get('check_for_duplicates', False) |
1072 ) | 1215 ) |
1073 self.transitions = {} | 1216 self.transitions = {} |
1074 self.current_entry = POEntry() | 1217 self.current_line = 0 |
1075 self.current_state = 'ST' | 1218 self.current_entry = POEntry(linenum=self.current_line) |
1219 self.current_state = 'st' | |
1076 self.current_token = None | 1220 self.current_token = None |
1077 # two memo flags used in handlers | 1221 # two memo flags used in handlers |
1078 self.msgstr_index = 0 | 1222 self.msgstr_index = 0 |
1079 self.entry_obsolete = 0 | 1223 self.entry_obsolete = 0 |
1080 # Configure the state machine, by adding transitions. | 1224 # Configure the state machine, by adding transitions. |
1081 # Signification of symbols: | 1225 # Signification of symbols: |
1082 # * ST: Beginning of the file (start) | 1226 # * ST: Beginning of the file (start) |
1083 # * HE: Header | 1227 # * HE: Header |
1084 # * TC: a translation comment | 1228 # * TC: a translation comment |
1085 # * GC: a generated comment | 1229 # * GC: a generated comment |
1086 # * OC: a file/line occurence | 1230 # * OC: a file/line occurrence |
1087 # * FL: a flags line | 1231 # * FL: a flags line |
1088 # * CT: a message context | 1232 # * CT: a message context |
1089 # * PC: a previous msgctxt | 1233 # * PC: a previous msgctxt |
1090 # * PM: a previous msgid | 1234 # * PM: a previous msgid |
1091 # * PP: a previous msgid_plural | 1235 # * PP: a previous msgid_plural |
1092 # * MI: a msgid | 1236 # * MI: a msgid |
1093 # * MP: a msgid plural | 1237 # * MP: a msgid plural |
1094 # * MS: a msgstr | 1238 # * MS: a msgstr |
1095 # * MX: a msgstr plural | 1239 # * MX: a msgstr plural |
1096 # * MC: a msgid or msgstr continuation line | 1240 # * MC: a msgid or msgstr continuation line |
1097 all = ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'PC', 'PM', 'PP', 'TC', | 1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc', |
1098 'MS', 'MP', 'MX', 'MI'] | 1242 'ms', 'mp', 'mx', 'mi'] |
1099 | 1243 |
1100 self.add('TC', ['ST', 'HE'], 'HE') | 1244 self.add('tc', ['st', 'he'], 'he') |
1101 self.add('TC', ['GC', 'OC', 'FL', 'TC', 'PC', 'PM', 'PP', 'MS', | 1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', |
1102 'MP', 'MX', 'MI'], 'TC') | 1246 'mp', 'mx', 'mi'], 'tc') |
1103 self.add('GC', all, 'GC') | 1247 self.add('gc', all, 'gc') |
1104 self.add('OC', all, 'OC') | 1248 self.add('oc', all, 'oc') |
1105 self.add('FL', all, 'FL') | 1249 self.add('fl', all, 'fl') |
1106 self.add('PC', all, 'PC') | 1250 self.add('pc', all, 'pc') |
1107 self.add('PM', all, 'PM') | 1251 self.add('pm', all, 'pm') |
1108 self.add('PP', all, 'PP') | 1252 self.add('pp', all, 'pp') |
1109 self.add('CT', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'PC', 'PM', | 1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', |
1110 'PP', 'MS', 'MX'], 'CT') | 1254 'pp', 'ms', 'mx'], 'ct') |
1111 self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'CT', 'TC', 'PC', | 1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', |
1112 'PM', 'PP', 'MS', 'MX'], 'MI') | 1256 'pm', 'pp', 'ms', 'mx'], 'mi') |
1113 self.add('MP', ['TC', 'GC', 'PC', 'PM', 'PP', 'MI'], 'MP') | 1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp') |
1114 self.add('MS', ['MI', 'MP', 'TC'], 'MS') | 1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms') |
1115 self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX') | 1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx') |
1116 self.add('MC', ['CT', 'MI', 'MP', 'MS', 'MX', 'PM', 'PP', 'PC'], 'MC') | 1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc') |
1117 | 1261 |
1118 def parse(self): | 1262 def parse(self): |
1119 """ | 1263 """ |
1120 Run the state machine, parse the file line by line and call process() | 1264 Run the state machine, parse the file line by line and call process() |
1121 with the current matched symbol. | 1265 with the current matched symbol. |
1122 """ | 1266 """ |
1123 i = 0 | |
1124 | 1267 |
1125 keywords = { | 1268 keywords = { |
1126 'msgctxt': 'CT', | 1269 'msgctxt': 'ct', |
1127 'msgid': 'MI', | 1270 'msgid': 'mi', |
1128 'msgstr': 'MS', | 1271 'msgstr': 'ms', |
1129 'msgid_plural': 'MP', | 1272 'msgid_plural': 'mp', |
1130 } | 1273 } |
1131 prev_keywords = { | 1274 prev_keywords = { |
1132 'msgid_plural': 'PP', | 1275 'msgid_plural': 'pp', |
1133 'msgid': 'PM', | 1276 'msgid': 'pm', |
1134 'msgctxt': 'PC', | 1277 'msgctxt': 'pc', |
1135 } | 1278 } |
1136 | 1279 tokens = [] |
1137 for line in self.fhandle: | 1280 for line in self.fhandle: |
1138 i += 1 | 1281 self.current_line += 1 |
1139 line = line.strip() | 1282 line = line.strip() |
1140 if line == '': | 1283 if line == '': |
1141 continue | 1284 continue |
1142 | 1285 |
1143 tokens = line.split(None, 2) | 1286 tokens = line.split(None, 2) |
1144 nb_tokens = len(tokens) | 1287 nb_tokens = len(tokens) |
1288 | |
1289 if tokens[0] == '#~|': | |
1290 continue | |
1145 | 1291 |
1146 if tokens[0] == '#~' and nb_tokens > 1: | 1292 if tokens[0] == '#~' and nb_tokens > 1: |
1147 line = line[3:].strip() | 1293 line = line[3:].strip() |
1148 tokens = tokens[1:] | 1294 tokens = tokens[1:] |
1149 nb_tokens -= 1 | 1295 nb_tokens -= 1 |
1153 | 1299 |
1154 # Take care of keywords like | 1300 # Take care of keywords like |
1155 # msgid, msgid_plural, msgctxt & msgstr. | 1301 # msgid, msgid_plural, msgctxt & msgstr. |
1156 if tokens[0] in keywords and nb_tokens > 1: | 1302 if tokens[0] in keywords and nb_tokens > 1: |
1157 line = line[len(tokens[0]):].lstrip() | 1303 line = line[len(tokens[0]):].lstrip() |
1304 if re.search(r'([^\\]|^)"', line[1:-1]): | |
1305 raise IOError('Syntax error in po file %s (line %s): ' | |
1306 'unescaped double quote found' % | |
1307 (self.instance.fpath, self.current_line)) | |
1158 self.current_token = line | 1308 self.current_token = line |
1159 self.process(keywords[tokens[0]], i) | 1309 self.process(keywords[tokens[0]]) |
1160 continue | 1310 continue |
1161 | 1311 |
1162 self.current_token = line | 1312 self.current_token = line |
1163 | 1313 |
1164 if tokens[0] == '#:' and nb_tokens > 1: | 1314 if tokens[0] == '#:': |
1315 if nb_tokens <= 1: | |
1316 continue | |
1165 # we are on a occurrences line | 1317 # we are on a occurrences line |
1166 self.process('OC', i) | 1318 self.process('oc') |
1167 | 1319 |
1168 elif line[:1] == '"': | 1320 elif line[:1] == '"': |
1169 # we are on a continuation line | 1321 # we are on a continuation line |
1170 self.process('MC', i) | 1322 if re.search(r'([^\\]|^)"', line[1:-1]): |
1323 raise IOError('Syntax error in po file %s (line %s): ' | |
1324 'unescaped double quote found' % | |
1325 (self.instance.fpath, self.current_line)) | |
1326 self.process('mc') | |
1171 | 1327 |
1172 elif line[:7] == 'msgstr[': | 1328 elif line[:7] == 'msgstr[': |
1173 # we are on a msgstr plural | 1329 # we are on a msgstr plural |
1174 self.process('MX', i) | 1330 self.process('mx') |
1175 | 1331 |
1176 elif tokens[0] == '#,' and nb_tokens > 1: | 1332 elif tokens[0] == '#,': |
1333 if nb_tokens <= 1: | |
1334 continue | |
1177 # we are on a flags line | 1335 # we are on a flags line |
1178 self.process('FL', i) | 1336 self.process('fl') |
1179 | 1337 |
1180 elif tokens[0] == '#': | 1338 elif tokens[0] == '#' or tokens[0].startswith('##'): |
1181 if line == '#': line += ' ' | 1339 if line == '#': |
1340 line += ' ' | |
1182 # we are on a translator comment line | 1341 # we are on a translator comment line |
1183 self.process('TC', i) | 1342 self.process('tc') |
1184 | 1343 |
1185 elif tokens[0] == '#.' and nb_tokens > 1: | 1344 elif tokens[0] == '#.': |
1345 if nb_tokens <= 1: | |
1346 continue | |
1186 # we are on a generated comment line | 1347 # we are on a generated comment line |
1187 self.process('GC', i) | 1348 self.process('gc') |
1188 | 1349 |
1189 elif tokens[0] == '#|': | 1350 elif tokens[0] == '#|': |
1190 if nb_tokens < 2: | 1351 if nb_tokens <= 1: |
1191 self.process('??', i) | 1352 raise IOError('Syntax error in po file %s (line %s)' % |
1192 continue | 1353 (self.instance.fpath, self.current_line)) |
1193 | 1354 |
1194 # Remove the marker and any whitespace right after that. | 1355 # Remove the marker and any whitespace right after that. |
1195 line = line[2:].lstrip() | 1356 line = line[2:].lstrip() |
1196 self.current_token = line | 1357 self.current_token = line |
1197 | 1358 |
1198 if tokens[1].startswith('"'): | 1359 if tokens[1].startswith('"'): |
1199 # Continuation of previous metadata. | 1360 # Continuation of previous metadata. |
1200 self.process('MC', i) | 1361 self.process('mc') |
1201 continue | 1362 continue |
1202 | 1363 |
1203 if nb_tokens == 2: | 1364 if nb_tokens == 2: |
1204 # Invalid continuation line. | 1365 # Invalid continuation line. |
1205 self.process('??', i) | 1366 raise IOError('Syntax error in po file %s (line %s): ' |
1367 'invalid continuation line' % | |
1368 (self.instance.fpath, self.current_line)) | |
1206 | 1369 |
1207 # we are on a "previous translation" comment line, | 1370 # we are on a "previous translation" comment line, |
1208 if tokens[1] not in prev_keywords: | 1371 if tokens[1] not in prev_keywords: |
1209 # Unknown keyword in previous translation comment. | 1372 # Unknown keyword in previous translation comment. |
1210 self.process('??', i) | 1373 raise IOError('Syntax error in po file %s (line %s): ' |
1374 'unknown keyword %s' % | |
1375 (self.instance.fpath, self.current_line, | |
1376 tokens[1])) | |
1211 | 1377 |
1212 # Remove the keyword and any whitespace | 1378 # Remove the keyword and any whitespace |
1213 # between it and the starting quote. | 1379 # between it and the starting quote. |
1214 line = line[len(tokens[1]):].lstrip() | 1380 line = line[len(tokens[1]):].lstrip() |
1215 self.current_token = line | 1381 self.current_token = line |
1216 self.process(prev_keywords[tokens[1]], i) | 1382 self.process(prev_keywords[tokens[1]]) |
1217 | 1383 |
1218 else: | 1384 else: |
1219 self.process('??', i) | 1385 raise IOError('Syntax error in po file %s (line %s)' % |
1220 | 1386 (self.instance.fpath, self.current_line)) |
1221 if self.current_entry: | 1387 |
1388 if self.current_entry and len(tokens) > 0 and \ | |
1389 not tokens[0].startswith('#'): | |
1222 # since entries are added when another entry is found, we must add | 1390 # since entries are added when another entry is found, we must add |
1223 # the last entry here (only if there are lines) | 1391 # the last entry here (only if there are lines). Trailing comments |
1392 # are ignored | |
1224 self.instance.append(self.current_entry) | 1393 self.instance.append(self.current_entry) |
1394 | |
1225 # before returning the instance, check if there's metadata and if | 1395 # before returning the instance, check if there's metadata and if |
1226 # so extract it in a dict | 1396 # so extract it in a dict |
1227 firstentry = self.instance[0] | 1397 metadataentry = self.instance.find('') |
1228 if firstentry.msgid == '': # metadata found | 1398 if metadataentry: # metadata found |
1229 # remove the entry | 1399 # remove the entry |
1230 firstentry = self.instance.pop(0) | 1400 self.instance.remove(metadataentry) |
1231 self.instance.metadata_is_fuzzy = firstentry.flags | 1401 self.instance.metadata_is_fuzzy = metadataentry.flags |
1232 key = None | 1402 key = None |
1233 for msg in firstentry.msgstr.splitlines(): | 1403 for msg in metadataentry.msgstr.splitlines(): |
1234 try: | 1404 try: |
1235 key, val = msg.split(':', 1) | 1405 key, val = msg.split(':', 1) |
1236 self.instance.metadata[key] = val.strip() | 1406 self.instance.metadata[key] = val.strip() |
1237 except: | 1407 except (ValueError, KeyError): |
1238 if key is not None: | 1408 if key is not None: |
1239 self.instance.metadata[key] += '\n'+ msg.strip() | 1409 self.instance.metadata[key] += '\n' + msg.strip() |
1240 # close opened file | 1410 # close opened file |
1241 if isinstance(self.fhandle, file): | 1411 if not isinstance(self.fhandle, list): # must be file |
1242 self.fhandle.close() | 1412 self.fhandle.close() |
1243 return self.instance | 1413 return self.instance |
1244 | 1414 |
1245 def add(self, symbol, states, next_state): | 1415 def add(self, symbol, states, next_state): |
1246 """ | 1416 """ |
1256 | 1426 |
1257 ``next_state`` | 1427 ``next_state`` |
1258 the next state the fsm will have after the action. | 1428 the next state the fsm will have after the action. |
1259 """ | 1429 """ |
1260 for state in states: | 1430 for state in states: |
1261 action = getattr(self, 'handle_%s' % next_state.lower()) | 1431 action = getattr(self, 'handle_%s' % next_state) |
1262 self.transitions[(symbol, state)] = (action, next_state) | 1432 self.transitions[(symbol, state)] = (action, next_state) |
1263 | 1433 |
1264 def process(self, symbol, linenum): | 1434 def process(self, symbol): |
1265 """ | 1435 """ |
1266 Process the transition corresponding to the current state and the | 1436 Process the transition corresponding to the current state and the |
1267 symbol provided. | 1437 symbol provided. |
1268 | 1438 |
1269 Keywords arguments: | 1439 Keywords arguments: |
1276 """ | 1446 """ |
1277 try: | 1447 try: |
1278 (action, state) = self.transitions[(symbol, self.current_state)] | 1448 (action, state) = self.transitions[(symbol, self.current_state)] |
1279 if action(): | 1449 if action(): |
1280 self.current_state = state | 1450 self.current_state = state |
1281 except Exception as exc: | 1451 except Exception: |
1282 raise IOError('Syntax error in po file (line %s)' % linenum) | 1452 raise IOError('Syntax error in po file (line %s)' % |
1453 self.current_line) | |
1283 | 1454 |
1284 # state handlers | 1455 # state handlers |
1285 | 1456 |
1286 def handle_he(self): | 1457 def handle_he(self): |
1287 """Handle a header comment.""" | 1458 """Handle a header comment.""" |
1290 self.instance.header += self.current_token[2:] | 1461 self.instance.header += self.current_token[2:] |
1291 return 1 | 1462 return 1 |
1292 | 1463 |
1293 def handle_tc(self): | 1464 def handle_tc(self): |
1294 """Handle a translator comment.""" | 1465 """Handle a translator comment.""" |
1295 if self.current_state in ['MC', 'MS', 'MX']: | 1466 if self.current_state in ['mc', 'ms', 'mx']: |
1296 self.instance.append(self.current_entry) | 1467 self.instance.append(self.current_entry) |
1297 self.current_entry = POEntry() | 1468 self.current_entry = POEntry(linenum=self.current_line) |
1298 if self.current_entry.tcomment != '': | 1469 if self.current_entry.tcomment != '': |
1299 self.current_entry.tcomment += '\n' | 1470 self.current_entry.tcomment += '\n' |
1300 self.current_entry.tcomment += self.current_token[2:] | 1471 tcomment = self.current_token.lstrip('#') |
1472 if tcomment.startswith(' '): | |
1473 tcomment = tcomment[1:] | |
1474 self.current_entry.tcomment += tcomment | |
1301 return True | 1475 return True |
1302 | 1476 |
1303 def handle_gc(self): | 1477 def handle_gc(self): |
1304 """Handle a generated comment.""" | 1478 """Handle a generated comment.""" |
1305 if self.current_state in ['MC', 'MS', 'MX']: | 1479 if self.current_state in ['mc', 'ms', 'mx']: |
1306 self.instance.append(self.current_entry) | 1480 self.instance.append(self.current_entry) |
1307 self.current_entry = POEntry() | 1481 self.current_entry = POEntry(linenum=self.current_line) |
1308 if self.current_entry.comment != '': | 1482 if self.current_entry.comment != '': |
1309 self.current_entry.comment += '\n' | 1483 self.current_entry.comment += '\n' |
1310 self.current_entry.comment += self.current_token[3:] | 1484 self.current_entry.comment += self.current_token[3:] |
1311 return True | 1485 return True |
1312 | 1486 |
1313 def handle_oc(self): | 1487 def handle_oc(self): |
1314 """Handle a file:num occurence.""" | 1488 """Handle a file:num occurrence.""" |
1315 if self.current_state in ['MC', 'MS', 'MX']: | 1489 if self.current_state in ['mc', 'ms', 'mx']: |
1316 self.instance.append(self.current_entry) | 1490 self.instance.append(self.current_entry) |
1317 self.current_entry = POEntry() | 1491 self.current_entry = POEntry(linenum=self.current_line) |
1318 occurrences = self.current_token[3:].split() | 1492 occurrences = self.current_token[3:].split() |
1319 for occurrence in occurrences: | 1493 for occurrence in occurrences: |
1320 if occurrence != '': | 1494 if occurrence != '': |
1321 try: | 1495 try: |
1322 fil, line = occurrence.split(':') | 1496 fil, line = occurrence.split(':') |
1323 if not line.isdigit(): | 1497 if not line.isdigit(): |
1324 fil = fil + line | 1498 fil = fil + line |
1325 line = '' | 1499 line = '' |
1326 self.current_entry.occurrences.append((fil, line)) | 1500 self.current_entry.occurrences.append((fil, line)) |
1327 except: | 1501 except (ValueError, AttributeError): |
1328 self.current_entry.occurrences.append((occurrence, '')) | 1502 self.current_entry.occurrences.append((occurrence, '')) |
1329 return True | 1503 return True |
1330 | 1504 |
1331 def handle_fl(self): | 1505 def handle_fl(self): |
1332 """Handle a flags line.""" | 1506 """Handle a flags line.""" |
1333 if self.current_state in ['MC', 'MS', 'MX']: | 1507 if self.current_state in ['mc', 'ms', 'mx']: |
1334 self.instance.append(self.current_entry) | 1508 self.instance.append(self.current_entry) |
1335 self.current_entry = POEntry() | 1509 self.current_entry = POEntry(linenum=self.current_line) |
1336 self.current_entry.flags += self.current_token[3:].split(', ') | 1510 self.current_entry.flags += [c.strip() for c in |
1511 self.current_token[3:].split(',')] | |
1337 return True | 1512 return True |
1338 | 1513 |
1339 def handle_pp(self): | 1514 def handle_pp(self): |
1340 """Handle a previous msgid_plural line.""" | 1515 """Handle a previous msgid_plural line.""" |
1341 if self.current_state in ['MC', 'MS', 'MX']: | 1516 if self.current_state in ['mc', 'ms', 'mx']: |
1342 self.instance.append(self.current_entry) | 1517 self.instance.append(self.current_entry) |
1343 self.current_entry = POEntry() | 1518 self.current_entry = POEntry(linenum=self.current_line) |
1344 self.current_entry.previous_msgid_plural = \ | 1519 self.current_entry.previous_msgid_plural = \ |
1345 unescape(self.current_token[1:-1]) | 1520 unescape(self.current_token[1:-1]) |
1346 return True | 1521 return True |
1347 | 1522 |
1348 def handle_pm(self): | 1523 def handle_pm(self): |
1349 """Handle a previous msgid line.""" | 1524 """Handle a previous msgid line.""" |
1350 if self.current_state in ['MC', 'MS', 'MX']: | 1525 if self.current_state in ['mc', 'ms', 'mx']: |
1351 self.instance.append(self.current_entry) | 1526 self.instance.append(self.current_entry) |
1352 self.current_entry = POEntry() | 1527 self.current_entry = POEntry(linenum=self.current_line) |
1353 self.current_entry.previous_msgid = \ | 1528 self.current_entry.previous_msgid = \ |
1354 unescape(self.current_token[1:-1]) | 1529 unescape(self.current_token[1:-1]) |
1355 return True | 1530 return True |
1356 | 1531 |
1357 def handle_pc(self): | 1532 def handle_pc(self): |
1358 """Handle a previous msgctxt line.""" | 1533 """Handle a previous msgctxt line.""" |
1359 if self.current_state in ['MC', 'MS', 'MX']: | 1534 if self.current_state in ['mc', 'ms', 'mx']: |
1360 self.instance.append(self.current_entry) | 1535 self.instance.append(self.current_entry) |
1361 self.current_entry = POEntry() | 1536 self.current_entry = POEntry(linenum=self.current_line) |
1362 self.current_entry.previous_msgctxt = \ | 1537 self.current_entry.previous_msgctxt = \ |
1363 unescape(self.current_token[1:-1]) | 1538 unescape(self.current_token[1:-1]) |
1364 return True | 1539 return True |
1365 | 1540 |
1366 def handle_ct(self): | 1541 def handle_ct(self): |
1367 """Handle a msgctxt.""" | 1542 """Handle a msgctxt.""" |
1368 if self.current_state in ['MC', 'MS', 'MX']: | 1543 if self.current_state in ['mc', 'ms', 'mx']: |
1369 self.instance.append(self.current_entry) | 1544 self.instance.append(self.current_entry) |
1370 self.current_entry = POEntry() | 1545 self.current_entry = POEntry(linenum=self.current_line) |
1371 self.current_entry.msgctxt = unescape(self.current_token[1:-1]) | 1546 self.current_entry.msgctxt = unescape(self.current_token[1:-1]) |
1372 return True | 1547 return True |
1373 | 1548 |
1374 def handle_mi(self): | 1549 def handle_mi(self): |
1375 """Handle a msgid.""" | 1550 """Handle a msgid.""" |
1376 if self.current_state in ['MC', 'MS', 'MX']: | 1551 if self.current_state in ['mc', 'ms', 'mx']: |
1377 self.instance.append(self.current_entry) | 1552 self.instance.append(self.current_entry) |
1378 self.current_entry = POEntry() | 1553 self.current_entry = POEntry(linenum=self.current_line) |
1379 self.current_entry.obsolete = self.entry_obsolete | 1554 self.current_entry.obsolete = self.entry_obsolete |
1380 self.current_entry.msgid = unescape(self.current_token[1:-1]) | 1555 self.current_entry.msgid = unescape(self.current_token[1:-1]) |
1381 return True | 1556 return True |
1382 | 1557 |
1383 def handle_mp(self): | 1558 def handle_mp(self): |
1390 self.current_entry.msgstr = unescape(self.current_token[1:-1]) | 1565 self.current_entry.msgstr = unescape(self.current_token[1:-1]) |
1391 return True | 1566 return True |
1392 | 1567 |
1393 def handle_mx(self): | 1568 def handle_mx(self): |
1394 """Handle a msgstr plural.""" | 1569 """Handle a msgstr plural.""" |
1395 index, value = self.current_token[7], self.current_token[11:-1] | 1570 index = self.current_token[7] |
1396 self.current_entry.msgstr_plural[index] = unescape(value) | 1571 value = self.current_token[self.current_token.find('"') + 1:-1] |
1397 self.msgstr_index = index | 1572 self.current_entry.msgstr_plural[int(index)] = unescape(value) |
1573 self.msgstr_index = int(index) | |
1398 return True | 1574 return True |
1399 | 1575 |
1400 def handle_mc(self): | 1576 def handle_mc(self): |
1401 """Handle a msgid or msgstr continuation line.""" | 1577 """Handle a msgid or msgstr continuation line.""" |
1402 token = unescape(self.current_token[1:-1]) | 1578 token = unescape(self.current_token[1:-1]) |
1403 if self.current_state == 'CT': | 1579 if self.current_state == 'ct': |
1404 typ = 'msgctxt' | |
1405 self.current_entry.msgctxt += token | 1580 self.current_entry.msgctxt += token |
1406 elif self.current_state == 'MI': | 1581 elif self.current_state == 'mi': |
1407 typ = 'msgid' | |
1408 self.current_entry.msgid += token | 1582 self.current_entry.msgid += token |
1409 elif self.current_state == 'MP': | 1583 elif self.current_state == 'mp': |
1410 typ = 'msgid_plural' | |
1411 self.current_entry.msgid_plural += token | 1584 self.current_entry.msgid_plural += token |
1412 elif self.current_state == 'MS': | 1585 elif self.current_state == 'ms': |
1413 typ = 'msgstr' | |
1414 self.current_entry.msgstr += token | 1586 self.current_entry.msgstr += token |
1415 elif self.current_state == 'MX': | 1587 elif self.current_state == 'mx': |
1416 typ = 'msgstr[%s]' % self.msgstr_index | |
1417 self.current_entry.msgstr_plural[self.msgstr_index] += token | 1588 self.current_entry.msgstr_plural[self.msgstr_index] += token |
1418 elif self.current_state == 'PP': | 1589 elif self.current_state == 'pp': |
1419 typ = 'previous_msgid_plural' | |
1420 token = token[3:] | |
1421 self.current_entry.previous_msgid_plural += token | 1590 self.current_entry.previous_msgid_plural += token |
1422 elif self.current_state == 'PM': | 1591 elif self.current_state == 'pm': |
1423 typ = 'previous_msgid' | |
1424 token = token[3:] | |
1425 self.current_entry.previous_msgid += token | 1592 self.current_entry.previous_msgid += token |
1426 elif self.current_state == 'PC': | 1593 elif self.current_state == 'pc': |
1427 typ = 'previous_msgctxt' | |
1428 token = token[3:] | |
1429 self.current_entry.previous_msgctxt += token | 1594 self.current_entry.previous_msgctxt += token |
1430 # don't change the current state | 1595 # don't change the current state |
1431 return False | 1596 return False |
1432 | |
1433 # }}} | 1597 # }}} |
1434 # class _MOFileParser {{{ | 1598 # class _MOFileParser {{{ |
1599 | |
1435 | 1600 |
1436 class _MOFileParser(object): | 1601 class _MOFileParser(object): |
1437 """ | 1602 """ |
1438 A class to parse binary mo files. | 1603 A class to parse binary mo files. |
1439 """ | 1604 """ |
1454 ``check_for_duplicates`` | 1619 ``check_for_duplicates`` |
1455 whether to check for duplicate entries when adding entries to the | 1620 whether to check for duplicate entries when adding entries to the |
1456 file (optional, default: ``False``). | 1621 file (optional, default: ``False``). |
1457 """ | 1622 """ |
1458 self.fhandle = open(mofile, 'rb') | 1623 self.fhandle = open(mofile, 'rb') |
1459 self.instance = MOFile( | 1624 |
1625 klass = kwargs.get('klass') | |
1626 if klass is None: | |
1627 klass = MOFile | |
1628 self.instance = klass( | |
1460 fpath=mofile, | 1629 fpath=mofile, |
1461 encoding=kwargs.get('encoding', default_encoding), | 1630 encoding=kwargs.get('encoding', default_encoding), |
1462 check_for_duplicates=kwargs.get('check_for_duplicates', False) | 1631 check_for_duplicates=kwargs.get('check_for_duplicates', False) |
1463 ) | 1632 ) |
1464 | 1633 |
1634 def __del__(self): | |
1635 """ | |
1636 Make sure the file is closed, this prevents warnings on unclosed file | |
1637 when running tests with python >= 3.2. | |
1638 """ | |
1639 if self.fhandle: | |
1640 self.fhandle.close() | |
1641 | |
1465 def parse(self): | 1642 def parse(self): |
1466 """ | 1643 """ |
1467 Build the instance with the file handle provided in the | 1644 Build the instance with the file handle provided in the |
1468 constructor. | 1645 constructor. |
1469 """ | 1646 """ |
1470 # parse magic number | 1647 # parse magic number |
1471 magic_number = self._readbinary('<I', 4) | 1648 magic_number = self._readbinary('<I', 4) |
1472 if magic_number == MOFile.LITTLE_ENDIAN: | 1649 if magic_number == MOFile.MAGIC: |
1473 ii = '<II' | 1650 ii = '<II' |
1474 elif magic_number == MOFile.BIG_ENDIAN: | 1651 elif magic_number == MOFile.MAGIC_SWAPPED: |
1475 ii = '>II' | 1652 ii = '>II' |
1476 else: | 1653 else: |
1477 raise IOError('Invalid mo file, magic number is incorrect !') | 1654 raise IOError('Invalid mo file, magic number is incorrect !') |
1478 self.instance.magic_number = magic_number | 1655 self.instance.magic_number = magic_number |
1479 # parse the version number and the number of strings | 1656 # parse the version number and the number of strings |
1480 self.instance.version, numofstrings = self._readbinary(ii, 8) | 1657 version, numofstrings = self._readbinary(ii, 8) |
1658 # from MO file format specs: "A program seeing an unexpected major | |
1659 # revision number should stop reading the MO file entirely" | |
1660 if version not in (0, 1): | |
1661 raise IOError('Invalid mo file, unexpected major revision number') | |
1662 self.instance.version = version | |
1481 # original strings and translation strings hash table offset | 1663 # original strings and translation strings hash table offset |
1482 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8) | 1664 msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8) |
1483 # move to msgid hash table and read length and offset of msgids | 1665 # move to msgid hash table and read length and offset of msgids |
1484 self.fhandle.seek(msgids_hash_offset) | 1666 self.fhandle.seek(msgids_hash_offset) |
1485 msgids_index = [] | 1667 msgids_index = [] |
1489 self.fhandle.seek(msgstrs_hash_offset) | 1671 self.fhandle.seek(msgstrs_hash_offset) |
1490 msgstrs_index = [] | 1672 msgstrs_index = [] |
1491 for i in range(numofstrings): | 1673 for i in range(numofstrings): |
1492 msgstrs_index.append(self._readbinary(ii, 8)) | 1674 msgstrs_index.append(self._readbinary(ii, 8)) |
1493 # build entries | 1675 # build entries |
1676 encoding = self.instance.encoding | |
1494 for i in range(numofstrings): | 1677 for i in range(numofstrings): |
1495 self.fhandle.seek(msgids_index[i][1]) | 1678 self.fhandle.seek(msgids_index[i][1]) |
1496 msgid = self.fhandle.read(msgids_index[i][0]) | 1679 msgid = self.fhandle.read(msgids_index[i][0]) |
1680 | |
1497 self.fhandle.seek(msgstrs_index[i][1]) | 1681 self.fhandle.seek(msgstrs_index[i][1]) |
1498 msgstr = self.fhandle.read(msgstrs_index[i][0]) | 1682 msgstr = self.fhandle.read(msgstrs_index[i][0]) |
1499 if i == 0: # metadata | 1683 if i == 0 and not msgid: # metadata |
1500 raw_metadata, metadata = msgstr.split('\n'), {} | 1684 raw_metadata, metadata = msgstr.split(b('\n')), {} |
1501 for line in raw_metadata: | 1685 for line in raw_metadata: |
1502 tokens = line.split(':', 1) | 1686 tokens = line.split(b(':'), 1) |
1503 if tokens[0] != '': | 1687 if tokens[0] != b(''): |
1504 try: | 1688 try: |
1505 metadata[tokens[0]] = tokens[1].strip() | 1689 k = tokens[0].decode(encoding) |
1690 v = tokens[1].decode(encoding) | |
1691 metadata[k] = v.strip() | |
1506 except IndexError: | 1692 except IndexError: |
1507 metadata[tokens[0]] = '' | 1693 metadata[k] = u('') |
1508 self.instance.metadata = metadata | 1694 self.instance.metadata = metadata |
1509 continue | 1695 continue |
1510 # test if we have a plural entry | 1696 # test if we have a plural entry |
1511 msgid_tokens = msgid.split('\0') | 1697 msgid_tokens = msgid.split(b('\0')) |
1512 if len(msgid_tokens) > 1: | 1698 if len(msgid_tokens) > 1: |
1513 entry = self._build_entry( | 1699 entry = self._build_entry( |
1514 msgid=msgid_tokens[0], | 1700 msgid=msgid_tokens[0], |
1515 msgid_plural=msgid_tokens[1], | 1701 msgid_plural=msgid_tokens[1], |
1516 msgstr_plural=dict((k,v) for k,v in enumerate(msgstr.split('\0'))) | 1702 msgstr_plural=dict((k, v) for k, v in |
1703 enumerate(msgstr.split(b('\0')))) | |
1517 ) | 1704 ) |
1518 else: | 1705 else: |
1519 entry = self._build_entry(msgid=msgid, msgstr=msgstr) | 1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr) |
1520 self.instance.append(entry) | 1707 self.instance.append(entry) |
1521 # close opened file | 1708 # close opened file |
1522 self.fhandle.close() | 1709 self.fhandle.close() |
1523 return self.instance | 1710 return self.instance |
1524 | 1711 |
1525 def _build_entry(self, msgid, msgstr=None, msgid_plural=None, | 1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None, |
1526 msgstr_plural=None): | 1713 msgstr_plural=None): |
1527 msgctxt_msgid = msgid.split('\x04') | 1714 msgctxt_msgid = msgid.split(b('\x04')) |
1715 encoding = self.instance.encoding | |
1528 if len(msgctxt_msgid) > 1: | 1716 if len(msgctxt_msgid) > 1: |
1529 kwargs = { | 1717 kwargs = { |
1530 'msgctxt': msgctxt_msgid[0], | 1718 'msgctxt': msgctxt_msgid[0].decode(encoding), |
1531 'msgid' : msgctxt_msgid[1], | 1719 'msgid': msgctxt_msgid[1].decode(encoding), |
1532 } | 1720 } |
1533 else: | 1721 else: |
1534 kwargs = {'msgid': msgid} | 1722 kwargs = {'msgid': msgid.decode(encoding)} |
1535 if msgstr: | 1723 if msgstr: |
1536 kwargs['msgstr'] = msgstr | 1724 kwargs['msgstr'] = msgstr.decode(encoding) |
1537 if msgid_plural: | 1725 if msgid_plural: |
1538 kwargs['msgid_plural'] = msgid_plural | 1726 kwargs['msgid_plural'] = msgid_plural.decode(encoding) |
1539 if msgstr_plural: | 1727 if msgstr_plural: |
1728 for k in msgstr_plural: | |
1729 msgstr_plural[k] = msgstr_plural[k].decode(encoding) | |
1540 kwargs['msgstr_plural'] = msgstr_plural | 1730 kwargs['msgstr_plural'] = msgstr_plural |
1541 return MOEntry(**kwargs) | 1731 return MOEntry(**kwargs) |
1542 | 1732 |
1543 def _readbinary(self, fmt, numbytes): | 1733 def _readbinary(self, fmt, numbytes): |
1544 """ | 1734 """ |
1548 bytes = self.fhandle.read(numbytes) | 1738 bytes = self.fhandle.read(numbytes) |
1549 tup = struct.unpack(fmt, bytes) | 1739 tup = struct.unpack(fmt, bytes) |
1550 if len(tup) == 1: | 1740 if len(tup) == 1: |
1551 return tup[0] | 1741 return tup[0] |
1552 return tup | 1742 return tup |
1553 | |
1554 # }}} | 1743 # }}} |
1744 # class TextWrapper {{{ | |
1745 | |
1746 | |
1747 class TextWrapper(textwrap.TextWrapper): | |
1748 """ | |
1749 Subclass of textwrap.TextWrapper that backport the | |
1750 drop_whitespace option. | |
1751 """ | |
1752 def __init__(self, *args, **kwargs): | |
1753 drop_whitespace = kwargs.pop('drop_whitespace', True) | |
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs) | |
1755 self.drop_whitespace = drop_whitespace | |
1756 | |
1757 def _wrap_chunks(self, chunks): | |
1758 """_wrap_chunks(chunks : [string]) -> [string] | |
1759 | |
1760 Wrap a sequence of text chunks and return a list of lines of | |
1761 length 'self.width' or less. (If 'break_long_words' is false, | |
1762 some lines may be longer than this.) Chunks correspond roughly | |
1763 to words and the whitespace between them: each chunk is | |
1764 indivisible (modulo 'break_long_words'), but a line break can | |
1765 come between any two chunks. Chunks should not have internal | |
1766 whitespace; ie. a chunk is either all whitespace or a "word". | |
1767 Whitespace chunks will be removed from the beginning and end of | |
1768 lines, but apart from that whitespace is preserved. | |
1769 """ | |
1770 lines = [] | |
1771 if self.width <= 0: | |
1772 raise ValueError("invalid width %r (must be > 0)" % self.width) | |
1773 | |
1774 # Arrange in reverse order so items can be efficiently popped | |
1775 # from a stack of chucks. | |
1776 chunks.reverse() | |
1777 | |
1778 while chunks: | |
1779 | |
1780 # Start the list of chunks that will make up the current line. | |
1781 # cur_len is just the length of all the chunks in cur_line. | |
1782 cur_line = [] | |
1783 cur_len = 0 | |
1784 | |
1785 # Figure out which static string will prefix this line. | |
1786 if lines: | |
1787 indent = self.subsequent_indent | |
1788 else: | |
1789 indent = self.initial_indent | |
1790 | |
1791 # Maximum width for this line. | |
1792 width = self.width - len(indent) | |
1793 | |
1794 # First chunk on line is whitespace -- drop it, unless this | |
1795 # is the very beginning of the text (ie. no lines started yet). | |
1796 if self.drop_whitespace and chunks[-1].strip() == '' and lines: | |
1797 del chunks[-1] | |
1798 | |
1799 while chunks: | |
1800 l = len(chunks[-1]) | |
1801 | |
1802 # Can at least squeeze this chunk onto the current line. | |
1803 if cur_len + l <= width: | |
1804 cur_line.append(chunks.pop()) | |
1805 cur_len += l | |
1806 | |
1807 # Nope, this line is full. | |
1808 else: | |
1809 break | |
1810 | |
1811 # The current line is full, and the next chunk is too big to | |
1812 # fit on *any* line (not just this one). | |
1813 if chunks and len(chunks[-1]) > width: | |
1814 self._handle_long_word(chunks, cur_line, cur_len, width) | |
1815 | |
1816 # If the last chunk on this line is all whitespace, drop it. | |
1817 if self.drop_whitespace and cur_line and not cur_line[-1].strip(): | |
1818 del cur_line[-1] | |
1819 | |
1820 # Convert current line back to a string and store it in list | |
1821 # of all lines (return value). | |
1822 if cur_line: | |
1823 lines.append(indent + ''.join(cur_line)) | |
1824 | |
1825 return lines | |
1826 # }}} | |
1827 # function wrap() {{{ | |
1828 | |
1829 | |
1830 def wrap(text, width=70, **kwargs): | |
1831 """ | |
1832 Wrap a single paragraph of text, returning a list of wrapped lines. | |
1833 """ | |
1834 if sys.version_info < (2, 6): | |
1835 return TextWrapper(width=width, **kwargs).wrap(text) | |
1836 return textwrap.wrap(text, width=width, **kwargs) | |
1837 | |
1838 # }}} |