comparison i18n/polib.py @ 43076:2372284d9457

formatting: blacken the codebase This is using my patch to black (https://github.com/psf/black/pull/826) so we don't un-wrap collection literals. Done with: hg files 'set:**.py - mercurial/thirdparty/** - "contrib/python-zstandard/**"' | xargs black -S # skip-blame mass-reformatting only # no-check-commit reformats foo_bar functions Differential Revision: https://phab.mercurial-scm.org/D6971
author Augie Fackler <augie@google.com>
date Sun, 06 Oct 2019 09:45:02 -0400
parents 19fc5a986669
children 04e0e0e73892
comparison
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
15 15
16 from __future__ import absolute_import 16 from __future__ import absolute_import
17 17
18 __author__ = 'David Jean Louis <izimobil@gmail.com>' 18 __author__ = 'David Jean Louis <izimobil@gmail.com>'
19 __version__ = '1.0.7' 19 __version__ = '1.0.7'
20 __all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', 20 __all__ = [
21 'default_encoding', 'escape', 'unescape', 'detect_encoding', ] 21 'pofile',
22 'POFile',
23 'POEntry',
24 'mofile',
25 'MOFile',
26 'MOEntry',
27 'default_encoding',
28 'escape',
29 'unescape',
30 'detect_encoding',
31 ]
22 32
23 import array 33 import array
24 import codecs 34 import codecs
25 import os 35 import os
26 import re 36 import re
53 return s 63 return s
54 64
55 def u(s): 65 def u(s):
56 return unicode(s, "unicode_escape") 66 return unicode(s, "unicode_escape")
57 67
68
58 else: 69 else:
59 PY3 = True 70 PY3 = True
60 text_type = str 71 text_type = str
61 72
62 def b(s): 73 def b(s):
63 return s.encode("latin-1") 74 return s.encode("latin-1")
64 75
65 def u(s): 76 def u(s):
66 return s 77 return s
78
79
67 # }}} 80 # }}}
68 # _pofile_or_mofile {{{ 81 # _pofile_or_mofile {{{
69 82
70 83
71 def _pofile_or_mofile(f, type, **kwargs): 84 def _pofile_or_mofile(f, type, **kwargs):
82 kls = type == 'pofile' and _POFileParser or _MOFileParser 95 kls = type == 'pofile' and _POFileParser or _MOFileParser
83 parser = kls( 96 parser = kls(
84 f, 97 f,
85 encoding=enc, 98 encoding=enc,
86 check_for_duplicates=kwargs.get('check_for_duplicates', False), 99 check_for_duplicates=kwargs.get('check_for_duplicates', False),
87 klass=kwargs.get('klass') 100 klass=kwargs.get('klass'),
88 ) 101 )
89 instance = parser.parse() 102 instance = parser.parse()
90 instance.wrapwidth = kwargs.get('wrapwidth', 78) 103 instance.wrapwidth = kwargs.get('wrapwidth', 78)
91 return instance 104 return instance
105
106
92 # }}} 107 # }}}
93 # _is_file {{{ 108 # _is_file {{{
94 109
95 110
96 def _is_file(filename_or_contents): 111 def _is_file(filename_or_contents):
105 """ 120 """
106 try: 121 try:
107 return os.path.exists(filename_or_contents) 122 return os.path.exists(filename_or_contents)
108 except (ValueError, UnicodeEncodeError): 123 except (ValueError, UnicodeEncodeError):
109 return False 124 return False
125
126
110 # }}} 127 # }}}
111 # function pofile() {{{ 128 # function pofile() {{{
112 129
113 130
114 def pofile(pofile, **kwargs): 131 def pofile(pofile, **kwargs):
137 class which is used to instantiate the return value (optional, 154 class which is used to instantiate the return value (optional,
138 default: ``None``, the return value will be a :class:`~polib.POFile` 155 default: ``None``, the return value will be a :class:`~polib.POFile`
139 instance). 156 instance).
140 """ 157 """
141 return _pofile_or_mofile(pofile, 'pofile', **kwargs) 158 return _pofile_or_mofile(pofile, 'pofile', **kwargs)
159
160
142 # }}} 161 # }}}
143 # function mofile() {{{ 162 # function mofile() {{{
144 163
145 164
146 def mofile(mofile, **kwargs): 165 def mofile(mofile, **kwargs):
170 class which is used to instantiate the return value (optional, 189 class which is used to instantiate the return value (optional,
171 default: ``None``, the return value will be a :class:`~polib.POFile` 190 default: ``None``, the return value will be a :class:`~polib.POFile`
172 instance). 191 instance).
173 """ 192 """
174 return _pofile_or_mofile(mofile, 'mofile', **kwargs) 193 return _pofile_or_mofile(mofile, 'mofile', **kwargs)
194
195
175 # }}} 196 # }}}
176 # function detect_encoding() {{{ 197 # function detect_encoding() {{{
177 198
178 199
179 def detect_encoding(file, binary_mode=False): 200 def detect_encoding(file, binary_mode=False):
227 enc = enc.decode('utf-8') 248 enc = enc.decode('utf-8')
228 if charset_exists(enc): 249 if charset_exists(enc):
229 return enc 250 return enc
230 f.close() 251 f.close()
231 return default_encoding 252 return default_encoding
253
254
232 # }}} 255 # }}}
233 # function escape() {{{ 256 # function escape() {{{
234 257
235 258
236 def escape(st): 259 def escape(st):
237 """ 260 """
238 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in 261 Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
239 the given string ``st`` and returns it. 262 the given string ``st`` and returns it.
240 """ 263 """
241 return st.replace('\\', r'\\')\ 264 return (
242 .replace('\t', r'\t')\ 265 st.replace('\\', r'\\')
243 .replace('\r', r'\r')\ 266 .replace('\t', r'\t')
244 .replace('\n', r'\n')\ 267 .replace('\r', r'\r')
245 .replace('\"', r'\"') 268 .replace('\n', r'\n')
269 .replace('\"', r'\"')
270 )
271
272
246 # }}} 273 # }}}
247 # function unescape() {{{ 274 # function unescape() {{{
248 275
249 276
250 def unescape(st): 277 def unescape(st):
251 """ 278 """
252 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in 279 Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
253 the given string ``st`` and returns it. 280 the given string ``st`` and returns it.
254 """ 281 """
282
255 def unescape_repl(m): 283 def unescape_repl(m):
256 m = m.group(1) 284 m = m.group(1)
257 if m == 'n': 285 if m == 'n':
258 return '\n' 286 return '\n'
259 if m == 't': 287 if m == 't':
261 if m == 'r': 289 if m == 'r':
262 return '\r' 290 return '\r'
263 if m == '\\': 291 if m == '\\':
264 return '\\' 292 return '\\'
265 return m # handles escaped double quote 293 return m # handles escaped double quote
294
266 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st) 295 return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
296
297
267 # }}} 298 # }}}
268 # class _BaseFile {{{ 299 # class _BaseFile {{{
269 300
270 301
271 class _BaseFile(list): 302 class _BaseFile(list):
315 def __unicode__(self): 346 def __unicode__(self):
316 """ 347 """
317 Returns the unicode representation of the file. 348 Returns the unicode representation of the file.
318 """ 349 """
319 ret = [] 350 ret = []
320 entries = [self.metadata_as_entry()] + \ 351 entries = [self.metadata_as_entry()] + [
321 [e for e in self if not e.obsolete] 352 e for e in self if not e.obsolete
353 ]
322 for entry in entries: 354 for entry in entries:
323 ret.append(entry.__unicode__(self.wrapwidth)) 355 ret.append(entry.__unicode__(self.wrapwidth))
324 for entry in self.obsolete_entries(): 356 for entry in self.obsolete_entries():
325 ret.append(entry.__unicode__(self.wrapwidth)) 357 ret.append(entry.__unicode__(self.wrapwidth))
326 ret = u('\n').join(ret) 358 ret = u('\n').join(ret)
327 359
328 assert isinstance(ret, text_type) 360 assert isinstance(ret, text_type)
329 #if type(ret) != text_type: 361 # if type(ret) != text_type:
330 # return unicode(ret, self.encoding) 362 # return unicode(ret, self.encoding)
331 return ret 363 return ret
332 364
333 if PY3: 365 if PY3:
366
334 def __str__(self): 367 def __str__(self):
335 return self.__unicode__() 368 return self.__unicode__()
369
336 else: 370 else:
371
337 def __str__(self): 372 def __str__(self):
338 """ 373 """
339 Returns the string representation of the file. 374 Returns the string representation of the file.
340 """ 375 """
341 return unicode(self).encode(self.encoding) 376 return unicode(self).encode(self.encoding)
351 Argument: 386 Argument:
352 387
353 ``entry`` 388 ``entry``
354 an instance of :class:`~polib._BaseEntry`. 389 an instance of :class:`~polib._BaseEntry`.
355 """ 390 """
356 return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \ 391 return (
392 self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt)
357 is not None 393 is not None
394 )
358 395
359 def __eq__(self, other): 396 def __eq__(self, other):
360 return str(self) == str(other) 397 return str(self) == str(other)
361 398
362 def append(self, entry): 399 def append(self, entry):
437 fhandle.close() 474 fhandle.close()
438 # set the file path if not set 475 # set the file path if not set
439 if self.fpath is None and fpath: 476 if self.fpath is None and fpath:
440 self.fpath = fpath 477 self.fpath = fpath
441 478
442 def find(self, st, by='msgid', include_obsolete_entries=False, 479 def find(
443 msgctxt=False): 480 self, st, by='msgid', include_obsolete_entries=False, msgctxt=False
481 ):
444 """ 482 """
445 Find the entry whose msgid (or property identified by the ``by`` 483 Find the entry whose msgid (or property identified by the ``by``
446 argument) matches the string ``st``. 484 argument) matches the string ``st``.
447 485
448 Keyword arguments: 486 Keyword arguments:
488 'Language-Team', 526 'Language-Team',
489 'MIME-Version', 527 'MIME-Version',
490 'Content-Type', 528 'Content-Type',
491 'Content-Transfer-Encoding', 529 'Content-Transfer-Encoding',
492 'Language', 530 'Language',
493 'Plural-Forms' 531 'Plural-Forms',
494 ] 532 ]
495 ordered_data = [] 533 ordered_data = []
496 for data in data_order: 534 for data in data_order:
497 try: 535 try:
498 value = metadata.pop(data) 536 value = metadata.pop(data)
522 return 1 560 return 1
523 elif self_msgid < other_msgid: 561 elif self_msgid < other_msgid:
524 return -1 562 return -1
525 else: 563 else:
526 return 0 564 return 0
565
527 # add metadata entry 566 # add metadata entry
528 entries.sort(key=lambda o: o.msgctxt or o.msgid) 567 entries.sort(key=lambda o: o.msgctxt or o.msgid)
529 mentry = self.metadata_as_entry() 568 mentry = self.metadata_as_entry()
530 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() 569 # mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
531 entries = [mentry] + entries 570 entries = [mentry] + entries
532 entries_len = len(entries) 571 entries_len = len(entries)
533 ids, strs = b(''), b('') 572 ids, strs = b(''), b('')
534 for e in entries: 573 for e in entries:
535 # For each string, we need size and file offset. Each string is 574 # For each string, we need size and file offset. Each string is
576 # start of key index 615 # start of key index
577 7 * 4, 616 7 * 4,
578 # start of value index 617 # start of value index
579 7 * 4 + entries_len * 8, 618 7 * 4 + entries_len * 8,
580 # size and offset of hash table, we don't use hash tables 619 # size and offset of hash table, we don't use hash tables
581 0, keystart 620 0,
582 621 keystart,
583 ) 622 )
584 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior 623 if PY3 and sys.version_info.minor > 1: # python 3.2 or superior
585 output += array.array("i", offsets).tobytes() 624 output += array.array("i", offsets).tobytes()
586 else: 625 else:
587 output += array.array("i", offsets).tostring() 626 output += array.array("i", offsets).tostring()
595 only if it's a unicode string and returns the encoded string. 634 only if it's a unicode string and returns the encoded string.
596 """ 635 """
597 if isinstance(mixed, text_type): 636 if isinstance(mixed, text_type):
598 mixed = mixed.encode(self.encoding) 637 mixed = mixed.encode(self.encoding)
599 return mixed 638 return mixed
639
640
600 # }}} 641 # }}}
601 # class POFile {{{ 642 # class POFile {{{
602 643
603 644
604 class POFile(_BaseFile): 645 class POFile(_BaseFile):
656 697
657 def untranslated_entries(self): 698 def untranslated_entries(self):
658 """ 699 """
659 Convenience method that returns the list of untranslated entries. 700 Convenience method that returns the list of untranslated entries.
660 """ 701 """
661 return [e for e in self if not e.translated() and not e.obsolete 702 return [
662 and not 'fuzzy' in e.flags] 703 e
704 for e in self
705 if not e.translated() and not e.obsolete and not 'fuzzy' in e.flags
706 ]
663 707
664 def fuzzy_entries(self): 708 def fuzzy_entries(self):
665 """ 709 """
666 Convenience method that returns the list of fuzzy entries. 710 Convenience method that returns the list of fuzzy entries.
667 """ 711 """
701 e.merge(entry) 745 e.merge(entry)
702 # ok, now we must "obsolete" entries that are not in the refpot anymore 746 # ok, now we must "obsolete" entries that are not in the refpot anymore
703 for entry in self: 747 for entry in self:
704 if entry.msgid not in refpot_msgids: 748 if entry.msgid not in refpot_msgids:
705 entry.obsolete = True 749 entry.obsolete = True
750
751
706 # }}} 752 # }}}
707 # class MOFile {{{ 753 # class MOFile {{{
708 754
709 755
710 class MOFile(_BaseFile): 756 class MOFile(_BaseFile):
711 """ 757 """
712 Mo file reader/writer. 758 Mo file reader/writer.
713 This class inherits the :class:`~polib._BaseFile` class and, by 759 This class inherits the :class:`~polib._BaseFile` class and, by
714 extension, the python ``list`` type. 760 extension, the python ``list`` type.
715 """ 761 """
716 MAGIC = 0x950412de 762
717 MAGIC_SWAPPED = 0xde120495 763 MAGIC = 0x950412DE
764 MAGIC_SWAPPED = 0xDE120495
718 765
719 def __init__(self, *args, **kwargs): 766 def __init__(self, *args, **kwargs):
720 """ 767 """
721 Constructor, accepts all keyword arguments accepted by 768 Constructor, accepts all keyword arguments accepted by
722 :class:`~polib._BaseFile` class. 769 :class:`~polib._BaseFile` class.
774 def obsolete_entries(self): 821 def obsolete_entries(self):
775 """ 822 """
776 Convenience method to keep the same interface with POFile instances. 823 Convenience method to keep the same interface with POFile instances.
777 """ 824 """
778 return [] 825 return []
826
827
779 # }}} 828 # }}}
780 # class _BaseEntry {{{ 829 # class _BaseEntry {{{
781 830
782 831
783 class _BaseEntry(object): 832 class _BaseEntry(object):
829 else: 878 else:
830 delflag = '' 879 delflag = ''
831 ret = [] 880 ret = []
832 # write the msgctxt if any 881 # write the msgctxt if any
833 if self.msgctxt is not None: 882 if self.msgctxt is not None:
834 ret += self._str_field("msgctxt", delflag, "", self.msgctxt, 883 ret += self._str_field(
835 wrapwidth) 884 "msgctxt", delflag, "", self.msgctxt, wrapwidth
885 )
836 # write the msgid 886 # write the msgid
837 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth) 887 ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth)
838 # write the msgid_plural if any 888 # write the msgid_plural if any
839 if self.msgid_plural: 889 if self.msgid_plural:
840 ret += self._str_field("msgid_plural", delflag, "", 890 ret += self._str_field(
841 self.msgid_plural, wrapwidth) 891 "msgid_plural", delflag, "", self.msgid_plural, wrapwidth
892 )
842 if self.msgstr_plural: 893 if self.msgstr_plural:
843 # write the msgstr_plural if any 894 # write the msgstr_plural if any
844 msgstrs = self.msgstr_plural 895 msgstrs = self.msgstr_plural
845 keys = list(msgstrs) 896 keys = list(msgstrs)
846 keys.sort() 897 keys.sort()
847 for index in keys: 898 for index in keys:
848 msgstr = msgstrs[index] 899 msgstr = msgstrs[index]
849 plural_index = '[%s]' % index 900 plural_index = '[%s]' % index
850 ret += self._str_field("msgstr", delflag, plural_index, msgstr, 901 ret += self._str_field(
851 wrapwidth) 902 "msgstr", delflag, plural_index, msgstr, wrapwidth
903 )
852 else: 904 else:
853 # otherwise write the msgstr 905 # otherwise write the msgstr
854 ret += self._str_field("msgstr", delflag, "", self.msgstr, 906 ret += self._str_field(
855 wrapwidth) 907 "msgstr", delflag, "", self.msgstr, wrapwidth
908 )
856 ret.append('') 909 ret.append('')
857 ret = u('\n').join(ret) 910 ret = u('\n').join(ret)
858 return ret 911 return ret
859 912
860 if PY3: 913 if PY3:
914
861 def __str__(self): 915 def __str__(self):
862 return self.__unicode__() 916 return self.__unicode__()
917
863 else: 918 else:
919
864 def __str__(self): 920 def __str__(self):
865 """ 921 """
866 Returns the string representation of the entry. 922 Returns the string representation of the entry.
867 """ 923 """
868 return unicode(self).encode(self.encoding) 924 return unicode(self).encode(self.encoding)
869 925
870 def __eq__(self, other): 926 def __eq__(self, other):
871 return str(self) == str(other) 927 return str(self) == str(other)
872 928
873 def _str_field(self, fieldname, delflag, plural_index, field, 929 def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
874 wrapwidth=78):
875 lines = field.splitlines(True) 930 lines = field.splitlines(True)
876 if len(lines) > 1: 931 if len(lines) > 1:
877 lines = [''] + lines # start with initial empty line 932 lines = [''] + lines # start with initial empty line
878 else: 933 else:
879 escaped_field = escape(field) 934 escaped_field = escape(field)
886 if plural_index: 941 if plural_index:
887 flength += len(plural_index) 942 flength += len(plural_index)
888 real_wrapwidth = wrapwidth - flength + specialchars_count 943 real_wrapwidth = wrapwidth - flength + specialchars_count
889 if wrapwidth > 0 and len(field) > real_wrapwidth: 944 if wrapwidth > 0 and len(field) > real_wrapwidth:
890 # Wrap the line but take field name into account 945 # Wrap the line but take field name into account
891 lines = [''] + [unescape(item) for item in wrap( 946 lines = [''] + [
892 escaped_field, 947 unescape(item)
893 wrapwidth - 2, # 2 for quotes "" 948 for item in wrap(
894 drop_whitespace=False, 949 escaped_field,
895 break_long_words=False 950 wrapwidth - 2, # 2 for quotes ""
896 )] 951 drop_whitespace=False,
952 break_long_words=False,
953 )
954 ]
897 else: 955 else:
898 lines = [field] 956 lines = [field]
899 if fieldname.startswith('previous_'): 957 if fieldname.startswith('previous_'):
900 # quick and dirty trick to get the real field name 958 # quick and dirty trick to get the real field name
901 fieldname = fieldname[9:] 959 fieldname = fieldname[9:]
902 960
903 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, 961 ret = [
904 escape(lines.pop(0)))] 962 '%s%s%s "%s"'
963 % (delflag, fieldname, plural_index, escape(lines.pop(0)))
964 ]
905 for line in lines: 965 for line in lines:
906 ret.append('%s"%s"' % (delflag, escape(line))) 966 ret.append('%s"%s"' % (delflag, escape(line)))
907 return ret 967 return ret
968
969
908 # }}} 970 # }}}
909 # class POEntry {{{ 971 # class POEntry {{{
910 972
911 973
912 class POEntry(_BaseEntry): 974 class POEntry(_BaseEntry):
970 ret += wrap( 1032 ret += wrap(
971 comment, 1033 comment,
972 wrapwidth, 1034 wrapwidth,
973 initial_indent=c[1], 1035 initial_indent=c[1],
974 subsequent_indent=c[1], 1036 subsequent_indent=c[1],
975 break_long_words=False 1037 break_long_words=False,
976 ) 1038 )
977 else: 1039 else:
978 ret.append('%s%s' % (c[1], comment)) 1040 ret.append('%s%s' % (c[1], comment))
979 1041
980 # occurrences (with text wrapping as xgettext does) 1042 # occurrences (with text wrapping as xgettext does)
989 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: 1051 if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
990 # textwrap splits words that contain hyphens, this is not 1052 # textwrap splits words that contain hyphens, this is not
991 # what we want for filenames, so the dirty hack is to 1053 # what we want for filenames, so the dirty hack is to
992 # temporarily replace hyphens with a char that a file cannot 1054 # temporarily replace hyphens with a char that a file cannot
993 # contain, like "*" 1055 # contain, like "*"
994 ret += [l.replace('*', '-') for l in wrap( 1056 ret += [
995 filestr.replace('-', '*'), 1057 l.replace('*', '-')
996 wrapwidth, 1058 for l in wrap(
997 initial_indent='#: ', 1059 filestr.replace('-', '*'),
998 subsequent_indent='#: ', 1060 wrapwidth,
999 break_long_words=False 1061 initial_indent='#: ',
1000 )] 1062 subsequent_indent='#: ',
1063 break_long_words=False,
1064 )
1065 ]
1001 else: 1066 else:
1002 ret.append('#: ' + filestr) 1067 ret.append('#: ' + filestr)
1003 1068
1004 # flags (TODO: wrapping ?) 1069 # flags (TODO: wrapping ?)
1005 if self.flags: 1070 if self.flags:
1006 ret.append('#, %s' % ', '.join(self.flags)) 1071 ret.append('#, %s' % ', '.join(self.flags))
1007 1072
1008 # previous context and previous msgid/msgid_plural 1073 # previous context and previous msgid/msgid_plural
1009 fields = ['previous_msgctxt', 'previous_msgid', 1074 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1010 'previous_msgid_plural']
1011 for f in fields: 1075 for f in fields:
1012 val = getattr(self, f) 1076 val = getattr(self, f)
1013 if val: 1077 if val:
1014 ret += self._str_field(f, "#| ", "", val, wrapwidth) 1078 ret += self._str_field(f, "#| ", "", val, wrapwidth)
1015 1079
1016 ret.append(_BaseEntry.__unicode__(self, wrapwidth)) 1080 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1017 ret = u('\n').join(ret) 1081 ret = u('\n').join(ret)
1018 1082
1019 assert isinstance(ret, text_type) 1083 assert isinstance(ret, text_type)
1020 #if type(ret) != types.UnicodeType: 1084 # if type(ret) != types.UnicodeType:
1021 # return unicode(ret, self.encoding) 1085 # return unicode(ret, self.encoding)
1022 return ret 1086 return ret
1023 1087
1024 def __cmp__(self, other): 1088 def __cmp__(self, other):
1025 """ 1089 """
1129 except KeyError: 1193 except KeyError:
1130 self.msgstr_plural[pos] = '' 1194 self.msgstr_plural[pos] = ''
1131 1195
1132 def __hash__(self): 1196 def __hash__(self):
1133 return hash((self.msgid, self.msgstr)) 1197 return hash((self.msgid, self.msgstr))
1198
1199
1134 # }}} 1200 # }}}
1135 # class MOEntry {{{ 1201 # class MOEntry {{{
1136 1202
1137 1203
1138 class MOEntry(_BaseEntry): 1204 class MOEntry(_BaseEntry):
1139 """ 1205 """
1140 Represents a mo file entry. 1206 Represents a mo file entry.
1141 """ 1207 """
1208
1142 def __init__(self, *args, **kwargs): 1209 def __init__(self, *args, **kwargs):
1143 """ 1210 """
1144 Constructor, accepts the following keyword arguments, 1211 Constructor, accepts the following keyword arguments,
1145 for consistency with :class:`~polib.POEntry`: 1212 for consistency with :class:`~polib.POEntry`:
1146 1213
1165 self.previous_msgid = None 1232 self.previous_msgid = None
1166 self.previous_msgid_plural = None 1233 self.previous_msgid_plural = None
1167 1234
1168 def __hash__(self): 1235 def __hash__(self):
1169 return hash((self.msgid, self.msgstr)) 1236 return hash((self.msgid, self.msgstr))
1237
1170 1238
1171 # }}} 1239 # }}}
1172 # class _POFileParser {{{ 1240 # class _POFileParser {{{
1173 1241
1174 1242
1209 if klass is None: 1277 if klass is None:
1210 klass = POFile 1278 klass = POFile
1211 self.instance = klass( 1279 self.instance = klass(
1212 pofile=pofile, 1280 pofile=pofile,
1213 encoding=enc, 1281 encoding=enc,
1214 check_for_duplicates=kwargs.get('check_for_duplicates', False) 1282 check_for_duplicates=kwargs.get('check_for_duplicates', False),
1215 ) 1283 )
1216 self.transitions = {} 1284 self.transitions = {}
1217 self.current_line = 0 1285 self.current_line = 0
1218 self.current_entry = POEntry(linenum=self.current_line) 1286 self.current_entry = POEntry(linenum=self.current_line)
1219 self.current_state = 'st' 1287 self.current_state = 'st'
1236 # * MI: a msgid 1304 # * MI: a msgid
1237 # * MP: a msgid plural 1305 # * MP: a msgid plural
1238 # * MS: a msgstr 1306 # * MS: a msgstr
1239 # * MX: a msgstr plural 1307 # * MX: a msgstr plural
1240 # * MC: a msgid or msgstr continuation line 1308 # * MC: a msgid or msgstr continuation line
1241 all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc', 1309 all = [
1242 'ms', 'mp', 'mx', 'mi'] 1310 'st',
1243 1311 'he',
1244 self.add('tc', ['st', 'he'], 'he') 1312 'gc',
1245 self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 1313 'oc',
1246 'mp', 'mx', 'mi'], 'tc') 1314 'fl',
1247 self.add('gc', all, 'gc') 1315 'ct',
1248 self.add('oc', all, 'oc') 1316 'pc',
1249 self.add('fl', all, 'fl') 1317 'pm',
1250 self.add('pc', all, 'pc') 1318 'pp',
1251 self.add('pm', all, 'pm') 1319 'tc',
1252 self.add('pp', all, 'pp') 1320 'ms',
1253 self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 1321 'mp',
1254 'pp', 'ms', 'mx'], 'ct') 1322 'mx',
1255 self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', 1323 'mi',
1256 'pm', 'pp', 'ms', 'mx'], 'mi') 1324 ]
1257 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp') 1325
1258 self.add('ms', ['mi', 'mp', 'tc'], 'ms') 1326 self.add('tc', ['st', 'he'], 'he')
1259 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx') 1327 self.add(
1328 'tc',
1329 ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'],
1330 'tc',
1331 )
1332 self.add('gc', all, 'gc')
1333 self.add('oc', all, 'oc')
1334 self.add('fl', all, 'fl')
1335 self.add('pc', all, 'pc')
1336 self.add('pm', all, 'pm')
1337 self.add('pp', all, 'pp')
1338 self.add(
1339 'ct',
1340 ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
1341 'ct',
1342 )
1343 self.add(
1344 'mi',
1345 [
1346 'st',
1347 'he',
1348 'gc',
1349 'oc',
1350 'fl',
1351 'ct',
1352 'tc',
1353 'pc',
1354 'pm',
1355 'pp',
1356 'ms',
1357 'mx',
1358 ],
1359 'mi',
1360 )
1361 self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1362 self.add('ms', ['mi', 'mp', 'tc'], 'ms')
1363 self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1260 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc') 1364 self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1261 1365
1262 def parse(self): 1366 def parse(self):
1263 """ 1367 """
1264 Run the state machine, parse the file line by line and call process() 1368 Run the state machine, parse the file line by line and call process()
1298 self.entry_obsolete = 0 1402 self.entry_obsolete = 0
1299 1403
1300 # Take care of keywords like 1404 # Take care of keywords like
1301 # msgid, msgid_plural, msgctxt & msgstr. 1405 # msgid, msgid_plural, msgctxt & msgstr.
1302 if tokens[0] in keywords and nb_tokens > 1: 1406 if tokens[0] in keywords and nb_tokens > 1:
1303 line = line[len(tokens[0]):].lstrip() 1407 line = line[len(tokens[0]) :].lstrip()
1304 if re.search(r'([^\\]|^)"', line[1:-1]): 1408 if re.search(r'([^\\]|^)"', line[1:-1]):
1305 raise IOError('Syntax error in po file %s (line %s): ' 1409 raise IOError(
1306 'unescaped double quote found' % 1410 'Syntax error in po file %s (line %s): '
1307 (self.instance.fpath, self.current_line)) 1411 'unescaped double quote found'
1412 % (self.instance.fpath, self.current_line)
1413 )
1308 self.current_token = line 1414 self.current_token = line
1309 self.process(keywords[tokens[0]]) 1415 self.process(keywords[tokens[0]])
1310 continue 1416 continue
1311 1417
1312 self.current_token = line 1418 self.current_token = line
1318 self.process('oc') 1424 self.process('oc')
1319 1425
1320 elif line[:1] == '"': 1426 elif line[:1] == '"':
1321 # we are on a continuation line 1427 # we are on a continuation line
1322 if re.search(r'([^\\]|^)"', line[1:-1]): 1428 if re.search(r'([^\\]|^)"', line[1:-1]):
1323 raise IOError('Syntax error in po file %s (line %s): ' 1429 raise IOError(
1324 'unescaped double quote found' % 1430 'Syntax error in po file %s (line %s): '
1325 (self.instance.fpath, self.current_line)) 1431 'unescaped double quote found'
1432 % (self.instance.fpath, self.current_line)
1433 )
1326 self.process('mc') 1434 self.process('mc')
1327 1435
1328 elif line[:7] == 'msgstr[': 1436 elif line[:7] == 'msgstr[':
1329 # we are on a msgstr plural 1437 # we are on a msgstr plural
1330 self.process('mx') 1438 self.process('mx')
1347 # we are on a generated comment line 1455 # we are on a generated comment line
1348 self.process('gc') 1456 self.process('gc')
1349 1457
1350 elif tokens[0] == '#|': 1458 elif tokens[0] == '#|':
1351 if nb_tokens <= 1: 1459 if nb_tokens <= 1:
1352 raise IOError('Syntax error in po file %s (line %s)' % 1460 raise IOError(
1353 (self.instance.fpath, self.current_line)) 1461 'Syntax error in po file %s (line %s)'
1462 % (self.instance.fpath, self.current_line)
1463 )
1354 1464
1355 # Remove the marker and any whitespace right after that. 1465 # Remove the marker and any whitespace right after that.
1356 line = line[2:].lstrip() 1466 line = line[2:].lstrip()
1357 self.current_token = line 1467 self.current_token = line
1358 1468
1361 self.process('mc') 1471 self.process('mc')
1362 continue 1472 continue
1363 1473
1364 if nb_tokens == 2: 1474 if nb_tokens == 2:
1365 # Invalid continuation line. 1475 # Invalid continuation line.
1366 raise IOError('Syntax error in po file %s (line %s): ' 1476 raise IOError(
1367 'invalid continuation line' % 1477 'Syntax error in po file %s (line %s): '
1368 (self.instance.fpath, self.current_line)) 1478 'invalid continuation line'
1479 % (self.instance.fpath, self.current_line)
1480 )
1369 1481
1370 # we are on a "previous translation" comment line, 1482 # we are on a "previous translation" comment line,
1371 if tokens[1] not in prev_keywords: 1483 if tokens[1] not in prev_keywords:
1372 # Unknown keyword in previous translation comment. 1484 # Unknown keyword in previous translation comment.
1373 raise IOError('Syntax error in po file %s (line %s): ' 1485 raise IOError(
1374 'unknown keyword %s' % 1486 'Syntax error in po file %s (line %s): '
1375 (self.instance.fpath, self.current_line, 1487 'unknown keyword %s'
1376 tokens[1])) 1488 % (self.instance.fpath, self.current_line, tokens[1])
1489 )
1377 1490
1378 # Remove the keyword and any whitespace 1491 # Remove the keyword and any whitespace
1379 # between it and the starting quote. 1492 # between it and the starting quote.
1380 line = line[len(tokens[1]):].lstrip() 1493 line = line[len(tokens[1]) :].lstrip()
1381 self.current_token = line 1494 self.current_token = line
1382 self.process(prev_keywords[tokens[1]]) 1495 self.process(prev_keywords[tokens[1]])
1383 1496
1384 else: 1497 else:
1385 raise IOError('Syntax error in po file %s (line %s)' % 1498 raise IOError(
1386 (self.instance.fpath, self.current_line)) 1499 'Syntax error in po file %s (line %s)'
1387 1500 % (self.instance.fpath, self.current_line)
1388 if self.current_entry and len(tokens) > 0 and \ 1501 )
1389 not tokens[0].startswith('#'): 1502
1503 if (
1504 self.current_entry
1505 and len(tokens) > 0
1506 and not tokens[0].startswith('#')
1507 ):
1390 # since entries are added when another entry is found, we must add 1508 # since entries are added when another entry is found, we must add
1391 # the last entry here (only if there are lines). Trailing comments 1509 # the last entry here (only if there are lines). Trailing comments
1392 # are ignored 1510 # are ignored
1393 self.instance.append(self.current_entry) 1511 self.instance.append(self.current_entry)
1394 1512
1447 try: 1565 try:
1448 (action, state) = self.transitions[(symbol, self.current_state)] 1566 (action, state) = self.transitions[(symbol, self.current_state)]
1449 if action(): 1567 if action():
1450 self.current_state = state 1568 self.current_state = state
1451 except Exception: 1569 except Exception:
1452 raise IOError('Syntax error in po file (line %s)' % 1570 raise IOError(
1453 self.current_line) 1571 'Syntax error in po file (line %s)' % self.current_line
1572 )
1454 1573
1455 # state handlers 1574 # state handlers
1456 1575
1457 def handle_he(self): 1576 def handle_he(self):
1458 """Handle a header comment.""" 1577 """Handle a header comment."""
1505 def handle_fl(self): 1624 def handle_fl(self):
1506 """Handle a flags line.""" 1625 """Handle a flags line."""
1507 if self.current_state in ['mc', 'ms', 'mx']: 1626 if self.current_state in ['mc', 'ms', 'mx']:
1508 self.instance.append(self.current_entry) 1627 self.instance.append(self.current_entry)
1509 self.current_entry = POEntry(linenum=self.current_line) 1628 self.current_entry = POEntry(linenum=self.current_line)
1510 self.current_entry.flags += [c.strip() for c in 1629 self.current_entry.flags += [
1511 self.current_token[3:].split(',')] 1630 c.strip() for c in self.current_token[3:].split(',')
1631 ]
1512 return True 1632 return True
1513 1633
1514 def handle_pp(self): 1634 def handle_pp(self):
1515 """Handle a previous msgid_plural line.""" 1635 """Handle a previous msgid_plural line."""
1516 if self.current_state in ['mc', 'ms', 'mx']: 1636 if self.current_state in ['mc', 'ms', 'mx']:
1517 self.instance.append(self.current_entry) 1637 self.instance.append(self.current_entry)
1518 self.current_entry = POEntry(linenum=self.current_line) 1638 self.current_entry = POEntry(linenum=self.current_line)
1519 self.current_entry.previous_msgid_plural = \ 1639 self.current_entry.previous_msgid_plural = unescape(
1520 unescape(self.current_token[1:-1]) 1640 self.current_token[1:-1]
1641 )
1521 return True 1642 return True
1522 1643
1523 def handle_pm(self): 1644 def handle_pm(self):
1524 """Handle a previous msgid line.""" 1645 """Handle a previous msgid line."""
1525 if self.current_state in ['mc', 'ms', 'mx']: 1646 if self.current_state in ['mc', 'ms', 'mx']:
1526 self.instance.append(self.current_entry) 1647 self.instance.append(self.current_entry)
1527 self.current_entry = POEntry(linenum=self.current_line) 1648 self.current_entry = POEntry(linenum=self.current_line)
1528 self.current_entry.previous_msgid = \ 1649 self.current_entry.previous_msgid = unescape(self.current_token[1:-1])
1529 unescape(self.current_token[1:-1])
1530 return True 1650 return True
1531 1651
1532 def handle_pc(self): 1652 def handle_pc(self):
1533 """Handle a previous msgctxt line.""" 1653 """Handle a previous msgctxt line."""
1534 if self.current_state in ['mc', 'ms', 'mx']: 1654 if self.current_state in ['mc', 'ms', 'mx']:
1535 self.instance.append(self.current_entry) 1655 self.instance.append(self.current_entry)
1536 self.current_entry = POEntry(linenum=self.current_line) 1656 self.current_entry = POEntry(linenum=self.current_line)
1537 self.current_entry.previous_msgctxt = \ 1657 self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1])
1538 unescape(self.current_token[1:-1])
1539 return True 1658 return True
1540 1659
1541 def handle_ct(self): 1660 def handle_ct(self):
1542 """Handle a msgctxt.""" 1661 """Handle a msgctxt."""
1543 if self.current_state in ['mc', 'ms', 'mx']: 1662 if self.current_state in ['mc', 'ms', 'mx']:
1566 return True 1685 return True
1567 1686
1568 def handle_mx(self): 1687 def handle_mx(self):
1569 """Handle a msgstr plural.""" 1688 """Handle a msgstr plural."""
1570 index = self.current_token[7] 1689 index = self.current_token[7]
1571 value = self.current_token[self.current_token.find('"') + 1:-1] 1690 value = self.current_token[self.current_token.find('"') + 1 : -1]
1572 self.current_entry.msgstr_plural[int(index)] = unescape(value) 1691 self.current_entry.msgstr_plural[int(index)] = unescape(value)
1573 self.msgstr_index = int(index) 1692 self.msgstr_index = int(index)
1574 return True 1693 return True
1575 1694
1576 def handle_mc(self): 1695 def handle_mc(self):
1592 self.current_entry.previous_msgid += token 1711 self.current_entry.previous_msgid += token
1593 elif self.current_state == 'pc': 1712 elif self.current_state == 'pc':
1594 self.current_entry.previous_msgctxt += token 1713 self.current_entry.previous_msgctxt += token
1595 # don't change the current state 1714 # don't change the current state
1596 return False 1715 return False
1716
1717
1597 # }}} 1718 # }}}
1598 # class _MOFileParser {{{ 1719 # class _MOFileParser {{{
1599 1720
1600 1721
1601 class _MOFileParser(object): 1722 class _MOFileParser(object):
1626 if klass is None: 1747 if klass is None:
1627 klass = MOFile 1748 klass = MOFile
1628 self.instance = klass( 1749 self.instance = klass(
1629 fpath=mofile, 1750 fpath=mofile,
1630 encoding=kwargs.get('encoding', default_encoding), 1751 encoding=kwargs.get('encoding', default_encoding),
1631 check_for_duplicates=kwargs.get('check_for_duplicates', False) 1752 check_for_duplicates=kwargs.get('check_for_duplicates', False),
1632 ) 1753 )
1633 1754
1634 def __del__(self): 1755 def __del__(self):
1635 """ 1756 """
1636 Make sure the file is closed, this prevents warnings on unclosed file 1757 Make sure the file is closed, this prevents warnings on unclosed file
1697 msgid_tokens = msgid.split(b('\0')) 1818 msgid_tokens = msgid.split(b('\0'))
1698 if len(msgid_tokens) > 1: 1819 if len(msgid_tokens) > 1:
1699 entry = self._build_entry( 1820 entry = self._build_entry(
1700 msgid=msgid_tokens[0], 1821 msgid=msgid_tokens[0],
1701 msgid_plural=msgid_tokens[1], 1822 msgid_plural=msgid_tokens[1],
1702 msgstr_plural=dict((k, v) for k, v in 1823 msgstr_plural=dict(
1703 enumerate(msgstr.split(b('\0')))) 1824 (k, v) for k, v in enumerate(msgstr.split(b('\0')))
1825 ),
1704 ) 1826 )
1705 else: 1827 else:
1706 entry = self._build_entry(msgid=msgid, msgstr=msgstr) 1828 entry = self._build_entry(msgid=msgid, msgstr=msgstr)
1707 self.instance.append(entry) 1829 self.instance.append(entry)
1708 # close opened file 1830 # close opened file
1709 self.fhandle.close() 1831 self.fhandle.close()
1710 return self.instance 1832 return self.instance
1711 1833
1712 def _build_entry(self, msgid, msgstr=None, msgid_plural=None, 1834 def _build_entry(
1713 msgstr_plural=None): 1835 self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None
1836 ):
1714 msgctxt_msgid = msgid.split(b('\x04')) 1837 msgctxt_msgid = msgid.split(b('\x04'))
1715 encoding = self.instance.encoding 1838 encoding = self.instance.encoding
1716 if len(msgctxt_msgid) > 1: 1839 if len(msgctxt_msgid) > 1:
1717 kwargs = { 1840 kwargs = {
1718 'msgctxt': msgctxt_msgid[0].decode(encoding), 1841 'msgctxt': msgctxt_msgid[0].decode(encoding),
1738 bytes = self.fhandle.read(numbytes) 1861 bytes = self.fhandle.read(numbytes)
1739 tup = struct.unpack(fmt, bytes) 1862 tup = struct.unpack(fmt, bytes)
1740 if len(tup) == 1: 1863 if len(tup) == 1:
1741 return tup[0] 1864 return tup[0]
1742 return tup 1865 return tup
1866
1867
1743 # }}} 1868 # }}}
1744 # class TextWrapper {{{ 1869 # class TextWrapper {{{
1745 1870
1746 1871
1747 class TextWrapper(textwrap.TextWrapper): 1872 class TextWrapper(textwrap.TextWrapper):
1748 """ 1873 """
1749 Subclass of textwrap.TextWrapper that backport the 1874 Subclass of textwrap.TextWrapper that backport the
1750 drop_whitespace option. 1875 drop_whitespace option.
1751 """ 1876 """
1877
1752 def __init__(self, *args, **kwargs): 1878 def __init__(self, *args, **kwargs):
1753 drop_whitespace = kwargs.pop('drop_whitespace', True) 1879 drop_whitespace = kwargs.pop('drop_whitespace', True)
1754 textwrap.TextWrapper.__init__(self, *args, **kwargs) 1880 textwrap.TextWrapper.__init__(self, *args, **kwargs)
1755 self.drop_whitespace = drop_whitespace 1881 self.drop_whitespace = drop_whitespace
1756 1882
1821 # of all lines (return value). 1947 # of all lines (return value).
1822 if cur_line: 1948 if cur_line:
1823 lines.append(indent + ''.join(cur_line)) 1949 lines.append(indent + ''.join(cur_line))
1824 1950
1825 return lines 1951 return lines
1952
1953
1826 # }}} 1954 # }}}
1827 # function wrap() {{{ 1955 # function wrap() {{{
1828 1956
1829 1957
1830 def wrap(text, width=70, **kwargs): 1958 def wrap(text, width=70, **kwargs):
1833 """ 1961 """
1834 if sys.version_info < (2, 6): 1962 if sys.version_info < (2, 6):
1835 return TextWrapper(width=width, **kwargs).wrap(text) 1963 return TextWrapper(width=width, **kwargs).wrap(text)
1836 return textwrap.wrap(text, width=width, **kwargs) 1964 return textwrap.wrap(text, width=width, **kwargs)
1837 1965
1966
1838 # }}} 1967 # }}}