Mercurial > hg
view i18n/posplit @ 49568:da48f170d203
rust-status: query fs traversal metadata lazily
Currently, any time the status algorithm needs to read a directory from the
filesystem (because the stat-only optimization is not available), it also
stats each directory entry eagerly.
Stat'ing the entries is only needed in a few cases (like when checking
the mtime of a directory for caching): this patch creates a wrapper struct
`DirEntry` that only stats the directory entry it represents when needed.
Excerpt of an `strace` before this change on Mozilla Central:
```
openat(AT_FDCWD, ".", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
newfstatat(3, "", {st_mode=S_IFDIR|0755, st_size=3540, ...}, AT_EMPTY_PATH) = 0
getdents64(3, 0x55dc970bd440 /* 139 entries */, 32768) = 5072
statx(3, ".hg", AT_STATX_SYNC_AS_STAT|AT_SYMLINK_NOFOLLOW, STATX_ALL, {stx_mask=STATX_ALL|STATX_MNT_ID, stx_attributes=0, stx_mode=S_IFDIR|0755, stx_size=772, ...}) = 0
[... 135 other successful `statx` calls]
getdents64(3, 0x55dc970bd440 /* 0 entries */, 32768) = 0
close(3) = 0
```
After this change:
```
openat(AT_FDCWD, ".", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
newfstatat(3, "", {st_mode=S_IFDIR|0755, st_size=3540, ...}, AT_EMPTY_PATH) = 0
getdents64(3, 0x561567c10190 /* 139 entries */, 32768) = 5072
getdents64(3, 0x561567c10190 /* 0 entries */, 32768) = 0
close(3) = 0
```
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 19 Oct 2022 12:38:06 +0200 |
parents | 6000f5b25c9b |
children | f4733654f144 |
line wrap: on
line source
#!/usr/bin/env python3 # # posplit - split messages in paragraphs on .po/.pot files # # license: MIT/X11/Expat # import polib import re import sys def addentry(po, entry, cache): e = cache.get(entry.msgid) if e: e.occurrences.extend(entry.occurrences) # merge comments from entry for comment in entry.comment.split('\n'): if comment and comment not in e.comment: if not e.comment: e.comment = comment else: e.comment += '\n' + comment else: po.append(entry) cache[entry.msgid] = entry def mkentry(orig, delta, msgid, msgstr): entry = polib.POEntry() entry.merge(orig) entry.msgid = msgid or orig.msgid entry.msgstr = msgstr or orig.msgstr entry.occurrences = [(p, int(l) + delta) for (p, l) in orig.occurrences] return entry if __name__ == "__main__": po = polib.pofile(sys.argv[1]) cache = {} entries = po[:] po[:] = [] findd = re.compile(r' *\.\. (\w+)::') # for finding directives for entry in entries: msgids = entry.msgid.split(u'\n\n') if entry.msgstr: msgstrs = entry.msgstr.split(u'\n\n') else: msgstrs = [u''] * len(msgids) if len(msgids) != len(msgstrs): # places the whole existing translation as a fuzzy # translation for each paragraph, to give the # translator a chance to recover part of the old # translation - erasing extra paragraphs is # probably better than retranslating all from start if 'fuzzy' not in entry.flags: entry.flags.append('fuzzy') msgstrs = [entry.msgstr] * len(msgids) delta = 0 for msgid, msgstr in zip(msgids, msgstrs): if msgid and msgid != '::': newentry = mkentry(entry, delta, msgid, msgstr) mdirective = findd.match(msgid) if mdirective: if not msgid[mdirective.end() :].rstrip(): # only directive, nothing to translate here delta += 2 continue directive = mdirective.group(1) if directive in ('container', 'include'): if msgid.rstrip('\n').count('\n') == 0: # only rst syntax, nothing to translate delta += 2 continue else: # lines following directly, unexpected print( 'Warning: text follows line with directive' ' %s' % directive ) comment = 'do not translate: .. %s::' % directive if not newentry.comment: newentry.comment = comment elif comment not in newentry.comment: newentry.comment += '\n' + comment addentry(po, newentry, cache) delta += 2 + msgid.count('\n') po.save()