view tests/test-gendoc.t @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings. Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents 5abc47d4ca6b
children
line wrap: on
line source

#require docutils
#require gettext

Test document extraction

  $ HGENCODING=UTF-8
  $ export HGENCODING

Run gendoc.py once for the untranslated "C" locale and once for every
i18n/*.po catalog. "C" is emitted first so that gendoc-C.txt already exists
as the baseline each translated output is compared against. The trailing
"true" makes the status of the whole command 0 once the pipeline finishes.

  $ { echo C; ls "$TESTDIR/../i18n"/*.po | sort; } | while read PO; do
  >     LOCALE=`basename "$PO" .po`
  >     echo "% extracting documentation from $LOCALE"
  >     # stderr is discarded; a failing gendoc.py run ends the loop early
  >     LANGUAGE=$LOCALE "$PYTHON" "$TESTDIR/../doc/gendoc.py" >> "gendoc-$LOCALE.txt" 2> /dev/null || exit
  >     # path expansions are quoted so a TESTDIR containing whitespace still works
  >     if [ "$LOCALE" != C ]; then
  >         # report any catalog that has no test-gendoc-LOCALE.t file in TESTDIR
  >         if [ ! -f "$TESTDIR/test-gendoc-$LOCALE.t" ]; then
  >             echo missing test-gendoc-$LOCALE.t
  >         fi
  >         # output byte-identical to the C output means nothing was translated
  >         cmp -s gendoc-C.txt "gendoc-$LOCALE.txt" && echo "** NOTHING TRANSLATED ($LOCALE) **"
  >     fi
  > done; true
  % extracting documentation from C
  % extracting documentation from da
  % extracting documentation from de
  % extracting documentation from el
  % extracting documentation from fr
  % extracting documentation from it
  % extracting documentation from ja
  % extracting documentation from pt_BR
  % extracting documentation from ro
  % extracting documentation from ru
  % extracting documentation from sv
  % extracting documentation from zh_CN
  % extracting documentation from zh_TW