view tests/svn/svndump-encoding.sh @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings. Apparently the code uses "+=" with a bytes object, which is linear-time, so the whole encoding is quadratic-time. This patch makes us use a bytearray object, instead, which has a(n amortized-)constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents f3398f1f70a0
children
line wrap: on
line source

#!/bin/sh
# -*- coding: utf-8 -*-
#
# Use this script to generate encoding.svndump
#

# Abort on the first failing command or on use of an unset variable.  Every
# step below depends on the current directory and on the state left by the
# previous one, so continuing after a failure (for instance when "temp"
# already exists) would run svn in the wrong place and could still write a
# bogus dump file.
set -eu

# All work happens in a scratch directory created under the directory the
# script is started from.
mkdir temp
cd temp

# Skeleton tree with the trunk/branches/tags layout; it is only used as the
# source of the initial import.
mkdir project-orig project-orig/trunk project-orig/branches project-orig/tags

# Create the repository and import the skeleton as its first revision.  The
# URL is quoted wherever it is expanded so that a working directory whose
# name contains whitespace is not split into several arguments.
svnadmin create svn-repo
svnurl="file://$(pwd)/svn-repo"
svn import project-orig "$svnurl" -m "init projA"

# Check out a working copy, then add and commit one file and one directory
# (containing a file) whose names are non-ASCII.
svn co "$svnurl" project
cd project
echo e > trunk/é
mkdir trunk/à
echo d > trunk/à/é
svn add trunk/é trunk/à
svn ci -m hello

# Rename the file and the directory to other non-ASCII names.
# NOTE(review): the log message calls this a copy; presumably because
# Subversion records a move as a copy plus a delete -- confirm.
svn move trunk/é trunk/è
svn move trunk/à trunk/ù
svn commit --message 'copy files'

# Delete the renamed file and directory again and commit the removal.
svn delete trunk/è
svn delete trunk/ù
svn commit --message 'remove files'

# Branch trunk into a branch with a non-ASCII name, then create a second
# branch from that first branch.  The working copy is updated before each
# copy.
svn update
svn copy trunk branches/branché
# NOTE(review): "a" is written into the working copy but never handed to
# `svn add`, so it is presumably left unversioned and absent from both
# commits below -- confirm that this is intended.
printf '%s\n' a >branches/branché/a
svn commit --message 'branch to branché'
svn update
svn copy branches/branché branches/branchée
printf '%s\n' a >>branches/branché/a
svn commit --message 'branch to branchée'

# Tag trunk and the second branch, both under non-ASCII tag names.
svn update
svn copy trunk tags/branché
svn commit --message 'tag trunk'
svn copy branches/branchée tags/branchée
# NOTE(review): the message below says "branché" although the tag created
# just above is "branchée".  It is reproduced unchanged because the log
# message is stored in the repository that gets dumped.
svn commit --message 'tag branché'
# Leave the working copy; the current directory is the scratch directory
# "temp" again, which is where svn-repo lives.
cd ..

# Dump the whole repository.  The output is written one level above "temp",
# i.e. into the directory the script was started from.
svnadmin dump svn-repo > ../encoding.svndump