convert/darcs: handle non-ASCII metadata in darcs changelog (issue2354)
Given a commit author or message with non-ASCII characters in a darcs
repo, convert would raise a UnicodeEncodeError when adding changesets
to the hg changelog.
This happened because etree returns back unicode objects for any text
it can't encode into ASCII. convert was passing these objects to
changelog.add(), which would then attempt encoding.fromlocal() on
them.
This patch ensures converter_source.recode() is called on each piece
of commit data returned by etree.
(Also note that darcs is currently encoding agnostic and will print
out whatever is in a patch's metadata byte-for-byte, even in the XML
changelog.)
#!/bin/sh
"$TESTDIR/hghave" cvs || exit 80
cvscall()
{
cvs -f "$@"
}
hgcat()
{
hg --cwd src-hg cat -r tip "$1"
}
echo "[extensions]" >> $HGRCPATH
echo "convert = " >> $HGRCPATH
echo "graphlog = " >> $HGRCPATH
cat > cvshooks.py <<EOF
def cvslog(ui,repo,hooktype,log):
print "%s hook: %d entries"%(hooktype,len(log))
def cvschangesets(ui,repo,hooktype,changesets):
print "%s hook: %d changesets"%(hooktype,len(changesets))
EOF
hookpath=`pwd`
echo "[hooks]" >> $HGRCPATH
echo "cvslog=python:$hookpath/cvshooks.py:cvslog" >> $HGRCPATH
echo "cvschangesets=python:$hookpath/cvshooks.py:cvschangesets" >> $HGRCPATH
echo % create cvs repository
mkdir cvsrepo
cd cvsrepo
CVSROOT=`pwd`
export CVSROOT
CVS_OPTIONS=-f
export CVS_OPTIONS
cd ..
cvscall -q -d "$CVSROOT" init
echo % create source directory
mkdir src-temp
cd src-temp
echo a > a
mkdir b
cd b
echo c > c
cd ..
echo % import source directory
cvscall -q import -m import src INITIAL start
cd ..
echo % checkout source directory
cvscall -q checkout src
echo % commit a new revision changing b/c
cd src
sleep 1
echo c >> b/c
cvscall -q commit -mci0 . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
cd ..
echo % convert fresh repo
hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat a
hgcat b/c
echo % convert fresh repo with --filemap
echo include b/c > filemap
hg convert --filemap filemap src src-filemap | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat b/c
hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
echo % 'convert full repository (issue1649)'
cvscall -q -d "$CVSROOT" checkout -d srcfull "." | grep -v CVSROOT
ls srcfull
hg convert srcfull srcfull-hg \
| sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g' \
| grep -v 'log entries' | grep -v 'hook:' \
| grep -v '^[0-3] .*' # filter instable changeset order
hg cat -r tip srcfull-hg/src/a
hg cat -r tip srcfull-hg/src/b/c
echo % commit new file revisions
cd src
echo a >> a
echo c >> b/c
cvscall -q commit -mci1 . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
cd ..
echo % convert again
hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat a
hgcat b/c
echo % convert again with --filemap
hg convert --filemap filemap src src-filemap | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat b/c
hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
echo % commit branch
cd src
cvs -q update -r1.1 b/c
cvs -q tag -b branch
cvs -q update -r branch > /dev/null
echo d >> b/c
cvs -q commit -mci2 . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
cd ..
echo % convert again
hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat b/c
echo % convert again with --filemap
hg convert --filemap filemap src src-filemap | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hgcat b/c
hg -R src-filemap log --template '{rev} {desc} files: {files}\n'
echo % commit a new revision with funny log message
cd src
sleep 1
echo e >> a
cvscall -q commit -m'funny
----------------------------
log message' . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
cd ..
echo % commit new file revisions with some fuzz
cd src
sleep 1
echo f >> a
cvscall -q commit -mfuzzy . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
sleep 4 # the two changes will be split if fuzz < 4
echo g >> b/c
cvscall -q commit -mfuzzy . | grep '<--' |\
sed -e 's:.*src/\(.*\),v.*:checking in src/\1,v:g'
cd ..
echo % convert again
hg convert --config convert.cvsps.fuzz=2 src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
hg -R src-hg glog --template '{rev} ({branches}) {desc} files: {files}\n'
echo % testing debugcvsps
cd src
hg debugcvsps --fuzz=2 | sed -e 's/Author:.*/Author:/' -e 's/Date:.*/Date:/'