tests/test-encoding.t
changeset 12417 1aba1c38a85b
parent 12156 4c94b6d0fb1c
child 12866 eddc20306ab6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-encoding.t	Sun Sep 26 13:41:32 2010 -0500
@@ -0,0 +1,245 @@
+Test character encoding
+
+  $ hg init t
+  $ cd t
+
+we need a repo with some legacy latin-1 changesets
+
+  $ hg unbundle $TESTDIR/legacy-encoding.hg
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 1 files
+  (run 'hg update' to get a working copy)
+  $ hg co
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ python << EOF
+  > f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
+  > f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
+  > f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
+  > EOF
+
+committing the latin-1 message file with HGENCODING=ascii should fail with an encoding error
+
+  $ echo "plain old ascii" > a
+  $ hg st
+  M a
+  ? latin-1
+  ? latin-1-tag
+  ? utf-8
+  $ HGENCODING=ascii hg ci -l latin-1
+  transaction abort!
+  rollback completed
+  abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
+  [255]
+
+these should work: each message, tag and branch name is given in the encoding named by HGENCODING
+
+  $ echo "latin-1" > a
+  $ HGENCODING=latin-1 hg ci -l latin-1
+  $ echo "utf-8" > a
+  $ HGENCODING=utf-8 hg ci -l utf-8
+  $ HGENCODING=latin-1 hg tag `cat latin-1-tag`
+  $ HGENCODING=latin-1 hg branch `cat latin-1-tag`
+  marked working directory as branch é
+  $ HGENCODING=latin-1 hg ci -m 'latin1 branch'
+  $ rm .hg/branch
+
+hg log (ascii)
+
+  $ hg --encoding ascii log
+  changeset:   5:093c6077d1c8
+  branch:      ?
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin1 branch
+  
+  changeset:   4:94db611b4196
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     Added tag ? for changeset ca661e7520de
+  
+  changeset:   3:ca661e7520de
+  tag:         ?
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     utf-8 e' encoded: ?
+  
+  changeset:   2:650c6f3d55dd
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin-1 e' encoded: ?
+  
+  changeset:   1:0e5b7e3f9c4a
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
+  
+  changeset:   0:1e78a93102a3
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     latin-1 e': ? = u'\xe9'
+  
+
+hg log (latin-1)
+
+  $ hg --encoding latin-1 log
+  changeset:   5:093c6077d1c8
+  branch:      é
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin1 branch
+  
+  changeset:   4:94db611b4196
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     Added tag é for changeset ca661e7520de
+  
+  changeset:   3:ca661e7520de
+  tag:         é
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     utf-8 e' encoded: é
+  
+  changeset:   2:650c6f3d55dd
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin-1 e' encoded: é
+  
+  changeset:   1:0e5b7e3f9c4a
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     koi8-r: ÒÔÕÔØ = u'\u0440\u0442\u0443\u0442\u044c'
+  
+  changeset:   0:1e78a93102a3
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     latin-1 e': é = u'\xe9'
+  
+
+hg log (utf-8)
+
+  $ hg --encoding utf-8 log
+  changeset:   5:093c6077d1c8
+  branch:      é
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin1 branch
+  
+  changeset:   4:94db611b4196
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     Added tag é for changeset ca661e7520de
+  
+  changeset:   3:ca661e7520de
+  tag:         é
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     utf-8 e' encoded: é
+  
+  changeset:   2:650c6f3d55dd
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin-1 e' encoded: é
+  
+  changeset:   1:0e5b7e3f9c4a
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     koi8-r: ÒÔÕÔØ = u'\u0440\u0442\u0443\u0442\u044c'
+  
+  changeset:   0:1e78a93102a3
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     latin-1 e': é = u'\xe9'
+  
+
+hg tags (ascii)
+
+  $ HGENCODING=ascii hg tags
+  tip                                5:093c6077d1c8
+  ?                                  3:ca661e7520de
+
+hg tags (latin-1)
+
+  $ HGENCODING=latin-1 hg tags
+  tip                                5:093c6077d1c8
+  é                                 3:ca661e7520de
+
+hg tags (utf-8)
+
+  $ HGENCODING=utf-8 hg tags
+  tip                                5:093c6077d1c8
+  é                                 3:ca661e7520de
+
+hg branches (ascii)
+
+  $ HGENCODING=ascii hg branches
+  ?                              5:093c6077d1c8
+  default                        4:94db611b4196 (inactive)
+
+hg branches (latin-1)
+
+  $ HGENCODING=latin-1 hg branches
+  é                             5:093c6077d1c8
+  default                        4:94db611b4196 (inactive)
+
+hg branches (utf-8)
+
+  $ HGENCODING=utf-8 hg branches
+  é                             5:093c6077d1c8
+  default                        4:94db611b4196 (inactive)
+  $ echo '[ui]' >> .hg/hgrc
+  $ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
+
+hg log (utf-8, with fallbackencoding = koi8-r)
+
+  $ HGENCODING=utf-8 hg log
+  changeset:   5:093c6077d1c8
+  branch:      é
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin1 branch
+  
+  changeset:   4:94db611b4196
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     Added tag é for changeset ca661e7520de
+  
+  changeset:   3:ca661e7520de
+  tag:         é
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     utf-8 e' encoded: é
+  
+  changeset:   2:650c6f3d55dd
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     latin-1 e' encoded: é
+  
+  changeset:   1:0e5b7e3f9c4a
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     koi8-r: ртуть = u'\u0440\u0442\u0443\u0442\u044c'
+  
+  changeset:   0:1e78a93102a3
+  user:        test
+  date:        Mon Jan 12 13:46:40 1970 +0000
+  summary:     latin-1 e': И = u'\xe9'
+  
+
+hg log (dolphin): an unknown encoding name should abort
+
+  $ HGENCODING=dolphin hg log
+  abort: unknown encoding: dolphin, please check your locale settings
+  [255]
+  $ HGENCODING=ascii hg branch `cat latin-1-tag`
+  abort: decoding near 'é': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)!
+  [255]
+  $ cp latin-1-tag .hg/branch
+  $ HGENCODING=latin-1 hg ci -m 'should fail'
+  abort: branch name not in UTF-8!
+  [255]