--- a/doc/hg.1.txt Mon Dec 04 09:38:49 2006 +0100
+++ b/doc/hg.1.txt Mon Dec 04 17:43:37 2006 -0600
@@ -172,6 +172,20 @@
(deprecated, use .hgrc)
+HGENCODING::
+ This overrides the default locale setting detected by Mercurial.
+ This setting is used to convert data including usernames,
+ changeset descriptions, tag names, and branches. This setting can
+ be overridden with the --encoding command-line option.
+
+HGENCODINGMODE::
+ This sets Mercurial's behavior for handling unknown characters
+ while transcoding user inputs. The default is "strict", which
+ causes Mercurial to abort if it can't translate a character. Other
+ settings include "replace", which replaces unknown characters, and
+ "ignore", which drops them. This setting can be overridden with
+ the --encodingmode command-line option.
+
HGMERGE::
An executable to use for resolving merge conflicts. The program
will be executed with three arguments: local file, remote file,
--- a/hgweb.cgi Mon Dec 04 09:38:49 2006 +0100
+++ b/hgweb.cgi Mon Dec 04 17:43:37 2006 -0600
@@ -10,6 +10,13 @@
from mercurial.hgweb.request import wsgiapplication
import mercurial.hgweb.wsgicgi as wsgicgi
+# If you'd like to serve pages with UTF-8 instead of your default
+# locale charset, you can do so by uncommenting the following line.
+# Note that this will cause your .hgrc files to be interpreted in
+# UTF-8 and all your repo files to be displayed using UTF-8.
+#
+# os.environ["HGENCODING"] = "UTF-8"
+
def make_web_app():
return hgweb("/path/to/repo", "repository name")
--- a/hgwebdir.cgi Mon Dec 04 09:38:49 2006 +0100
+++ b/hgwebdir.cgi Mon Dec 04 17:43:37 2006 -0600
@@ -29,6 +29,13 @@
# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples
# or use a dictionary with entries like 'virtual/path': '/real/path'
+# If you'd like to serve pages with UTF-8 instead of your default
+# locale charset, you can do so by uncommenting the following line.
+# Note that this will cause your .hgrc files to be interpreted in
+# UTF-8 and all your repo files to be displayed using UTF-8.
+#
+# os.environ["HGENCODING"] = "UTF-8"
+
def make_web_app():
return hgwebdir("hgweb.config")
--- a/mercurial/changelog.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/changelog.py Mon Dec 04 17:43:37 2006 -0600
@@ -61,10 +61,10 @@
if not text:
return (nullid, "", (0, 0), [], "", {})
last = text.index("\n\n")
- desc = text[last + 2:]
+ desc = util.tolocal(text[last + 2:])
l = text[:last].split('\n')
manifest = bin(l[0])
- user = l[1]
+ user = util.tolocal(l[1])
extra_data = l[2].split(' ', 2)
if len(extra_data) != 3:
@@ -88,6 +88,8 @@
def add(self, manifest, list, desc, transaction, p1=None, p2=None,
user=None, date=None, extra={}):
+ user, desc = util.fromlocal(user), util.fromlocal(desc)
+
if date:
parseddate = "%d %d" % util.parsedate(date)
else:
--- a/mercurial/commands.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/commands.py Mon Dec 04 17:43:37 2006 -0600
@@ -8,7 +8,7 @@
from demandload import demandload
from node import *
from i18n import gettext as _
-demandload(globals(), "os re sys signal imp urllib pdb shlex")
+demandload(globals(), "bisect os re sys signal imp urllib pdb shlex stat")
demandload(globals(), "fancyopts ui hg util lock revlog bundlerepo")
demandload(globals(), "difflib patch time")
demandload(globals(), "traceback errno version atexit")
@@ -273,7 +273,9 @@
if ui.quiet:
ui.write("%s\n" % t)
else:
- ui.write("%-30s %s:%s\n" % (t, -r, hexfunc(n)))
+ t = util.localsub(t, 30)
+ t += " " * (30 - util.locallen(t))
+ ui.write("%s %s:%s\n" % (t, -r, hexfunc(n)))
def bundle(ui, repo, fname, dest=None, **opts):
"""create a changegroup file
@@ -421,12 +423,28 @@
status = repo.status(files=fns, match=match)
modified, added, removed, deleted, unknown = status[:5]
files = modified + added + removed
+ slist = None
for f in fns:
- if f not in modified + added + removed:
+ if f not in files:
+ rf = repo.wjoin(f)
if f in unknown:
- raise util.Abort(_("file %s not tracked!") % f)
- else:
- raise util.Abort(_("file %s not found!") % f)
+ raise util.Abort(_("file %s not tracked!") % rf)
+ try:
+ mode = os.lstat(rf)[stat.ST_MODE]
+ except OSError:
+ raise util.Abort(_("file %s not found!") % rf)
+ if stat.S_ISDIR(mode):
+ name = f + '/'
+ if slist is None:
+ slist = list(files)
+ slist.sort()
+ i = bisect.bisect(slist, name)
+ if i >= len(slist) or not slist[i].startswith(name):
+ raise util.Abort(_("no match under directory %s!")
+ % rf)
+ elif not stat.S_ISREG(mode):
+ raise util.Abort(_("can't commit %s: "
+ "unsupported file type!") % rf)
else:
files = []
try:
@@ -2210,7 +2228,9 @@
if ui.quiet:
ui.write("%s\n" % t)
else:
- ui.write("%-30s %s\n" % (t, r))
+ t = util.localsub(t, 30)
+ t += " " * (30 - util.locallen(t))
+ ui.write("%s %s\n" % (t, r))
def tip(ui, repo, **opts):
"""show the tip revision
@@ -2311,6 +2331,8 @@
('', 'config', [], _('set/override config option')),
('', 'debug', None, _('enable debugging output')),
('', 'debugger', None, _('start debugger')),
+ ('', 'encoding', util._encoding, _('set the charset encoding')),
+ ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')),
('', 'lsprof', None, _('print improved command execution profile')),
('', 'traceback', None, _('print traceback on exception')),
('', 'time', None, _('time how long the command takes')),
@@ -2863,6 +2885,10 @@
try:
cmd, func, args, options, cmdoptions = parse(u, args)
+ if options["encoding"]:
+ util._encoding = options["encoding"]
+ if options["encodingmode"]:
+ util._encodingmode = options["encodingmode"]
if options["time"]:
def get_times():
t = os.times()
--- a/mercurial/hgweb/hgweb_mod.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/hgweb/hgweb_mod.py Mon Dec 04 17:43:37 2006 -0600
@@ -654,7 +654,8 @@
def run_wsgi(self, req):
def header(**map):
- header_file = cStringIO.StringIO(''.join(self.t("header", **map)))
+ header_file = cStringIO.StringIO(
+ ''.join(self.t("header", encoding = util._encoding, **map)))
msg = mimetools.Message(header_file, 0)
req.header(msg.items())
yield header_file.read()
--- a/mercurial/localrepo.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/localrepo.py Mon Dec 04 17:43:37 2006 -0600
@@ -198,6 +198,7 @@
self.hook('pretag', throw=True, node=hex(node), tag=name, local=local)
if local:
+ # local tags are stored in the current charset
self.opener('localtags', 'a').write('%s %s\n' % (hex(node), name))
self.hook('tag', node=hex(node), tag=name, local=local)
return
@@ -207,7 +208,9 @@
raise util.Abort(_('working copy of .hgtags is changed '
'(please commit .hgtags manually)'))
- self.wfile('.hgtags', 'ab').write('%s %s\n' % (hex(node), name))
+ # committed tags are stored in UTF-8
+ line = '%s %s\n' % (hex(node), util.fromlocal(name))
+ self.wfile('.hgtags', 'ab').write(line)
if self.dirstate.state('.hgtags') == '?':
self.add(['.hgtags'])
@@ -227,7 +230,7 @@
self.ui.warn(_("%s: cannot parse entry\n") % context)
return
node, key = s
- key = key.strip()
+ key = util.tolocal(key.strip()) # stored in UTF-8
try:
bin_n = bin(node)
except TypeError:
@@ -256,6 +259,9 @@
f = self.opener("localtags")
count = 0
for l in f:
+ # localtags are stored in the local character set
+ # while the internal tag table is stored in UTF-8
+ l = util.fromlocal(l)
count += 1
parsetag(l, _("localtags, line %d") % count)
except IOError:
@@ -316,7 +322,10 @@
self._updatebranchcache(partial, lrev+1, tiprev+1)
self._writebranchcache(partial, self.changelog.tip(), tiprev)
- self.branchcache = partial
+ # the branch cache is stored on disk as UTF-8, but in the local
+ # charset internally
+ for k, v in partial.items():
+ self.branchcache[util.tolocal(k)] = v
return self.branchcache
def _readbranchcache(self):
@@ -627,12 +636,12 @@
m2 = self.manifest.read(c2[0])
if use_dirstate:
- branchname = self.workingctx().branch()
+ branchname = util.fromlocal(self.workingctx().branch())
else:
branchname = ""
if use_dirstate:
- oldname = c1[5].get("branch", "")
+ oldname = c1[5].get("branch", "") # stored in UTF-8
if not commit and not remove and not force and p2 == nullid and \
branchname == oldname:
self.ui.status(_("nothing changed\n"))
--- a/mercurial/merge.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/merge.py Mon Dec 04 17:43:37 2006 -0600
@@ -68,6 +68,16 @@
raise util.Abort(_("untracked local file '%s' differs"\
" from remote version") % f)
+def checkcollision(mctx):
+    """
+    Check for case-folding collisions in the destination context
+
+    Aborts with util.Abort, naming both entries, as soon as two names
+    in mctx.manifest() have the same lower-cased form.
+    """
+    seen = {} # lower-cased name -> first name seen with that folding
+    for name in mctx.manifest():
+        key = name.lower()
+        if key not in seen:
+            seen[key] = name
+            continue
+        raise util.Abort(_("case-folding collision between %s and %s")
+                         % (name, seen[key]))
+
def forgetremoved(wctx, mctx):
"""
Forget removed files
@@ -460,6 +470,8 @@
action = []
if not force:
checkunknown(wc, p2)
+ if not util.checkfolding(repo.path):
+ checkcollision(p2)
if not branchmerge:
action += forgetremoved(wc, p2)
action += manifestmerge(repo, wc, p2, pa, overwrite, partial)
@@ -477,6 +489,7 @@
repo.dirstate.setparents(fp1, fp2)
repo.hook('update', parent1=xp1, parent2=xp2, error=stats[3])
if not branchmerge:
+        b = util.tolocal(p2.branch())
-        repo.opener("branch", "w").write(p2.branch() + "\n")
+        repo.opener("branch", "w").write(b + "\n") # local charset
return stats
--- a/mercurial/util.py Mon Dec 04 09:38:49 2006 +0100
+++ b/mercurial/util.py Mon Dec 04 17:43:37 2006 -0600
@@ -15,7 +15,61 @@
from i18n import gettext as _
from demandload import *
demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
-demandload(globals(), "os threading time calendar ConfigParser")
+demandload(globals(), "os threading time calendar ConfigParser locale")
+
+_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
+_encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+
+def tolocal(s):
+    """
+    Convert a string from internal UTF-8 to the local encoding
+
+    Internal strings are expected to be UTF-8, but repos written
+    before locale support was implemented may contain latin1 or
+    possibly other character sets. Strict decoding is tried as UTF-8,
+    then as Latin-1; failing that, the string is decoded as UTF-8 with
+    unknown characters replaced. Encoding to the local charset always
+    uses "replace".
+    """
+    for e in ("utf-8", "latin1"):
+        try:
+            # strict decode, lenient encode
+            return s.decode(e).encode(_encoding, "replace")
+        except UnicodeDecodeError:
+            continue
+    # last ditch
+    return s.decode("utf-8", "replace").encode(_encoding, "replace")
+
+def fromlocal(s):
+    """
+    Convert a string from the local character encoding to UTF-8
+
+    We attempt to decode strings using the encoding mode set by
+    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
+    characters will cause an error message. Other modes include
+    'replace', which replaces unknown characters with a special
+    Unicode character, and 'ignore', which drops the character.
+
+    Raises Abort when the string cannot be decoded.
+    """
+    try:
+        return s.decode(_encoding, _encodingmode).encode("utf-8")
+    except UnicodeDecodeError, inst:
+        # quote up to 10 bytes on either side of the failing position
+        sub = s[max(0, inst.start-10):inst.start+10]
+        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
+
+def locallen(s):
+    """
+    Return the length in characters of a string in the local encoding
+
+    Undecodable bytes are replaced rather than raising an error.
+    """
+    decoded = s.decode(_encoding, "replace")
+    return len(decoded)
+
+def localsub(s, a, b=None):
+    """
+    Slice a string in the local encoding by characters, not bytes
+
+    Returns the first a characters of s, or characters a through b
+    when b is given, re-encoded in the local encoding. Decoding and
+    encoding honour the current encoding mode.
+
+    Raises Abort when s cannot be decoded.
+    """
+    try:
+        u = s.decode(_encoding, _encodingmode)
+        if b is not None:
+            u = u[a:b]
+        else:
+            u = u[:a]
+        return u.encode(_encoding, _encodingmode)
+    except UnicodeDecodeError, inst:
+        # quote up to 10 bytes on either side of the failing position;
+        # this must be a slice (a tuple index would raise TypeError)
+        sub = s[max(0, inst.start-10):inst.start+10]
+        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
# used by parsedate
defaultdateformats = ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
@@ -579,6 +633,28 @@
except ImportError:
return None
+# File system features
+
+def checkfolding(path):
+    """
+    Check whether the given path is on a case-sensitive filesystem
+
+    Requires a path (like /foo/.hg) ending with a foldable final
+    directory component.
+
+    Returns False when the case-swapped spelling of the final
+    component stats identically to path, and True otherwise
+    (including when the swapped spelling cannot be stat'ed).
+    """
+    s1 = os.stat(path)
+    d, b = os.path.split(path)
+    p2 = os.path.join(d, b.upper())
+    if path == p2:
+        # the component was already upper case, so try lower case
+        p2 = os.path.join(d, b.lower())
+    try:
+        s2 = os.stat(p2)
+    except OSError:
+        # only a failed stat means "no such spelling"; anything else
+        # (e.g. KeyboardInterrupt) must propagate
+        return True
+    return s2 != s1
+
# Platform specific variants
if os.name == 'nt':
demandload(globals(), "msvcrt")
--- a/templates/gitweb/header.tmpl Mon Dec 04 09:38:49 2006 +0100
+++ b/templates/gitweb/header.tmpl Mon Dec 04 17:43:37 2006 -0600
@@ -1,11 +1,10 @@
-Content-type: text/html
+Content-type: text/html; charset={encoding}
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="{encoding}"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
<head>
<link rel="icon" href="{url}static/hgicon.png" type="image/png">
-<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<meta name="robots" content="index, nofollow"/>
<link rel="stylesheet" href="{url}static/style-gitweb.css" type="text/css" />
--- a/templates/header.tmpl Mon Dec 04 09:38:49 2006 +0100
+++ b/templates/header.tmpl Mon Dec 04 17:43:37 2006 -0600
@@ -1,4 +1,4 @@
-Content-type: text/html
+Content-type: text/html; charset={encoding}
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
--- a/templates/raw/header.tmpl Mon Dec 04 09:38:49 2006 +0100
+++ b/templates/raw/header.tmpl Mon Dec 04 17:43:37 2006 -0600
@@ -1,1 +1,2 @@
-Content-type: text/plain
+Content-type: text/plain; charset={encoding}
+
Binary file tests/legacy-encoding.hg has changed
--- a/tests/run-tests.py Mon Dec 04 09:38:49 2006 +0100
+++ b/tests/run-tests.py Mon Dec 04 17:43:37 2006 -0600
@@ -332,6 +332,8 @@
os.environ["HGEDITOR"] = sys.executable + ' -c "import sys; sys.exit(0)"'
os.environ["HGMERGE"] = sys.executable + ' -c "import sys; sys.exit(0)"'
os.environ["HGUSER"] = "test"
+os.environ["HGENCODING"] = "ascii"
+os.environ["HGENCODINGMODE"] = "strict"
TESTDIR = os.environ["TESTDIR"] = os.getcwd()
HGTMP = os.environ["HGTMP"] = tempfile.mkdtemp("", "hgtests.")
--- a/tests/test-commit Mon Dec 04 09:38:49 2006 +0100
+++ b/tests/test-commit Mon Dec 04 17:43:37 2006 -0600
@@ -1,5 +1,10 @@
#!/bin/sh
+# Filter stdin: replace everything from the first "/" on a line up to a
+# "/test/" component with "...", keeping the "/test/..." remainder.
+cleanpath()
+{
+    sed -e "s:/.*\(/test/.*\):...\1:"
+}
+
echo % commit date test
hg init test
cd test
@@ -17,7 +22,35 @@
echo bar > bar
hg add bar
rm bar
-hg commit -d "1000000 0" -m commit-8 2>&1 | sed -e "s:/.*\(/test/.*\):...\1:"
+hg commit -d "1000000 0" -m commit-8 2>&1 | cleanpath
+
+hg -q revert -a --no-backup
+
+mkdir dir
+echo boo > dir/file
+hg add
+hg -v commit -d '0 0' -m commit-9 dir
+
+echo > dir.file
+hg add
+hg commit -d '0 0' -m commit-10 dir dir.file 2>&1 | cleanpath
+
+echo >> dir/file
+mkdir bleh
+mkdir dir2
+cd bleh
+hg commit -d '0 0' -m commit-11 . 2>&1 | cleanpath
+hg commit -d '0 0' -m commit-12 ../dir ../dir2 2>&1 | cleanpath
+hg -v commit -d '0 0' -m commit-13 ../dir
+cd ..
+
+hg commit -d '0 0' -m commit-14 does-not-exist 2>&1 | cleanpath
+ln -s foo baz
+hg commit -d '0 0' -m commit-15 baz 2>&1 | cleanpath
+touch quux
+hg commit -d '0 0' -m commit-16 quux 2>&1 | cleanpath
+echo >> dir/file
+hg -v commit -d '0 0' -m commit-17 dir/file
cd ..
echo % partial subdir commit test
--- a/tests/test-commit.out Mon Dec 04 09:38:49 2006 +0100
+++ b/tests/test-commit.out Mon Dec 04 17:43:37 2006 -0600
@@ -17,6 +17,19 @@
% partial commit test
trouble committing bar!
abort: No such file or directory: .../test/bar
+adding dir/file
+dir/file
+adding dir.file
+abort: no match under directory .../test/dir!
+abort: no match under directory .../test/bleh!
+abort: no match under directory .../test/dir2!
+dir/file
+does-not-exist: No such file or directory
+abort: file .../test/does-not-exist not found!
+baz: unsupported file type (type is symbolic link)
+abort: can't commit .../test/baz: unsupported file type!
+abort: file .../test/quux not tracked!
+dir/file
% partial subdir commit test
adding bar/bar
adding foo/foo
--- a/tests/test-debugcomplete.out Mon Dec 04 09:38:49 2006 +0100
+++ b/tests/test-debugcomplete.out Mon Dec 04 17:43:37 2006 -0600
@@ -87,6 +87,8 @@
--cwd
--debug
--debugger
+--encoding
+--encodingmode
--help
--lsprof
--noninteractive
@@ -112,6 +114,8 @@
--daemon-pipefds
--debug
--debugger
+--encoding
+--encodingmode
--errorlog
--help
--ipv6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-encoding Mon Dec 04 17:43:37 2006 -0600
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+hg init t
+cd t
+
+# we need a repo with some legacy latin-1 changesets
+hg unbundle $TESTDIR/legacy-encoding.hg
+hg co
+
+printf "latin-1 e' encoded: \xe9" > latin-1
+printf "utf-8 e' encoded: \xc3\xa9" > utf-8
+printf "\xe9" > latin-1-tag
+
+echo % should fail with encoding error
+echo "plain old ascii" > a
+hg st
+HGENCODING=ascii hg ci -l latin-1 -d "0 0"
+
+echo % these should work
+echo "latin-1" > a
+HGENCODING=latin-1 hg ci -l latin-1 -d "0 0"
+echo "utf-8" > a
+HGENCODING=utf-8 hg ci -l utf-8 -d "0 0"
+
+HGENCODING=latin-1 hg tag -d "0 0" `cat latin-1-tag`
+
+echo % ascii
+hg --encoding ascii log
+echo % latin-1
+hg --encoding latin-1 log
+echo % utf-8
+hg --encoding utf-8 log
+echo % ascii
+HGENCODING=ascii hg tags
+echo % latin-1
+HGENCODING=latin-1 hg tags
+echo % utf-8
+HGENCODING=utf-8 hg tags
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-encoding.out Mon Dec 04 17:43:37 2006 -0600
@@ -0,0 +1,94 @@
+adding changesets
+adding manifests
+adding file changes
+added 1 changesets with 1 changes to 1 files
+(run 'hg update' to get a working copy)
+1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+% should fail with encoding error
+M a
+? latin-1
+? latin-1-tag
+? utf-8
+abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
+
+transaction abort!
+rollback completed
+% these should work
+% ascii
+changeset: 3:5edfc7acb541
+tag: tip
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: Added tag ? for changeset 91878608adb3
+
+changeset: 2:91878608adb3
+tag: ?
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: utf-8 e' encoded: ?
+
+changeset: 1:6355cacf842e
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e' encoded: ?
+
+changeset: 0:60aad1dd20a9
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e': ?
+
+% latin-1
+changeset: 3:5edfc7acb541
+tag: tip
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: Added tag é for changeset 91878608adb3
+
+changeset: 2:91878608adb3
+tag: é
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: utf-8 e' encoded: é
+
+changeset: 1:6355cacf842e
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e' encoded: é
+
+changeset: 0:60aad1dd20a9
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e': é
+
+% utf-8
+changeset: 3:5edfc7acb541
+tag: tip
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: Added tag é for changeset 91878608adb3
+
+changeset: 2:91878608adb3
+tag: é
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: utf-8 e' encoded: é
+
+changeset: 1:6355cacf842e
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e' encoded: é
+
+changeset: 0:60aad1dd20a9
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e': é
+
+% ascii
+tip 3:5edfc7acb541
+? 2:91878608adb3
+% latin-1
+tip 3:5edfc7acb541
+é 2:91878608adb3
+% utf-8
+tip 3:5edfc7acb541
+é 2:91878608adb3