changeset 7948:de377b1a9a84

move encoding bits from util to encoding. In addition to cleaning up util, this gets rid of some circular dependencies.
author Matt Mackall <mpm@selenic.com>
date Fri, 03 Apr 2009 14:51:48 -0500
parents a454eeb1b827
children 443c0c8636ac
files hgext/convert/convcmd.py hgext/highlight/highlight.py hgext/win32mbcs.py mercurial/changelog.py mercurial/cmdutil.py mercurial/commands.py mercurial/dispatch.py mercurial/encoding.py mercurial/hgweb/hgweb_mod.py mercurial/hgweb/hgwebdir_mod.py mercurial/i18n.py mercurial/localrepo.py mercurial/mail.py mercurial/templatefilters.py mercurial/util.py mercurial/win32.py
diffstat 16 files changed, 149 insertions(+), 136 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/convcmd.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/convert/convcmd.py	Fri Apr 03 14:51:48 2009 -0500
@@ -18,7 +18,7 @@
 import filemap
 
 import os, shutil
-from mercurial import hg, util
+from mercurial import hg, util, encoding
 from mercurial.i18n import _
 
 orig_encoding = 'ascii'
@@ -275,7 +275,7 @@
                 if "\n" in desc:
                     desc = desc.splitlines()[0]
                 # convert log message to local encoding without using
-                # tolocal() because util._encoding conver() use it as
+                # tolocal() because convert() sets encoding.encoding to
                 # 'utf-8'
                 self.ui.status("%d %s\n" % (num, recode(desc)))
                 self.ui.note(_("source: %s\n") % recode(c))
@@ -308,8 +308,8 @@
 
 def convert(ui, src, dest=None, revmapfile=None, **opts):
     global orig_encoding
-    orig_encoding = util._encoding
-    util._encoding = 'UTF-8'
+    orig_encoding = encoding.encoding
+    encoding.encoding = 'UTF-8'
 
     if not dest:
         dest = hg.defaultdest(src) + "-hg"
--- a/hgext/highlight/highlight.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/highlight/highlight.py	Fri Apr 03 14:51:48 2009 -0500
@@ -6,7 +6,7 @@
 from mercurial import demandimport
 demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])
 
-from mercurial import util
+from mercurial import util, encoding
 from mercurial.templatefilters import filters
 
 from pygments import highlight
@@ -30,19 +30,19 @@
         return
 
     # avoid UnicodeDecodeError in pygments
-    text = util.tolocal(text)
+    text = encoding.tolocal(text)
 
     # To get multi-line strings right, we can't format line-by-line
     try:
         lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
-                                         encoding=util._encoding)
+                                         encoding=encoding.encoding)
     except (ClassNotFound, ValueError):
         try:
-            lexer = guess_lexer(text[:1024], encoding=util._encoding)
+            lexer = guess_lexer(text[:1024], encoding=encoding.encoding)
         except (ClassNotFound, ValueError):
-            lexer = TextLexer(encoding=util._encoding)
+            lexer = TextLexer(encoding=encoding.encoding)
 
-    formatter = HtmlFormatter(style=style, encoding=util._encoding)
+    formatter = HtmlFormatter(style=style, encoding=encoding.encoding)
 
     colorized = highlight(text, lexer, formatter)
     # strip wrapping div
--- a/hgext/win32mbcs.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/hgext/win32mbcs.py	Fri Apr 03 14:51:48 2009 -0500
@@ -36,19 +36,19 @@
   [extensions]
   hgext.win32mbcs =
 
-Path encoding conversion are done between unicode and util._encoding
+Path encoding conversion are done between unicode and encoding.encoding
 which is decided by mercurial from current locale setting or HGENCODING.
 
 """
 
 import os
 from mercurial.i18n import _
-from mercurial import util
+from mercurial import util, encoding
 
 def decode(arg):
     if isinstance(arg, str):
-        uarg = arg.decode(util._encoding)
-        if arg == uarg.encode(util._encoding):
+        uarg = arg.decode(encoding.encoding)
+        if arg == uarg.encode(encoding.encoding):
             return uarg
         raise UnicodeError("Not local encoding")
     elif isinstance(arg, tuple):
@@ -59,7 +59,7 @@
 
 def encode(arg):
     if isinstance(arg, unicode):
-        return arg.encode(util._encoding)
+        return arg.encode(encoding.encoding)
     elif isinstance(arg, tuple):
         return tuple(map(encode, arg))
     elif isinstance(arg, list):
@@ -76,10 +76,10 @@
         # convert arguments to unicode, call func, then convert back
         return encode(func(*decode(args)))
     except UnicodeError:
-        # If not encoded with util._encoding, report it then
+        # If not encoded with encoding.encoding, report it then
         # continue with calling original function.
         raise util.Abort(_("[win32mbcs] filename conversion fail with"
-                         " %s encoding\n") % (util._encoding))
+                         " %s encoding\n") % (encoding.encoding))
 
 def wrapname(name):
     idx = name.rfind('.')
@@ -115,8 +115,9 @@
         return
 
     # fake is only for relevant environment.
-    if util._encoding.lower() in problematic_encodings.split():
+    if encoding.encoding.lower() in problematic_encodings.split():
         for f in funcs.split():
             wrapname(f)
-        ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding)
+        ui.debug(_("[win32mbcs] activated with encoding: %s\n")
+                 % encoding.encoding)
 
--- a/mercurial/changelog.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/changelog.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 
 from node import bin, hex, nullid
 from i18n import _
-import util, error, revlog
+import util, error, revlog, encoding
 
 def _string_escape(text):
     """
@@ -175,10 +175,10 @@
         if not text:
             return (nullid, "", (0, 0), [], "", {'branch': 'default'})
         last = text.index("\n\n")
-        desc = util.tolocal(text[last + 2:])
+        desc = encoding.tolocal(text[last + 2:])
         l = text[:last].split('\n')
         manifest = bin(l[0])
-        user = util.tolocal(l[1])
+        user = encoding.tolocal(l[1])
 
         extra_data = l[2].split(' ', 2)
         if len(extra_data) != 3:
@@ -205,7 +205,7 @@
         if "\n" in user:
             raise error.RevlogError(_("username %s contains a newline")
                                     % repr(user))
-        user, desc = util.fromlocal(user), util.fromlocal(desc)
+        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
 
         if date:
             parseddate = "%d %d" % util.parsedate(date)
--- a/mercurial/cmdutil.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/cmdutil.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 
 from node import hex, nullid, nullrev, short
 from i18n import _
-import os, sys, bisect, stat
+import os, sys, bisect, stat, encoding
 import mdiff, bdiff, util, templater, templatefilters, patch, errno, error
 import match as _match
 
@@ -626,7 +626,7 @@
 
         # don't show the default branch name
         if branch != 'default':
-            branch = util.tolocal(branch)
+            branch = encoding.tolocal(branch)
             self.ui.write(_("branch:      %s\n") % branch)
         for tag in self.repo.nodetags(changenode):
             self.ui.write(_("tag:         %s\n") % tag)
@@ -791,7 +791,7 @@
         def showbranches(**args):
             branch = ctx.branch()
             if branch != 'default':
-                branch = util.tolocal(branch)
+                branch = encoding.tolocal(branch)
                 return showlist('branch', [branch], plural='branches', **args)
 
         def showparents(**args):
--- a/mercurial/commands.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/commands.py	Fri Apr 03 14:51:48 2009 -0500
@@ -9,7 +9,7 @@
 from i18n import _, gettext
 import os, re, sys
 import hg, util, revlog, bundlerepo, extensions, copies, context, error
-import difflib, patch, time, help, mdiff, tempfile, url
+import difflib, patch, time, help, mdiff, tempfile, url, encoding
 import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect
 import merge as merge_
 
@@ -415,10 +415,10 @@
             if label not in [p.branch() for p in repo.parents()]:
                 raise util.Abort(_('a branch of the same name already exists'
                                    ' (use --force to override)'))
-        repo.dirstate.setbranch(util.fromlocal(label))
+        repo.dirstate.setbranch(encoding.fromlocal(label))
         ui.status(_('marked working directory as branch %s\n') % label)
     else:
-        ui.write("%s\n" % util.tolocal(repo.dirstate.branch()))
+        ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch()))
 
 def branches(ui, repo, active=False):
     """list repository named branches
@@ -431,7 +431,7 @@
     Use the command 'hg update' to switch to an existing branch.
     """
     hexfunc = ui.debugflag and hex or short
-    activebranches = [util.tolocal(repo[n].branch())
+    activebranches = [encoding.tolocal(repo[n].branch())
                             for n in repo.heads(closed=False)]
     branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag)
                           for tag, node in repo.branchtags().items()])
@@ -449,7 +449,7 @@
                     notice = ' (closed)'
                 else:
                     notice = ' (inactive)'
-                rev = str(node).rjust(31 - util.colwidth(tag))
+                rev = str(node).rjust(31 - encoding.colwidth(tag))
                 data = tag, rev, hexfunc(hn), notice
                 ui.write("%s %s:%s%s\n" % data)
 
@@ -882,9 +882,9 @@
     problems = 0
 
     # encoding
-    ui.status(_("Checking encoding (%s)...\n") % util._encoding)
+    ui.status(_("Checking encoding (%s)...\n") % encoding.encoding)
     try:
-        util.fromlocal("test")
+        encoding.fromlocal("test")
     except util.Abort, inst:
         ui.write(" %s\n" % inst)
         ui.write(_(" (check that your locale is properly set)\n"))
@@ -1579,7 +1579,7 @@
             output.append(str(ctx.rev()))
 
     if repo.local() and default and not ui.quiet:
-        b = util.tolocal(ctx.branch())
+        b = encoding.tolocal(ctx.branch())
         if b != 'default':
             output.append("(%s)" % b)
 
@@ -1589,7 +1589,7 @@
             output.append(t)
 
     if branch:
-        output.append(util.tolocal(ctx.branch()))
+        output.append(encoding.tolocal(ctx.branch()))
 
     if tags:
         output.extend(ctx.tags())
@@ -2855,7 +2855,7 @@
         except error.LookupError:
             r = "    ?:%s" % hn
         else:
-            spaces = " " * (30 - util.colwidth(t))
+            spaces = " " * (30 - encoding.colwidth(t))
             if ui.verbose:
                 if repo.tagtype(t) == 'local':
                     tagtype = " local"
@@ -2976,8 +2976,9 @@
     ('', 'config', [], _('set/override config option')),
     ('', 'debug', None, _('enable debugging output')),
     ('', 'debugger', None, _('start debugger')),
-    ('', 'encoding', util._encoding, _('set the charset encoding')),
-    ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')),
+    ('', 'encoding', encoding.encoding, _('set the charset encoding')),
+    ('', 'encodingmode', encoding.encodingmode,
+     _('set the charset encoding mode')),
     ('', 'lsprof', None, _('print improved command execution profile')),
     ('', 'traceback', None, _('print traceback on exception')),
     ('', 'time', None, _('time how long the command takes')),
--- a/mercurial/dispatch.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/dispatch.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 from i18n import _
 import os, sys, atexit, signal, pdb, socket, errno, shlex, time
 import util, commands, hg, fancyopts, extensions, hook, error
-import cmdutil
+import cmdutil, encoding
 import ui as _ui
 
 def run():
@@ -304,7 +304,7 @@
     # check for fallback encoding
     fallback = lui.config('ui', 'fallbackencoding')
     if fallback:
-        util._fallbackencoding = fallback
+        encoding.fallbackencoding = fallback
 
     fullargs = args
     cmd, func, args, options, cmdoptions = _parse(lui, args)
@@ -319,9 +319,9 @@
             "and --repository may only be abbreviated as --repo!"))
 
     if options["encoding"]:
-        util._encoding = options["encoding"]
+        encoding.encoding = options["encoding"]
     if options["encodingmode"]:
-        util._encodingmode = options["encodingmode"]
+        encoding.encodingmode = options["encodingmode"]
     if options["time"]:
         def get_times():
             t = os.times()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/encoding.py	Fri Apr 03 14:51:48 2009 -0500
@@ -0,0 +1,77 @@
+"""
+encoding.py - character transcoding support for Mercurial
+
+ Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
+
+This software may be used and distributed according to the terms of
+the GNU General Public License version 2, incorporated herein by
+reference.
+"""
+
+import sys, unicodedata, locale, os, error
+
+_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
+
+try:
+    encoding = os.environ.get("HGENCODING")
+    if sys.platform == 'darwin' and not encoding:
+        # On darwin, getpreferredencoding ignores the locale environment and
+        # always returns mac-roman. We override this if the environment is
+        # not C (has been customized by the user).
+        locale.setlocale(locale.LC_CTYPE, '')
+        encoding = locale.getlocale()[1]
+    if not encoding:
+        encoding = locale.getpreferredencoding() or 'ascii'
+        encoding = _encodingfixup.get(encoding, encoding)
+except locale.Error:
+    encoding = 'ascii'
+encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+fallbackencoding = 'ISO-8859-1'
+
+def tolocal(s):
+    """
+    Convert a string from internal UTF-8 to local encoding
+
+    All internal strings should be UTF-8 but some repos before the
+    implementation of locale support may contain latin1 or possibly
+    other character sets. We attempt to decode everything strictly
+    using UTF-8, then fallbackencoding (Latin-1 unless overridden), and
+    failing that, we use UTF-8 and replace unknown characters.
+    """
+    for e in ('UTF-8', fallbackencoding):
+        try:
+            u = s.decode(e) # attempt strict decoding
+            return u.encode(encoding, "replace")
+        except LookupError, k:
+            raise error.Abort("%s, please check your locale settings" % k)
+        except UnicodeDecodeError:
+            pass
+    u = s.decode("utf-8", "replace") # last ditch
+    return u.encode(encoding, "replace")
+
+def fromlocal(s):
+    """
+    Convert a string from the local character encoding to UTF-8
+
+    We attempt to decode strings using the encoding mode set by
+    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
+    characters will cause an error message. Other modes include
+    'replace', which replaces unknown characters with a special
+    Unicode character, and 'ignore', which drops the character.
+    """
+    try:
+        return s.decode(encoding, encodingmode).encode("utf-8")
+    except UnicodeDecodeError, inst:
+        sub = s[max(0, inst.start-10):inst.start+10]
+        raise error.Abort("decoding near '%s': %s!" % (sub, inst))
+    except LookupError, k:
+        raise error.Abort("%s, please check your locale settings" % k)
+
+def colwidth(s):
+    "Find the column width of a string for display in the local encoding"
+    d = s.decode(encoding, 'replace')
+    if hasattr(unicodedata, 'east_asian_width'):
+        w = unicodedata.east_asian_width
+        return sum([w(c) in 'WF' and 2 or 1 for c in d])
+    return len(d)
+
--- a/mercurial/hgweb/hgweb_mod.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/hgweb/hgweb_mod.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 import os
-from mercurial import ui, hg, util, hook, error
+from mercurial import ui, hg, util, hook, error, encoding
 from mercurial import templater, templatefilters
 from common import get_mtime, style_map, ErrorResponse
 from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
@@ -65,7 +65,7 @@
             self.maxshortchanges = int(self.config("web", "maxshortchanges", 60))
             self.maxfiles = int(self.config("web", "maxfiles", 10))
             self.allowpull = self.configbool("web", "allowpull", True)
-            self.encoding = self.config("web", "encoding", util._encoding)
+            self.encoding = self.config("web", "encoding", encoding.encoding)
 
     def run(self):
         if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
--- a/mercurial/hgweb/hgwebdir_mod.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/hgweb/hgwebdir_mod.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 
 import os
 from mercurial.i18n import _
-from mercurial import ui, hg, util, templater, templatefilters, error
+from mercurial import ui, hg, util, templater, templatefilters, error, encoding
 from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\
                    get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
 from hgweb_mod import hgweb
@@ -119,7 +119,7 @@
 
                 virtual = req.env.get("PATH_INFO", "").strip('/')
                 tmpl = self.templater(req)
-                ctype = tmpl('mimetype', encoding=util._encoding)
+                ctype = tmpl('mimetype', encoding=encoding.encoding)
                 ctype = templater.stringify(ctype)
 
                 # a static file
@@ -285,7 +285,7 @@
     def templater(self, req):
 
         def header(**map):
-            yield tmpl('header', encoding=util._encoding, **map)
+            yield tmpl('header', encoding=encoding.encoding, **map)
 
         def footer(**map):
             yield tmpl("footer", **map)
--- a/mercurial/i18n.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/i18n.py	Fri Apr 03 14:51:48 2009 -0500
@@ -7,7 +7,7 @@
 of the GNU General Public License, incorporated herein by reference.
 """
 
-import gettext, sys, os
+import gettext, sys, os, encoding
 
 # modelled after templater.templatepath:
 if hasattr(sys, 'frozen'):
@@ -37,15 +37,13 @@
     if message is None:
         return message
 
-    # We cannot just run the text through util.tolocal since that
-    # leads to infinite recursion when util._encoding is invalid.
+    # We cannot just run the text through encoding.tolocal since that
+    # leads to infinite recursion when encoding.encoding is invalid.
     try:
         u = t.ugettext(message)
-        return u.encode(util._encoding, "replace")
+        return u.encode(encoding.encoding, "replace")
     except LookupError:
         return message
 
 _ = gettext
 
-# Moved after _ because of circular import.
-import util
--- a/mercurial/localrepo.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/localrepo.py	Fri Apr 03 14:51:48 2009 -0500
@@ -9,7 +9,7 @@
 from i18n import _
 import repo, changegroup
 import changelog, dirstate, filelog, manifest, context, weakref
-import lock, transaction, stat, errno, ui, store
+import lock, transaction, stat, errno, ui, store, encoding
 import os, time, util, extensions, hook, inspect, error
 import match as match_
 import merge as merge_
@@ -188,7 +188,7 @@
                 fp.write(prevtags)
 
         # committed tags are stored in UTF-8
-        writetags(fp, names, util.fromlocal, prevtags)
+        writetags(fp, names, encoding.fromlocal, prevtags)
 
         if use_dirstate and '.hgtags' not in self.dirstate:
             self.add(['.hgtags'])
@@ -254,7 +254,7 @@
                     warn(_("cannot parse entry"))
                     continue
                 node, key = s
-                key = util.tolocal(key.strip()) # stored in UTF-8
+                key = encoding.tolocal(key.strip()) # stored in UTF-8
                 try:
                     bin_n = bin(node)
                 except TypeError:
@@ -297,7 +297,7 @@
             readtags(f.data().splitlines(), f, "global")
 
         try:
-            data = util.fromlocal(self.opener("localtags").read())
+            data = encoding.fromlocal(self.opener("localtags").read())
             # localtags are stored in the local character set
             # while the internal tag table is stored in UTF-8
             readtags(data.splitlines(), "localtags", "local")
@@ -397,7 +397,7 @@
         # the branch cache is stored on disk as UTF-8, but in the local
         # charset internally
         for k, v in partial.iteritems():
-            self.branchcache[util.tolocal(k)] = v
+            self.branchcache[encoding.tolocal(k)] = v
         return self.branchcache
 
 
@@ -647,7 +647,7 @@
                 except IOError:
                     self.ui.warn(_("Named branch could not be reset, "
                                    "current branch still is: %s\n")
-                                 % util.tolocal(self.dirstate.branch()))
+                                 % encoding.tolocal(self.dirstate.branch()))
                 self.invalidate()
                 self.dirstate.invalidate()
             else:
@@ -943,7 +943,8 @@
                 if p2 != nullid:
                     edittext.append("HG: branch merge")
                 if branchname:
-                    edittext.append("HG: branch '%s'" % util.tolocal(branchname))
+                    edittext.append("HG: branch '%s'"
+                                    % encoding.tolocal(branchname))
                 edittext.extend(["HG: added %s" % f for f in added])
                 edittext.extend(["HG: changed %s" % f for f in updated])
                 edittext.extend(["HG: removed %s" % f for f in removed])
--- a/mercurial/mail.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/mail.py	Fri Apr 03 14:51:48 2009 -0500
@@ -8,7 +8,7 @@
 from i18n import _
 import os, smtplib, socket
 import email.Header, email.MIMEText, email.Utils
-import util
+import util, encoding
 
 def _smtp(ui):
     '''build an smtp connection and return a function to send mail'''
@@ -100,8 +100,8 @@
 def _charsets(ui):
     '''Obtains charsets to send mail parts not containing patches.'''
     charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')]
-    fallbacks = [util._fallbackencoding.lower(),
-                 util._encoding.lower(), 'utf-8']
+    fallbacks = [encoding.fallbackencoding.lower(),
+                 encoding.encoding.lower(), 'utf-8']
     for cs in fallbacks: # util.unique does not keep order
         if cs not in charsets:
             charsets.append(cs)
@@ -110,14 +110,14 @@
 def _encode(ui, s, charsets):
     '''Returns (converted) string, charset tuple.
     Finds out best charset by cycling through sendcharsets in descending
-    order. Tries both _encoding and _fallbackencoding for input. Only as
+    order. Tries both encoding and fallbackencoding for input. Only as
     last resort send as is in fake ascii.
     Caveat: Do not use for mail parts containing patches!'''
     try:
         s.decode('ascii')
     except UnicodeDecodeError:
         sendcharsets = charsets or _charsets(ui)
-        for ics in (util._encoding, util._fallbackencoding):
+        for ics in (encoding.encoding, encoding.fallbackencoding):
             try:
                 u = s.decode(ics)
             except UnicodeDecodeError:
--- a/mercurial/templatefilters.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/templatefilters.py	Fri Apr 03 14:51:48 2009 -0500
@@ -6,7 +6,7 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 import cgi, re, os, time, urllib, textwrap
-import util, templater
+import util, templater, encoding
 
 agescales = [("second", 1),
              ("minute", 60),
@@ -76,7 +76,7 @@
     return text.replace('\n', '<br/>\n')
 
 def obfuscate(text):
-    text = unicode(text, util._encoding, 'replace')
+    text = unicode(text, encoding.encoding, 'replace')
     return ''.join(['&#%d;' % ord(c) for c in text])
 
 def domain(author):
--- a/mercurial/util.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/util.py	Fri Apr 03 14:51:48 2009 -0500
@@ -14,8 +14,8 @@
 
 from i18n import _
 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
-import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
-import imp, unicodedata
+import os, stat, threading, time, calendar, ConfigParser, glob, osutil
+import imp
 
 # Python compatibility
 
@@ -81,71 +81,6 @@
     popen3 = os.popen3
 
 
-_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
-
-try:
-    _encoding = os.environ.get("HGENCODING")
-    if sys.platform == 'darwin' and not _encoding:
-        # On darwin, getpreferredencoding ignores the locale environment and
-        # always returns mac-roman. We override this if the environment is
-        # not C (has been customized by the user).
-        locale.setlocale(locale.LC_CTYPE, '')
-        _encoding = locale.getlocale()[1]
-    if not _encoding:
-        _encoding = locale.getpreferredencoding() or 'ascii'
-        _encoding = _encodingfixup.get(_encoding, _encoding)
-except locale.Error:
-    _encoding = 'ascii'
-_encodingmode = os.environ.get("HGENCODINGMODE", "strict")
-_fallbackencoding = 'ISO-8859-1'
-
-def tolocal(s):
-    """
-    Convert a string from internal UTF-8 to local encoding
-
-    All internal strings should be UTF-8 but some repos before the
-    implementation of locale support may contain latin1 or possibly
-    other character sets. We attempt to decode everything strictly
-    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
-    replace unknown characters.
-    """
-    for e in ('UTF-8', _fallbackencoding):
-        try:
-            u = s.decode(e) # attempt strict decoding
-            return u.encode(_encoding, "replace")
-        except LookupError, k:
-            raise Abort(_("%s, please check your locale settings") % k)
-        except UnicodeDecodeError:
-            pass
-    u = s.decode("utf-8", "replace") # last ditch
-    return u.encode(_encoding, "replace")
-
-def fromlocal(s):
-    """
-    Convert a string from the local character encoding to UTF-8
-
-    We attempt to decode strings using the encoding mode set by
-    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
-    characters will cause an error message. Other modes include
-    'replace', which replaces unknown characters with a special
-    Unicode character, and 'ignore', which drops the character.
-    """
-    try:
-        return s.decode(_encoding, _encodingmode).encode("utf-8")
-    except UnicodeDecodeError, inst:
-        sub = s[max(0, inst.start-10):inst.start+10]
-        raise Abort("decoding near '%s': %s!" % (sub, inst))
-    except LookupError, k:
-        raise Abort(_("%s, please check your locale settings") % k)
-
-def colwidth(s):
-    "Find the column width of a UTF-8 string for display"
-    d = s.decode(_encoding, 'replace')
-    if hasattr(unicodedata, 'east_asian_width'):
-        w = unicodedata.east_asian_width
-        return sum([w(c) in 'WF' and 2 or 1 for c in d])
-    return len(d)
-
 def version():
     """Return version information if available."""
     try:
--- a/mercurial/win32.py	Fri Apr 03 13:20:52 2009 -0500
+++ b/mercurial/win32.py	Fri Apr 03 14:51:48 2009 -0500
@@ -16,7 +16,7 @@
 
 import errno, os, sys, pywintypes, win32con, win32file, win32process
 import cStringIO, winerror
-import osutil
+import osutil, encoding
 import util
 from win32com.shell import shell,shellcon
 
@@ -213,7 +213,7 @@
         try:
             val = QueryValueEx(OpenKey(s, key), valname)[0]
             # never let a Unicode string escape into the wild
-            return util.tolocal(val.encode('UTF-8'))
+            return encoding.tolocal(val.encode('UTF-8'))
         except EnvironmentError:
             pass