Mercurial > hg
changeset 7948:de377b1a9a84
move encoding bits from util to encoding
In addition to cleaning up util, this gets rid of some circular dependencies.
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Fri, 03 Apr 2009 14:51:48 -0500 |
parents | a454eeb1b827 |
children | 443c0c8636ac |
files | hgext/convert/convcmd.py hgext/highlight/highlight.py hgext/win32mbcs.py mercurial/changelog.py mercurial/cmdutil.py mercurial/commands.py mercurial/dispatch.py mercurial/encoding.py mercurial/hgweb/hgweb_mod.py mercurial/hgweb/hgwebdir_mod.py mercurial/i18n.py mercurial/localrepo.py mercurial/mail.py mercurial/templatefilters.py mercurial/util.py mercurial/win32.py |
diffstat | 16 files changed, 149 insertions(+), 136 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/convert/convcmd.py Fri Apr 03 13:20:52 2009 -0500 +++ b/hgext/convert/convcmd.py Fri Apr 03 14:51:48 2009 -0500 @@ -18,7 +18,7 @@ import filemap import os, shutil -from mercurial import hg, util +from mercurial import hg, util, encoding from mercurial.i18n import _ orig_encoding = 'ascii' @@ -275,7 +275,7 @@ if "\n" in desc: desc = desc.splitlines()[0] # convert log message to local encoding without using - # tolocal() because util._encoding conver() use it as + # tolocal() because encoding.encoding conver() use it as # 'utf-8' self.ui.status("%d %s\n" % (num, recode(desc))) self.ui.note(_("source: %s\n") % recode(c)) @@ -308,8 +308,8 @@ def convert(ui, src, dest=None, revmapfile=None, **opts): global orig_encoding - orig_encoding = util._encoding - util._encoding = 'UTF-8' + orig_encoding = encoding.encoding + encoding.encoding = 'UTF-8' if not dest: dest = hg.defaultdest(src) + "-hg"
--- a/hgext/highlight/highlight.py Fri Apr 03 13:20:52 2009 -0500 +++ b/hgext/highlight/highlight.py Fri Apr 03 14:51:48 2009 -0500 @@ -6,7 +6,7 @@ from mercurial import demandimport demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',]) -from mercurial import util +from mercurial import util, encoding from mercurial.templatefilters import filters from pygments import highlight @@ -30,19 +30,19 @@ return # avoid UnicodeDecodeError in pygments - text = util.tolocal(text) + text = encoding.tolocal(text) # To get multi-line strings right, we can't format line-by-line try: lexer = guess_lexer_for_filename(fctx.path(), text[:1024], - encoding=util._encoding) + encoding=encoding.encoding) except (ClassNotFound, ValueError): try: - lexer = guess_lexer(text[:1024], encoding=util._encoding) + lexer = guess_lexer(text[:1024], encoding=encoding.encoding) except (ClassNotFound, ValueError): - lexer = TextLexer(encoding=util._encoding) + lexer = TextLexer(encoding=encoding.encoding) - formatter = HtmlFormatter(style=style, encoding=util._encoding) + formatter = HtmlFormatter(style=style, encoding=encoding.encoding) colorized = highlight(text, lexer, formatter) # strip wrapping div
--- a/hgext/win32mbcs.py Fri Apr 03 13:20:52 2009 -0500 +++ b/hgext/win32mbcs.py Fri Apr 03 14:51:48 2009 -0500 @@ -36,19 +36,19 @@ [extensions] hgext.win32mbcs = -Path encoding conversion are done between unicode and util._encoding +Path encoding conversion are done between unicode and encoding.encoding which is decided by mercurial from current locale setting or HGENCODING. """ import os from mercurial.i18n import _ -from mercurial import util +from mercurial import util, encoding def decode(arg): if isinstance(arg, str): - uarg = arg.decode(util._encoding) - if arg == uarg.encode(util._encoding): + uarg = arg.decode(encoding.encoding) + if arg == uarg.encode(encoding.encoding): return uarg raise UnicodeError("Not local encoding") elif isinstance(arg, tuple): @@ -59,7 +59,7 @@ def encode(arg): if isinstance(arg, unicode): - return arg.encode(util._encoding) + return arg.encode(encoding.encoding) elif isinstance(arg, tuple): return tuple(map(encode, arg)) elif isinstance(arg, list): @@ -76,10 +76,10 @@ # convert arguments to unicode, call func, then convert back return encode(func(*decode(args))) except UnicodeError: - # If not encoded with util._encoding, report it then + # If not encoded with encoding.encoding, report it then # continue with calling original function. raise util.Abort(_("[win32mbcs] filename conversion fail with" - " %s encoding\n") % (util._encoding)) + " %s encoding\n") % (encoding.encoding)) def wrapname(name): idx = name.rfind('.') @@ -115,8 +115,9 @@ return # fake is only for relevant environment. - if util._encoding.lower() in problematic_encodings.split(): + if encoding.encoding.lower() in problematic_encodings.split(): for f in funcs.split(): wrapname(f) - ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding) + ui.debug(_("[win32mbcs] activated with encoding: %s\n") + % encoding.encoding)
--- a/mercurial/changelog.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/changelog.py Fri Apr 03 14:51:48 2009 -0500 @@ -7,7 +7,7 @@ from node import bin, hex, nullid from i18n import _ -import util, error, revlog +import util, error, revlog, encoding def _string_escape(text): """ @@ -175,10 +175,10 @@ if not text: return (nullid, "", (0, 0), [], "", {'branch': 'default'}) last = text.index("\n\n") - desc = util.tolocal(text[last + 2:]) + desc = encoding.tolocal(text[last + 2:]) l = text[:last].split('\n') manifest = bin(l[0]) - user = util.tolocal(l[1]) + user = encoding.tolocal(l[1]) extra_data = l[2].split(' ', 2) if len(extra_data) != 3: @@ -205,7 +205,7 @@ if "\n" in user: raise error.RevlogError(_("username %s contains a newline") % repr(user)) - user, desc = util.fromlocal(user), util.fromlocal(desc) + user, desc = encoding.fromlocal(user), encoding.fromlocal(desc) if date: parseddate = "%d %d" % util.parsedate(date)
--- a/mercurial/cmdutil.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/cmdutil.py Fri Apr 03 14:51:48 2009 -0500 @@ -7,7 +7,7 @@ from node import hex, nullid, nullrev, short from i18n import _ -import os, sys, bisect, stat +import os, sys, bisect, stat, encoding import mdiff, bdiff, util, templater, templatefilters, patch, errno, error import match as _match @@ -626,7 +626,7 @@ # don't show the default branch name if branch != 'default': - branch = util.tolocal(branch) + branch = encoding.tolocal(branch) self.ui.write(_("branch: %s\n") % branch) for tag in self.repo.nodetags(changenode): self.ui.write(_("tag: %s\n") % tag) @@ -791,7 +791,7 @@ def showbranches(**args): branch = ctx.branch() if branch != 'default': - branch = util.tolocal(branch) + branch = encoding.tolocal(branch) return showlist('branch', [branch], plural='branches', **args) def showparents(**args):
--- a/mercurial/commands.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/commands.py Fri Apr 03 14:51:48 2009 -0500 @@ -9,7 +9,7 @@ from i18n import _, gettext import os, re, sys import hg, util, revlog, bundlerepo, extensions, copies, context, error -import difflib, patch, time, help, mdiff, tempfile, url +import difflib, patch, time, help, mdiff, tempfile, url, encoding import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect import merge as merge_ @@ -415,10 +415,10 @@ if label not in [p.branch() for p in repo.parents()]: raise util.Abort(_('a branch of the same name already exists' ' (use --force to override)')) - repo.dirstate.setbranch(util.fromlocal(label)) + repo.dirstate.setbranch(encoding.fromlocal(label)) ui.status(_('marked working directory as branch %s\n') % label) else: - ui.write("%s\n" % util.tolocal(repo.dirstate.branch())) + ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch())) def branches(ui, repo, active=False): """list repository named branches @@ -431,7 +431,7 @@ Use the command 'hg update' to switch to an existing branch. 
""" hexfunc = ui.debugflag and hex or short - activebranches = [util.tolocal(repo[n].branch()) + activebranches = [encoding.tolocal(repo[n].branch()) for n in repo.heads(closed=False)] branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag) for tag, node in repo.branchtags().items()]) @@ -449,7 +449,7 @@ notice = ' (closed)' else: notice = ' (inactive)' - rev = str(node).rjust(31 - util.colwidth(tag)) + rev = str(node).rjust(31 - encoding.colwidth(tag)) data = tag, rev, hexfunc(hn), notice ui.write("%s %s:%s%s\n" % data) @@ -882,9 +882,9 @@ problems = 0 # encoding - ui.status(_("Checking encoding (%s)...\n") % util._encoding) + ui.status(_("Checking encoding (%s)...\n") % encoding.encoding) try: - util.fromlocal("test") + encoding.fromlocal("test") except util.Abort, inst: ui.write(" %s\n" % inst) ui.write(_(" (check that your locale is properly set)\n")) @@ -1579,7 +1579,7 @@ output.append(str(ctx.rev())) if repo.local() and default and not ui.quiet: - b = util.tolocal(ctx.branch()) + b = encoding.tolocal(ctx.branch()) if b != 'default': output.append("(%s)" % b) @@ -1589,7 +1589,7 @@ output.append(t) if branch: - output.append(util.tolocal(ctx.branch())) + output.append(encoding.tolocal(ctx.branch())) if tags: output.extend(ctx.tags()) @@ -2855,7 +2855,7 @@ except error.LookupError: r = " ?:%s" % hn else: - spaces = " " * (30 - util.colwidth(t)) + spaces = " " * (30 - encoding.colwidth(t)) if ui.verbose: if repo.tagtype(t) == 'local': tagtype = " local" @@ -2976,8 +2976,9 @@ ('', 'config', [], _('set/override config option')), ('', 'debug', None, _('enable debugging output')), ('', 'debugger', None, _('start debugger')), - ('', 'encoding', util._encoding, _('set the charset encoding')), - ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')), + ('', 'encoding', encoding.encoding, _('set the charset encoding')), + ('', 'encodingmode', encoding.encodingmode, + _('set the charset encoding mode')), ('', 'lsprof', None, 
_('print improved command execution profile')), ('', 'traceback', None, _('print traceback on exception')), ('', 'time', None, _('time how long the command takes')),
--- a/mercurial/dispatch.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/dispatch.py Fri Apr 03 14:51:48 2009 -0500 @@ -8,7 +8,7 @@ from i18n import _ import os, sys, atexit, signal, pdb, socket, errno, shlex, time import util, commands, hg, fancyopts, extensions, hook, error -import cmdutil +import cmdutil, encoding import ui as _ui def run(): @@ -304,7 +304,7 @@ # check for fallback encoding fallback = lui.config('ui', 'fallbackencoding') if fallback: - util._fallbackencoding = fallback + encoding.fallbackencoding = fallback fullargs = args cmd, func, args, options, cmdoptions = _parse(lui, args) @@ -319,9 +319,9 @@ "and --repository may only be abbreviated as --repo!")) if options["encoding"]: - util._encoding = options["encoding"] + encoding.encoding = options["encoding"] if options["encodingmode"]: - util._encodingmode = options["encodingmode"] + encoding.encodingmode = options["encodingmode"] if options["time"]: def get_times(): t = os.times()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/encoding.py Fri Apr 03 14:51:48 2009 -0500 @@ -0,0 +1,77 @@ +""" +encoding.py - character transcoding support for Mercurial + + Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others + +This software may be used and distributed according to the terms of +the GNU General Public License version 2, incorporated herein by +reference. +""" + +import sys, unicodedata, locale, os, error + +_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} + +try: + encoding = os.environ.get("HGENCODING") + if sys.platform == 'darwin' and not encoding: + # On darwin, getpreferredencoding ignores the locale environment and + # always returns mac-roman. We override this if the environment is + # not C (has been customized by the user). + locale.setlocale(locale.LC_CTYPE, '') + encoding = locale.getlocale()[1] + if not encoding: + encoding = locale.getpreferredencoding() or 'ascii' + encoding = _encodingfixup.get(encoding, encoding) +except locale.Error: + encoding = 'ascii' +encodingmode = os.environ.get("HGENCODINGMODE", "strict") +fallbackencoding = 'ISO-8859-1' + +def tolocal(s): + """ + Convert a string from internal UTF-8 to local encoding + + All internal strings should be UTF-8 but some repos before the + implementation of locale support may contain latin1 or possibly + other character sets. We attempt to decode everything strictly + using UTF-8, then Latin-1, and failing that, we use UTF-8 and + replace unknown characters. 
+ """ + for e in ('UTF-8', fallbackencoding): + try: + u = s.decode(e) # attempt strict decoding + return u.encode(encoding, "replace") + except LookupError, k: + raise error.Abort("%s, please check your locale settings" % k) + except UnicodeDecodeError: + pass + u = s.decode("utf-8", "replace") # last ditch + return u.encode(encoding, "replace") + +def fromlocal(s): + """ + Convert a string from the local character encoding to UTF-8 + + We attempt to decode strings using the encoding mode set by + HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown + characters will cause an error message. Other modes include + 'replace', which replaces unknown characters with a special + Unicode character, and 'ignore', which drops the character. + """ + try: + return s.decode(encoding, encodingmode).encode("utf-8") + except UnicodeDecodeError, inst: + sub = s[max(0, inst.start-10):inst.start+10] + raise error.Abort("decoding near '%s': %s!" % (sub, inst)) + except LookupError, k: + raise error.Abort("%s, please check your locale settings" % k) + +def colwidth(s): + "Find the column width of a local-encoding string for display" + d = s.decode(encoding, 'replace') + if hasattr(unicodedata, 'east_asian_width'): + w = unicodedata.east_asian_width + return sum([w(c) in 'WF' and 2 or 1 for c in d]) + return len(d) +
--- a/mercurial/hgweb/hgweb_mod.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/hgweb/hgweb_mod.py Fri Apr 03 14:51:48 2009 -0500 @@ -7,7 +7,7 @@ # of the GNU General Public License, incorporated herein by reference. import os -from mercurial import ui, hg, util, hook, error +from mercurial import ui, hg, util, hook, error, encoding from mercurial import templater, templatefilters from common import get_mtime, style_map, ErrorResponse from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR @@ -65,7 +65,7 @@ self.maxshortchanges = int(self.config("web", "maxshortchanges", 60)) self.maxfiles = int(self.config("web", "maxfiles", 10)) self.allowpull = self.configbool("web", "allowpull", True) - self.encoding = self.config("web", "encoding", util._encoding) + self.encoding = self.config("web", "encoding", encoding.encoding) def run(self): if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
--- a/mercurial/hgweb/hgwebdir_mod.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/hgweb/hgwebdir_mod.py Fri Apr 03 14:51:48 2009 -0500 @@ -8,7 +8,7 @@ import os from mercurial.i18n import _ -from mercurial import ui, hg, util, templater, templatefilters, error +from mercurial import ui, hg, util, templater, templatefilters, error, encoding from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\ get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR from hgweb_mod import hgweb @@ -119,7 +119,7 @@ virtual = req.env.get("PATH_INFO", "").strip('/') tmpl = self.templater(req) - ctype = tmpl('mimetype', encoding=util._encoding) + ctype = tmpl('mimetype', encoding=encoding.encoding) ctype = templater.stringify(ctype) # a static file @@ -285,7 +285,7 @@ def templater(self, req): def header(**map): - yield tmpl('header', encoding=util._encoding, **map) + yield tmpl('header', encoding=encoding.encoding, **map) def footer(**map): yield tmpl("footer", **map)
--- a/mercurial/i18n.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/i18n.py Fri Apr 03 14:51:48 2009 -0500 @@ -7,7 +7,7 @@ of the GNU General Public License, incorporated herein by reference. """ -import gettext, sys, os +import gettext, sys, os, encoding # modelled after templater.templatepath: if hasattr(sys, 'frozen'): @@ -37,15 +37,13 @@ if message is None: return message - # We cannot just run the text through util.tolocal since that - # leads to infinite recursion when util._encoding is invalid. + # We cannot just run the text through encoding.tolocal since that + # leads to infinite recursion when encoding.encoding is invalid. try: u = t.ugettext(message) - return u.encode(util._encoding, "replace") + return u.encode(encoding.encoding, "replace") except LookupError: return message _ = gettext -# Moved after _ because of circular import. -import util
--- a/mercurial/localrepo.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/localrepo.py Fri Apr 03 14:51:48 2009 -0500 @@ -9,7 +9,7 @@ from i18n import _ import repo, changegroup import changelog, dirstate, filelog, manifest, context, weakref -import lock, transaction, stat, errno, ui, store +import lock, transaction, stat, errno, ui, store, encoding import os, time, util, extensions, hook, inspect, error import match as match_ import merge as merge_ @@ -188,7 +188,7 @@ fp.write(prevtags) # committed tags are stored in UTF-8 - writetags(fp, names, util.fromlocal, prevtags) + writetags(fp, names, encoding.fromlocal, prevtags) if use_dirstate and '.hgtags' not in self.dirstate: self.add(['.hgtags']) @@ -254,7 +254,7 @@ warn(_("cannot parse entry")) continue node, key = s - key = util.tolocal(key.strip()) # stored in UTF-8 + key = encoding.tolocal(key.strip()) # stored in UTF-8 try: bin_n = bin(node) except TypeError: @@ -297,7 +297,7 @@ readtags(f.data().splitlines(), f, "global") try: - data = util.fromlocal(self.opener("localtags").read()) + data = encoding.fromlocal(self.opener("localtags").read()) # localtags are stored in the local character set # while the internal tag table is stored in UTF-8 readtags(data.splitlines(), "localtags", "local") @@ -397,7 +397,7 @@ # the branch cache is stored on disk as UTF-8, but in the local # charset internally for k, v in partial.iteritems(): - self.branchcache[util.tolocal(k)] = v + self.branchcache[encoding.tolocal(k)] = v return self.branchcache @@ -647,7 +647,7 @@ except IOError: self.ui.warn(_("Named branch could not be reset, " "current branch still is: %s\n") - % util.tolocal(self.dirstate.branch())) + % encoding.tolocal(self.dirstate.branch())) self.invalidate() self.dirstate.invalidate() else: @@ -943,7 +943,8 @@ if p2 != nullid: edittext.append("HG: branch merge") if branchname: - edittext.append("HG: branch '%s'" % util.tolocal(branchname)) + edittext.append("HG: branch '%s'" + % encoding.tolocal(branchname)) 
edittext.extend(["HG: added %s" % f for f in added]) edittext.extend(["HG: changed %s" % f for f in updated]) edittext.extend(["HG: removed %s" % f for f in removed])
--- a/mercurial/mail.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/mail.py Fri Apr 03 14:51:48 2009 -0500 @@ -8,7 +8,7 @@ from i18n import _ import os, smtplib, socket import email.Header, email.MIMEText, email.Utils -import util +import util, encoding def _smtp(ui): '''build an smtp connection and return a function to send mail''' @@ -100,8 +100,8 @@ def _charsets(ui): '''Obtains charsets to send mail parts not containing patches.''' charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')] - fallbacks = [util._fallbackencoding.lower(), - util._encoding.lower(), 'utf-8'] + fallbacks = [encoding.fallbackencoding.lower(), + encoding.encoding.lower(), 'utf-8'] for cs in fallbacks: # util.unique does not keep order if cs not in charsets: charsets.append(cs) @@ -110,14 +110,14 @@ def _encode(ui, s, charsets): '''Returns (converted) string, charset tuple. Finds out best charset by cycling through sendcharsets in descending - order. Tries both _encoding and _fallbackencoding for input. Only as + order. Tries both encoding and fallbackencoding for input. Only as last resort send as is in fake ascii. Caveat: Do not use for mail parts containing patches!''' try: s.decode('ascii') except UnicodeDecodeError: sendcharsets = charsets or _charsets(ui) - for ics in (util._encoding, util._fallbackencoding): + for ics in (encoding.encoding, encoding.fallbackencoding): try: u = s.decode(ics) except UnicodeDecodeError:
--- a/mercurial/templatefilters.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/templatefilters.py Fri Apr 03 14:51:48 2009 -0500 @@ -6,7 +6,7 @@ # of the GNU General Public License, incorporated herein by reference. import cgi, re, os, time, urllib, textwrap -import util, templater +import util, templater, encoding agescales = [("second", 1), ("minute", 60), @@ -76,7 +76,7 @@ return text.replace('\n', '<br/>\n') def obfuscate(text): - text = unicode(text, util._encoding, 'replace') + text = unicode(text, encoding.encoding, 'replace') return ''.join(['&#%d;' % ord(c) for c in text]) def domain(author):
--- a/mercurial/util.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/util.py Fri Apr 03 14:51:48 2009 -0500 @@ -14,8 +14,8 @@ from i18n import _ import cStringIO, errno, re, shutil, sys, tempfile, traceback, error -import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil -import imp, unicodedata +import os, stat, threading, time, calendar, ConfigParser, glob, osutil +import imp # Python compatibility @@ -81,71 +81,6 @@ popen3 = os.popen3 -_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} - -try: - _encoding = os.environ.get("HGENCODING") - if sys.platform == 'darwin' and not _encoding: - # On darwin, getpreferredencoding ignores the locale environment and - # always returns mac-roman. We override this if the environment is - # not C (has been customized by the user). - locale.setlocale(locale.LC_CTYPE, '') - _encoding = locale.getlocale()[1] - if not _encoding: - _encoding = locale.getpreferredencoding() or 'ascii' - _encoding = _encodingfixup.get(_encoding, _encoding) -except locale.Error: - _encoding = 'ascii' -_encodingmode = os.environ.get("HGENCODINGMODE", "strict") -_fallbackencoding = 'ISO-8859-1' - -def tolocal(s): - """ - Convert a string from internal UTF-8 to local encoding - - All internal strings should be UTF-8 but some repos before the - implementation of locale support may contain latin1 or possibly - other character sets. We attempt to decode everything strictly - using UTF-8, then Latin-1, and failing that, we use UTF-8 and - replace unknown characters. 
- """ - for e in ('UTF-8', _fallbackencoding): - try: - u = s.decode(e) # attempt strict decoding - return u.encode(_encoding, "replace") - except LookupError, k: - raise Abort(_("%s, please check your locale settings") % k) - except UnicodeDecodeError: - pass - u = s.decode("utf-8", "replace") # last ditch - return u.encode(_encoding, "replace") - -def fromlocal(s): - """ - Convert a string from the local character encoding to UTF-8 - - We attempt to decode strings using the encoding mode set by - HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown - characters will cause an error message. Other modes include - 'replace', which replaces unknown characters with a special - Unicode character, and 'ignore', which drops the character. - """ - try: - return s.decode(_encoding, _encodingmode).encode("utf-8") - except UnicodeDecodeError, inst: - sub = s[max(0, inst.start-10):inst.start+10] - raise Abort("decoding near '%s': %s!" % (sub, inst)) - except LookupError, k: - raise Abort(_("%s, please check your locale settings") % k) - -def colwidth(s): - "Find the column width of a UTF-8 string for display" - d = s.decode(_encoding, 'replace') - if hasattr(unicodedata, 'east_asian_width'): - w = unicodedata.east_asian_width - return sum([w(c) in 'WF' and 2 or 1 for c in d]) - return len(d) - def version(): """Return version information if available.""" try:
--- a/mercurial/win32.py Fri Apr 03 13:20:52 2009 -0500 +++ b/mercurial/win32.py Fri Apr 03 14:51:48 2009 -0500 @@ -16,7 +16,7 @@ import errno, os, sys, pywintypes, win32con, win32file, win32process import cStringIO, winerror -import osutil +import osutil, encoding import util from win32com.shell import shell,shellcon @@ -213,7 +213,7 @@ try: val = QueryValueEx(OpenKey(s, key), valname)[0] # never let a Unicode string escape into the wild - return util.tolocal(val.encode('UTF-8')) + return encoding.tolocal(val.encode('UTF-8')) except EnvironmentError: pass