changeset 25884:b810b59eca62 stable

convert: when converting from Perforce use original local encoding by default. On Windows, the Perforce command line client uses the default system locale to encode its output. Using 'latin_1' causes locale-specific characters to be replaced with question marks. With this patch we use the default system locale by default, while allowing the encoding to be specified explicitly with the 'convert.p4.encoding' config option. This is a potentially breaking change for any scripts relying on the output being treated as 'latin_1'-encoded. Also, because hgext.convert.convcmd overwrites the detected default system locale with UTF-8, we had to introduce an import cycle in hgext.convert.p4 to retrieve the originally detected encoding from hgext.convert.convcmd.
author Eugene Baranov <eug.baranov@gmail.com>
date Wed, 22 Jul 2015 16:57:11 +0100
parents 97a9f7602014
children f6e159237260
files hgext/convert/__init__.py hgext/convert/p4.py tests/test-convert.t
diffstat 3 files changed, 15 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/convert/__init__.py	Thu Jul 30 00:58:05 2015 +0100
+++ b/hgext/convert/__init__.py	Wed Jul 22 16:57:11 2015 +0100
@@ -326,8 +326,11 @@
     usually should specify a target directory, because otherwise the
     target may be named ``...-hg``.
 
-    It is possible to limit the amount of source history to be
-    converted by specifying an initial Perforce revision:
+    The following options can be set with ``--config``:
+
+    :convert.p4.encoding: specify the encoding to use when decoding standard
+        output of the Perforce command line tool. The default is default system
+        encoding.
 
     :convert.p4.startrev: specify initial Perforce revision (a
         Perforce changelist number).
--- a/hgext/convert/p4.py	Thu Jul 30 00:58:05 2015 +0100
+++ b/hgext/convert/p4.py	Wed Jul 22 16:57:11 2015 +0100
@@ -39,6 +39,9 @@
 
 class p4_source(converter_source):
     def __init__(self, ui, path, revs=None):
+        # avoid import cycle
+        import convcmd
+
         super(p4_source, self).__init__(ui, path, revs=revs)
 
         if "/" in path and not path.startswith('//'):
@@ -54,7 +57,8 @@
         self.tags = {}
         self.lastbranch = {}
         self.parent = {}
-        self.encoding = "latin_1"
+        self.encoding = self.ui.config('convert', 'p4.encoding',
+                                       default=convcmd.orig_encoding)
         self.depotname = {}           # mapping from local name to depot name
         self.localname = {} # mapping from depot name to local name
         self.re_type = re.compile(
--- a/tests/test-convert.t	Thu Jul 30 00:58:05 2015 +0100
+++ b/tests/test-convert.t	Wed Jul 22 16:57:11 2015 +0100
@@ -275,9 +275,12 @@
       that when a depot path is given you then usually should specify a target
       directory, because otherwise the target may be named "...-hg".
   
-      It is possible to limit the amount of source history to be converted by
-      specifying an initial Perforce revision:
+      The following options can be set with "--config":
   
+      convert.p4.encoding
+                    specify the encoding to use when decoding standard output of
+                    the Perforce command line tool. The default is default
+                    system encoding.
       convert.p4.startrev
                     specify initial Perforce revision (a Perforce changelist
                     number).