changeset 43263:06a33a501aa2

phabricator: treat non-utf-8 text files as binary as phabricator requires

Phabricator can't cope with text files that are not UTF-8, so it requires them to be submitted as binary files instead. This has the unfortunate effect of making them practically unreviewable in Phabricator, since it will only display the separate versions of the file in other views, not a diff. `phabread`ing such a submission is similar, since it will just output the binary patch, but `hg import` copes with it fine and `hg diff` afterwards will show the actual changes. It is still a marked improvement over trying to submit them as text, which just leads to corruption (Phabricator will either output ? or HTML entities for non-UTF-8 characters, depending on context).

Running decode on the whole file like this seems slightly unfortunate, but I'm not aware of a better way. It needs to be done to the p1() version as well, to detect conversions to UTF-8.

Differential Revision: https://phab.mercurial-scm.org/D7054
author Ian Moody <moz-ian@perix.co.uk>
date Thu, 10 Oct 2019 22:05:28 +0100
parents af067d29b19e
children a4da1c3b82ab
files hgext/phabricator.py
diffstat 1 files changed, 20 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/phabricator.py	Sun Oct 06 18:23:06 2019 +0100
+++ b/hgext/phabricator.py	Thu Oct 10 22:05:28 2019 +0100
@@ -697,6 +697,23 @@
 gitmode = {b'l': b'120000', b'x': b'100755', b'': b'100644'}
 
 
+def notutf8(fctx):
+    """detect non-UTF-8 text files since Phabricator requires them to be marked
+    as binary
+    """
+    try:
+        fctx.data().decode('utf-8')
+        if fctx.parents():
+            fctx.p1().data().decode('utf-8')
+        return False
+    except UnicodeDecodeError:
+        fctx.repo().ui.write(
+            _(b'file %s detected as non-UTF-8, marked as binary\n')
+            % fctx.path()
+        )
+        return True
+
+
 def addremoved(pdiff, ctx, removed):
     """add removed files to the phabdiff. Shouldn't include moves"""
     for fname in removed:
@@ -705,7 +722,7 @@
         )
         pchange.addoldmode(gitmode[ctx.p1()[fname].flags()])
         fctx = ctx.p1()[fname]
-        if not fctx.isbinary():
+        if not (fctx.isbinary() or notutf8(fctx)):
             maketext(pchange, ctx, fname)
 
         pdiff.addchange(pchange)
@@ -722,7 +739,7 @@
             pchange.addoldmode(originalmode)
             pchange.addnewmode(filemode)
 
-        if fctx.isbinary():
+        if fctx.isbinary() or notutf8(fctx):
             makebinary(pchange, fctx)
             addoldbinary(pchange, fctx, fname)
         else:
@@ -781,7 +798,7 @@
             pchange.addnewmode(gitmode[fctx.flags()])
             pchange.type = DiffChangeType.ADD
 
-        if fctx.isbinary():
+        if fctx.isbinary() or notutf8(fctx):
             makebinary(pchange, fctx)
             if renamed:
                 addoldbinary(pchange, fctx, originalfname)