view mercurial/pure/charencode.py @ 46634:ad30b29bc23d

copies: choose target directory based on longest match If one side of a merge renames `dir1/` to `dir2/` and the subdirectory `dir1/subdir1/` to `dir2/subdir2/`, and the other side of the merge adds a file in `dir1/subdir1/`, we should clearly move that into `dir2/subdir2/`. We already detect the directories correctly before this patch, but we iterate over them in arbitrary order. That results in the new file sometimes ending up in `dir2/subdir1/` instead. This patch fixes it by iterating over the source directories by visiting subdirectories first. That's achieved by simply iterating over them in reverse lexicographical order. Without the fix, the test case still passes on Python 2 but fails on Python 3. It depends on the iteration order of the dict. I did not look into how it's built up and why it behaved differently before the fix. I could probably have gotten it to fail on Python 2 as well by choosing different directory names. Differential Revision: https://phab.mercurial-scm.org/D10115
author Martin von Zweigbergk <martinvonz@google.com>
date Thu, 04 Mar 2021 16:06:55 -0800
parents 89a2afe31e82
children d4ba4d51f85f
line wrap: on
line source

# charencode.py - miscellaneous character encoding
#
#  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import array

from .. import pycompat


def isasciistr(s):
    try:
        s.decode('ascii')
        return True
    except UnicodeDecodeError:
        return False


def asciilower(s):
    """convert a string to lowercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found."""
    s.decode('ascii')
    return s.lower()


def asciiupper(s):
    """convert a string to uppercase if ASCII

    Raises UnicodeDecodeError if non-ASCII characters are found."""
    s.decode('ascii')
    return s.upper()


_jsonmap = []
_jsonmap.extend(b"\\u%04x" % x for x in range(32))
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
_jsonmap.append(b'\\u007f')
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'
_jsonmap[0x08] = b'\\b'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))


def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Raises ValueError if non-ASCII characters have to be escaped.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    try:
        return b''.join(jm[x] for x in bytearray(u8chars))
    except IndexError:
        raise ValueError


if pycompat.ispy3:
    _utf8strict = r'surrogatepass'
else:
    _utf8strict = r'strict'


def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    Escapes all non-ASCII characters no matter if paranoid is False.
    """
    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap
    # non-BMP char is represented as UTF-16 surrogate pair
    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
    u16codes = array.array('H', u16b)
    u16codes.pop(0)  # drop BOM
    return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes)