Mercurial: mercurial/util.py comparison

comparison mercurial/util.py @ 37083:f99d64e8a4e4

stringutil: move generic string helpers to new module. Per https://phab.mercurial-scm.org/D2903#46738. URL and file path functions are left in place since they are big enough to warrant separate modules of their own.

author	Yuya Nishihara <yuya@tcha.org>
date	Thu, 22 Mar 2018 21:19:31 +0900
parents	1a1d1c44b570
children	f0b6fbea00cf

comparison

equal deleted inserted replaced

-:1a1d1c44b570
+:f99d64e8a4e4
 from __future__ import absolute_import, print_function
 import abc
 import bz2
-import codecs
 import collections
 import contextlib
 import errno
 import gc
 import hashlib
 import socket
 import stat
 import subprocess
 import sys
 import tempfile
-import textwrap
 import time
 import traceback
 import warnings
 import zlib
 node as nodemod,
 policy,
 pycompat,
 urllibcompat,
 )
-from .utils import dateutil
+from .utils import (
+dateutil,
+stringutil,
+)
 base85 = policy.importmod(r'base85')
 osutil = policy.importmod(r'osutil')
 parsers = policy.importmod(r'parsers')
 def setsockopt(self, *args, **kwargs):
 return object.__getattribute__(self, r'_observedcall')(
 r'setsockopt', *args, **kwargs)
-_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
-_DATA_ESCAPE_MAP.update({
-b'\\': b'\\\\',
-b'\r': br'\r',
-b'\n': br'\n',
-})
-_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
-def escapedata(s):
-if isinstance(s, bytearray):
-s = bytes(s)
-return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s)
 class baseproxyobserver(object):
 def _writedata(self, data):
 if not self.logdata:
 if self.logdataapis:
 self.fh.write('\n')
 "filter a string through a command that transforms its input to its output"
 for name, fn in filtertable.iteritems():
 if cmd.startswith(name):
 return fn(s, cmd[len(name):].lstrip())
 return pipefilter(s, cmd)
-def binary(s):
-"""return true if a string is binary data"""
-return bool(s and '\0' in s)
 def increasingchunks(source, min=1024, max=65536):
 '''return no less than min bytes per chunk while data remains,
 doubling min after each chunk until it reaches max'''
 def log2(x):
 return None
 b[0:len(res)] = res
 return len(res)
-def stringmatcher(pattern, casesensitive=True):
-"""
-accepts a string, possibly starting with 're:' or 'literal:' prefix.
-returns the matcher name, pattern, and matcher function.
-missing or unknown prefixes are treated as literal matches.
-helper for tests:
->>> def test(pattern, *tests):
-...     kind, pattern, matcher = stringmatcher(pattern)
-...     return (kind, pattern, [bool(matcher(t)) for t in tests])
->>> def itest(pattern, *tests):
-...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
-...     return (kind, pattern, [bool(matcher(t)) for t in tests])
-exact matching (no prefix):
->>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
-('literal', 'abcdefg', [False, False, True])
-regex matching ('re:' prefix)
->>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
-('re', 'a.+b', [False, False, True])
-force exact matches ('literal:' prefix)
->>> test(b'literal:re:foobar', b'foobar', b're:foobar')
-('literal', 're:foobar', [False, True])
-unknown prefixes are ignored and treated as literals
->>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
-('literal', 'foo:bar', [False, False, True])
-case insensitive regex matches
->>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
-('re', 'A.+b', [False, False, True])
-case insensitive literal matches
->>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
-('literal', 'ABCDEFG', [False, False, True])
-"""
-if pattern.startswith('re:'):
-pattern = pattern[3:]
-try:
-flags = 0
-if not casesensitive:
-flags = remod.I
-regex = remod.compile(pattern, flags)
-except remod.error as e:
-raise error.ParseError(_('invalid regular expression: %s')
-% e)
-return 're', pattern, regex.search
-elif pattern.startswith('literal:'):
-pattern = pattern[8:]
-match = pattern.__eq__
-if not casesensitive:
-ipat = encoding.lower(pattern)
-match = lambda s: ipat == encoding.lower(s)
-return 'literal', pattern, match
-def shortuser(user):
-"""Return a short representation of a user name or email address."""
-f = user.find('@')
-if f >= 0:
-user = user[:f]
-f = user.find('<')
-if f >= 0:
-user = user[f + 1:]
-f = user.find(' ')
-if f >= 0:
-user = user[:f]
-f = user.find('.')
-if f >= 0:
-user = user[:f]
-return user
-def emailuser(user):
-"""Return the user portion of an email address."""
-f = user.find('@')
-if f >= 0:
-user = user[:f]
-f = user.find('<')
-if f >= 0:
-user = user[f + 1:]
-return user
-def email(author):
-'''get email of author.'''
-r = author.find('>')
-if r == -1:
-r = None
-return author[author.find('<') + 1:r]
-def ellipsis(text, maxlength=400):
-"""Trim string to at most maxlength (default: 400) columns in display."""
-return encoding.trim(text, maxlength, ellipsis='...')
 def unitcountfn(*unittable):
 '''return a function that renders a readable count of some quantity'''
 def go(count):
 for multiplier, divisor, format in unittable:
 nativeeolwriter = _crlfwriter
 else:
 tonativeeol = pycompat.identity
 fromnativeeol = pycompat.identity
 nativeeolwriter = pycompat.identity
-def escapestr(s):
-# call underlying function of s.encode('string_escape') directly for
-# Python 3 compatibility
-return codecs.escape_encode(s)[0]
-def unescapestr(s):
-return codecs.escape_decode(s)[0]
-def forcebytestr(obj):
-"""Portably format an arbitrary object (e.g. exception) into a byte
-string."""
-try:
-return pycompat.bytestr(obj)
-except UnicodeEncodeError:
-# non-ascii string, may be lossy
-return pycompat.bytestr(encoding.strtolocal(str(obj)))
-def uirepr(s):
-# Avoid double backslash in Windows path repr()
-return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
-# delay import of textwrap
-def _MBTextWrapper(**kwargs):
-class tw(textwrap.TextWrapper):
-"""
-Extend TextWrapper for width-awareness.
-Neither number of 'bytes' in any encoding nor 'characters' is
-appropriate to calculate terminal columns for specified string.
-Original TextWrapper implementation uses built-in 'len()' directly,
-so overriding is needed to use width information of each characters.
-In addition, characters classified into 'ambiguous' width are
-treated as wide in East Asian area, but as narrow in other.
-This requires use decision to determine width of such characters.
-"""
-def _cutdown(self, ucstr, space_left):
-l = 0
-colwidth = encoding.ucolwidth
-for i in xrange(len(ucstr)):
-l += colwidth(ucstr[i])
-if space_left < l:
-return (ucstr[:i], ucstr[i:])
-return ucstr, ''
-# overriding of base class
-def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
-space_left = max(width - cur_len, 1)
-if self.break_long_words:
-cut, res = self._cutdown(reversed_chunks[-1], space_left)
-cur_line.append(cut)
-reversed_chunks[-1] = res
-elif not cur_line:
-cur_line.append(reversed_chunks.pop())
-# this overriding code is imported from TextWrapper of Python 2.6
-# to calculate columns of string by 'encoding.ucolwidth()'
-def _wrap_chunks(self, chunks):
-colwidth = encoding.ucolwidth
-lines = []
-if self.width <= 0:
-raise ValueError("invalid width %r (must be > 0)" % self.width)
-# Arrange in reverse order so items can be efficiently popped
-# from a stack of chucks.
-chunks.reverse()
-while chunks:
-# Start the list of chunks that will make up the current line.
-# cur_len is just the length of all the chunks in cur_line.
-cur_line = []
-cur_len = 0
-# Figure out which static string will prefix this line.
-if lines:
-indent = self.subsequent_indent
-else:
-indent = self.initial_indent
-# Maximum width for this line.
-width = self.width - len(indent)
-# First chunk on line is whitespace -- drop it, unless this
-# is the very beginning of the text (i.e. no lines started yet).
-if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
-del chunks[-1]
-while chunks:
-l = colwidth(chunks[-1])
-# Can at least squeeze this chunk onto the current line.
-if cur_len + l <= width:
-cur_line.append(chunks.pop())
-cur_len += l
-# Nope, this line is full.
-else:
-break
-# The current line is full, and the next chunk is too big to
-# fit on *any* line (not just this one).
-if chunks and colwidth(chunks[-1]) > width:
-self._handle_long_word(chunks, cur_line, cur_len, width)
-# If the last chunk on this line is all whitespace, drop it.
-if (self.drop_whitespace and
-cur_line and cur_line[-1].strip() == r''):
-del cur_line[-1]
-# Convert current line back to a string and store it in list
-# of all lines (return value).
-if cur_line:
-lines.append(indent + r''.join(cur_line))
-return lines
-global _MBTextWrapper
-_MBTextWrapper = tw
-return tw(**kwargs)
-def wrap(line, width, initindent='', hangindent=''):
-maxindent = max(len(hangindent), len(initindent))
-if width <= maxindent:
-# adjust for weird terminal size
-width = max(78, maxindent + 1)
-line = line.decode(pycompat.sysstr(encoding.encoding),
-pycompat.sysstr(encoding.encodingmode))
-initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
-pycompat.sysstr(encoding.encodingmode))
-hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
-pycompat.sysstr(encoding.encodingmode))
-wrapper = _MBTextWrapper(width=width,
-initial_indent=initindent,
-subsequent_indent=hangindent)
-return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
 if (pyplatform.python_implementation() == 'CPython' and
 sys.version_info < (3, 0)):
 # There is an issue in CPython that some IO methods do not handle EINTR
 # correctly. The following table shows what CPython version (and functions)
 try:
 return socket.getservbyname(pycompat.sysstr(port))
 except socket.error:
 raise Abort(_("no port number associated with service '%s'") % port)
-_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
-'0': False, 'no': False, 'false': False, 'off': False,
-'never': False}
-def parsebool(s):
-"""Parse s into a boolean.
-If s is not a valid boolean, returns None.
-"""
-return _booleans.get(s.lower(), None)
 class url(object):
 r"""Reliable URL parser.
 This parses URLs and provides attributes for the following
 components:
 shortdate = _deprecatedfunc(dateutil.shortdate, '4.6')
 parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6')
 strdate = _deprecatedfunc(dateutil.strdate, '4.6')
 parsedate = _deprecatedfunc(dateutil.parsedate, '4.6')
 matchdate = _deprecatedfunc(dateutil.matchdate, '4.6')
+def _deprecatedfunc(func, version):  # TODO: stub -- returns func unchanged and ignores version, so the aliases built with it emit no deprecation warning. NOTE(review): the dateutil aliases above already call _deprecatedfunc, so this presumably rebinds an earlier definition -- confirm before relying on it
+return func
+escapedata = _deprecatedfunc(stringutil.escapedata, '4.6')  # this and the following assignments keep the old util.* names bound to the helpers now in stringutil; '4.6' is passed as the version argument
+binary = _deprecatedfunc(stringutil.binary, '4.6')
+stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6')
+shortuser = _deprecatedfunc(stringutil.shortuser, '4.6')
+emailuser = _deprecatedfunc(stringutil.emailuser, '4.6')
+email = _deprecatedfunc(stringutil.email, '4.6')
+ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6')
+escapestr = _deprecatedfunc(stringutil.escapestr, '4.6')
+unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6')
+forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6')
+uirepr = _deprecatedfunc(stringutil.uirepr, '4.6')
+wrap = _deprecatedfunc(stringutil.wrap, '4.6')
+parsebool = _deprecatedfunc(stringutil.parsebool, '4.6')

Mercurial > hg

comparison mercurial/util.py @ 37083:f99d64e8a4e4