view mercurial/win32.py @ 15141:16dc9a32ca04

mdiff: speed up showfunc for large diffs This addresses the following issues with showfunc: - Silly usage of regular expressions. - Doing str.rstrip() needlessly in an inner loop. - Doing catastrophic backtracking when trying to find a function line. Finding function text is now at worst O(n lines in the old file), and at best close to O(n hunks). Given a diff like this[1]: src/main/antlr3/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunker.g | 4 +- src/main/java/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunkerLexer.java | 2 +- src/main/java/uk/ac/cam/ch/wwmm/pregenerated/ChemicalChunkerParser.java | 29189 +++++---- 3 files changed, 14741 insertions(+), 14454 deletions(-) [1]: https://bitbucket.org/wwmm/chemicaltagger/changeset/d2bfbaecd4fc/raw Without this change, hg log --stat --config diff.showfunc=1 takes an absurdly long time to complete: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 32813 0 80.3546 40.6086 mercurial.mdiff:160(yieldhunk) +65062746 0 25.7227 25.7227 +<method 'match' of '_sre.SRE_Pattern' objects> +65062746 0 14.0221 14.0221 +<method 'rstrip' of 'str' objects> +1809 0 0.0009 0.0009 +mercurial.mdiff:148(contextend) +1809 0 0.0003 0.0003 +<len> 65062746 0 25.7227 25.7227 <method 'match' of '_sre.SRE_Pattern' objects> 65062763 0 14.0221 14.0221 <method 'rstrip' of 'str' objects> 543 0 0.1631 0.1631 <zlib.decompress> 3 0 0.0505 0.0505 <mercurial.bdiff.blocks> 31007 0 80.4564 0.0477 mercurial.mdiff:147(_unidiff) +32813 0 80.3546 40.6086 +mercurial.mdiff:160(yieldhunk) +3 0 0.0505 0.0505 +<mercurial.bdiff.blocks> +3618 0 0.0022 0.0022 +mercurial.mdiff:154(contextstart) +5427 0 0.0013 0.0013 +<len> +3 0 0.0001 0.0000 +re:188(compile) 1 0 80.8381 0.0322 mercurial.patch:1777(diffstatdata) +107499 0 0.0235 0.0235 +<method 'startswith' of 'str' objects> +31014 0 80.7820 0.0071 +mercurial.util:1284(iterlines) +3 0 0.0000 0.0000 +<method 'search' of '_sre.SRE_Pattern' objects> +4 0 0.0000 0.0000 +mercurial.patch:1783(addresult) +3 0 0.0000 0.0000 +<method 'group' of '_sre.SRE_Match' objects> 6 0 0.0444 0.0283 mercurial.mdiff:12(splitnewlines) +6 0 0.0160 0.0160 +<method 'split' of 'str' objects> 32 0 0.0246 0.0246 <method 'update' of '_hashlib.HASH' objects> 11 0 0.0236 0.0236 <method 'read' of 'file' objects> Time: real 80.880 secs (user 80.200+0.000 sys 0.380+0.000) With this change, it's almost as fast as not using showfunc at all: CallCount Recursive Total(ms) Inline(ms) module:lineno(function) 543 0 0.1699 0.1699 <zlib.decompress> 3 0 0.0501 0.0501 <mercurial.bdiff.blocks> 32813 0 0.0415 0.0348 mercurial.mdiff:161(yieldhunk) +70837 0 0.0058 0.0058 +<method 'isalnum' of 'str' objects> +1809 0 0.0006 0.0006 +mercurial.mdiff:148(contextend) +1809 0 0.0002 0.0002 +<len> 1 0 0.4879 0.0310 mercurial.patch:1777(diffstatdata) +107499 0 0.0230 0.0230 +<method 'startswith' of 'str' objects> +31014 0 0.4335 0.0065 +mercurial.util:1284(iterlines) +3 0 0.0000 0.0000 +<method 'search' of '_sre.SRE_Pattern' objects> +4 0 0.0000 0.0000 +mercurial.patch:1783(addresult) +1 0 0.0004 0.0000 +re:188(compile) 32 0 0.0293 0.0293 <method 'update' of '_hashlib.HASH' objects> 6 0 0.0427 0.0279 mercurial.mdiff:12(splitnewlines) +6 0 0.0147 0.0147 +<method 'split' of 'str' objects> 31007 0 0.1169 0.0235 mercurial.mdiff:147(_unidiff) +3 0 0.0501 0.0501 +<mercurial.bdiff.blocks> +32813 0 0.0415 0.0348 +mercurial.mdiff:161(yieldhunk) +3618 0 0.0012 0.0012 +mercurial.mdiff:154(contextstart) +5427 0 0.0006 0.0006 +<len> 107597 0 0.0230 0.0230 <method 'startswith' of 'str' objects> 16 0 0.0213 0.0213 <mercurial.mpatch.patches> 194 0 0.0149 0.0149 <method 'split' of 'str' objects> Time: real 0.530 secs (user 0.450+0.000 sys 0.070+0.000)
author Brodie Rao <brodie@bitheap.org>
date Mon, 19 Sep 2011 15:58:03 -0700
parents ec222a29bdf0
children e34106fa0dc3 133a7922a900
line wrap: on
line source

# win32.py - utility functions that use win32 API
#
# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import encoding
import ctypes, errno, os, struct, subprocess, random

_kernel32 = ctypes.windll.kernel32
_advapi32 = ctypes.windll.advapi32
_user32 = ctypes.windll.user32

_BOOL = ctypes.c_long
_WORD = ctypes.c_ushort
_DWORD = ctypes.c_ulong
_UINT = ctypes.c_uint
_LONG = ctypes.c_long
_LPCSTR = _LPSTR = ctypes.c_char_p
_HANDLE = ctypes.c_void_p
_HWND = _HANDLE

_INVALID_HANDLE_VALUE = _HANDLE(-1).value

# GetLastError
_ERROR_SUCCESS = 0
_ERROR_INVALID_PARAMETER = 87
_ERROR_INSUFFICIENT_BUFFER = 122

# WPARAM is defined as UINT_PTR (unsigned type)
# LPARAM is defined as LONG_PTR (signed type)
if ctypes.sizeof(ctypes.c_long) == ctypes.sizeof(ctypes.c_void_p):
    _WPARAM = ctypes.c_ulong
    _LPARAM = ctypes.c_long
elif ctypes.sizeof(ctypes.c_longlong) == ctypes.sizeof(ctypes.c_void_p):
    _WPARAM = ctypes.c_ulonglong
    _LPARAM = ctypes.c_longlong

class _FILETIME(ctypes.Structure):
    _fields_ = [('dwLowDateTime', _DWORD),
                ('dwHighDateTime', _DWORD)]

class _BY_HANDLE_FILE_INFORMATION(ctypes.Structure):
    _fields_ = [('dwFileAttributes', _DWORD),
                ('ftCreationTime', _FILETIME),
                ('ftLastAccessTime', _FILETIME),
                ('ftLastWriteTime', _FILETIME),
                ('dwVolumeSerialNumber', _DWORD),
                ('nFileSizeHigh', _DWORD),
                ('nFileSizeLow', _DWORD),
                ('nNumberOfLinks', _DWORD),
                ('nFileIndexHigh', _DWORD),
                ('nFileIndexLow', _DWORD)]

# CreateFile 
_FILE_SHARE_READ = 0x00000001
_FILE_SHARE_WRITE = 0x00000002
_FILE_SHARE_DELETE = 0x00000004

_OPEN_EXISTING = 3

# SetFileAttributes
_FILE_ATTRIBUTE_NORMAL = 0x80
_FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 0x2000

# Process Security and Access Rights
_PROCESS_QUERY_INFORMATION = 0x0400

# GetExitCodeProcess
_STILL_ACTIVE = 259

# registry
_HKEY_CURRENT_USER = 0x80000001L
_HKEY_LOCAL_MACHINE = 0x80000002L
_KEY_READ = 0x20019
_REG_SZ = 1
_REG_DWORD = 4

class _STARTUPINFO(ctypes.Structure):
    _fields_ = [('cb', _DWORD),
                ('lpReserved', _LPSTR),
                ('lpDesktop', _LPSTR),
                ('lpTitle', _LPSTR),
                ('dwX', _DWORD),
                ('dwY', _DWORD),
                ('dwXSize', _DWORD),
                ('dwYSize', _DWORD),
                ('dwXCountChars', _DWORD),
                ('dwYCountChars', _DWORD),
                ('dwFillAttribute', _DWORD),
                ('dwFlags', _DWORD),
                ('wShowWindow', _WORD),
                ('cbReserved2', _WORD),
                ('lpReserved2', ctypes.c_char_p),
                ('hStdInput', _HANDLE),
                ('hStdOutput', _HANDLE),
                ('hStdError', _HANDLE)]

class _PROCESS_INFORMATION(ctypes.Structure):
    _fields_ = [('hProcess', _HANDLE),
                ('hThread', _HANDLE),
                ('dwProcessId', _DWORD),
                ('dwThreadId', _DWORD)]

_DETACHED_PROCESS = 0x00000008
_STARTF_USESHOWWINDOW = 0x00000001
_SW_HIDE = 0

class _COORD(ctypes.Structure):
    _fields_ = [('X', ctypes.c_short),
                ('Y', ctypes.c_short)]

class _SMALL_RECT(ctypes.Structure):
    _fields_ = [('Left', ctypes.c_short),
                ('Top', ctypes.c_short),
                ('Right', ctypes.c_short),
                ('Bottom', ctypes.c_short)]

class _CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
    _fields_ = [('dwSize', _COORD),
                ('dwCursorPosition', _COORD),
                ('wAttributes', _WORD),
                ('srWindow', _SMALL_RECT),
                ('dwMaximumWindowSize', _COORD)]

_STD_ERROR_HANDLE = _DWORD(-12).value

# types of parameters of C functions used (required by pypy)

_kernel32.CreateFileA.argtypes = [_LPCSTR, _DWORD, _DWORD, ctypes.c_void_p,
    _DWORD, _DWORD, _HANDLE]
_kernel32.CreateFileA.restype = _HANDLE

_kernel32.GetFileInformationByHandle.argtypes = [_HANDLE, ctypes.c_void_p]
_kernel32.GetFileInformationByHandle.restype = _BOOL

_kernel32.CloseHandle.argtypes = [_HANDLE]
_kernel32.CloseHandle.restype = _BOOL

try:
    _kernel32.CreateHardLinkA.argtypes = [_LPCSTR, _LPCSTR, ctypes.c_void_p]
    _kernel32.CreateHardLinkA.restype = _BOOL
except AttributeError:
    pass

_kernel32.SetFileAttributesA.argtypes = [_LPCSTR, _DWORD]
_kernel32.SetFileAttributesA.restype = _BOOL

_kernel32.OpenProcess.argtypes = [_DWORD, _BOOL, _DWORD]
_kernel32.OpenProcess.restype = _HANDLE

_kernel32.GetExitCodeProcess.argtypes = [_HANDLE, ctypes.c_void_p]
_kernel32.GetExitCodeProcess.restype = _BOOL

_kernel32.GetLastError.argtypes = []
_kernel32.GetLastError.restype = _DWORD

_kernel32.GetModuleFileNameA.argtypes = [_HANDLE, ctypes.c_void_p, _DWORD]
_kernel32.GetModuleFileNameA.restype = _DWORD

_kernel32.CreateProcessA.argtypes = [_LPCSTR, _LPCSTR, ctypes.c_void_p,
    ctypes.c_void_p, _BOOL, _DWORD, ctypes.c_void_p, _LPCSTR, ctypes.c_void_p,
    ctypes.c_void_p]
_kernel32.CreateProcessA.restype = _BOOL

_kernel32.ExitProcess.argtypes = [_UINT]
_kernel32.ExitProcess.restype = None

_kernel32.GetCurrentProcessId.argtypes = []
_kernel32.GetCurrentProcessId.restype = _DWORD

_SIGNAL_HANDLER = ctypes.WINFUNCTYPE(_BOOL, _DWORD)
_kernel32.SetConsoleCtrlHandler.argtypes = [_SIGNAL_HANDLER, _BOOL]
_kernel32.SetConsoleCtrlHandler.restype = _BOOL

_kernel32.GetStdHandle.argtypes = [_DWORD]
_kernel32.GetStdHandle.restype = _HANDLE

_kernel32.GetConsoleScreenBufferInfo.argtypes = [_HANDLE, ctypes.c_void_p]
_kernel32.GetConsoleScreenBufferInfo.restype = _BOOL

_advapi32.RegOpenKeyExA.argtypes = [_HANDLE, _LPCSTR, _DWORD, _DWORD,
    ctypes.c_void_p]
_advapi32.RegOpenKeyExA.restype = _LONG

_advapi32.RegQueryValueExA.argtypes = [_HANDLE, _LPCSTR, ctypes.c_void_p,
    ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p]
_advapi32.RegQueryValueExA.restype = _LONG

_advapi32.RegCloseKey.argtypes = [_HANDLE]
_advapi32.RegCloseKey.restype = _LONG

_advapi32.GetUserNameA.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
_advapi32.GetUserNameA.restype = _BOOL

_user32.GetWindowThreadProcessId.argtypes = [_HANDLE, ctypes.c_void_p]
_user32.GetWindowThreadProcessId.restype = _DWORD

_user32.ShowWindow.argtypes = [_HANDLE, ctypes.c_int]
_user32.ShowWindow.restype = _BOOL

_WNDENUMPROC = ctypes.WINFUNCTYPE(_BOOL, _HWND, _LPARAM)
_user32.EnumWindows.argtypes = [_WNDENUMPROC, _LPARAM]
_user32.EnumWindows.restype = _BOOL

def _raiseoserror(name):
    err = ctypes.WinError()
    raise OSError(err.errno, '%s: %s' % (name, err.strerror))

def _getfileinfo(name):
    fh = _kernel32.CreateFileA(name, 0,
            _FILE_SHARE_READ | _FILE_SHARE_WRITE | _FILE_SHARE_DELETE,
            None, _OPEN_EXISTING, 0, None)
    if fh == _INVALID_HANDLE_VALUE:
        _raiseoserror(name)
    try:
        fi = _BY_HANDLE_FILE_INFORMATION()
        if not _kernel32.GetFileInformationByHandle(fh, ctypes.byref(fi)):
            _raiseoserror(name)
        return fi
    finally:
        _kernel32.CloseHandle(fh)

def oslink(src, dst):
    try:
        if not _kernel32.CreateHardLinkA(dst, src, None):
            _raiseoserror(src)
    except AttributeError: # Wine doesn't support this function
        _raiseoserror(src)

def nlinks(name):
    '''return number of hardlinks for the given file'''
    return _getfileinfo(name).nNumberOfLinks

def samefile(fpath1, fpath2):
    '''Returns whether fpath1 and fpath2 refer to the same file. This is only
    guaranteed to work for files, not directories.'''
    res1 = _getfileinfo(fpath1)
    res2 = _getfileinfo(fpath2)
    return (res1.dwVolumeSerialNumber == res2.dwVolumeSerialNumber
        and res1.nFileIndexHigh == res2.nFileIndexHigh
        and res1.nFileIndexLow == res2.nFileIndexLow)

def samedevice(fpath1, fpath2):
    '''Returns whether fpath1 and fpath2 are on the same device. This is only
    guaranteed to work for files, not directories.'''
    res1 = _getfileinfo(fpath1)
    res2 = _getfileinfo(fpath2)
    return res1.dwVolumeSerialNumber == res2.dwVolumeSerialNumber

def testpid(pid):
    '''return True if pid is still running or unable to
    determine, False otherwise'''
    h = _kernel32.OpenProcess(_PROCESS_QUERY_INFORMATION, False, pid)
    if h:
        try:
            status = _DWORD()
            if _kernel32.GetExitCodeProcess(h, ctypes.byref(status)):
                return status.value == _STILL_ACTIVE
        finally:
            _kernel32.CloseHandle(h)
    return _kernel32.GetLastError() != _ERROR_INVALID_PARAMETER

def lookupreg(key, valname=None, scope=None):
    ''' Look up a key/value name in the Windows registry.

    valname: value name. If unspecified, the default value for the key
    is used.
    scope: optionally specify scope for registry lookup, this can be
    a sequence of scopes to look up in order. Default (CURRENT_USER,
    LOCAL_MACHINE).
    '''
    byref = ctypes.byref
    if scope is None:
        scope = (_HKEY_CURRENT_USER, _HKEY_LOCAL_MACHINE)
    elif not isinstance(scope, (list, tuple)):
        scope = (scope,)
    for s in scope:
        kh = _HANDLE()
        res = _advapi32.RegOpenKeyExA(s, key, 0, _KEY_READ, ctypes.byref(kh))
        if res != _ERROR_SUCCESS:
            continue
        try:
            size = _DWORD(600)
            type = _DWORD()
            buf = ctypes.create_string_buffer(size.value + 1)
            res = _advapi32.RegQueryValueExA(kh.value, valname, None,
                                       byref(type), buf, byref(size))
            if res != _ERROR_SUCCESS:
                continue
            if type.value == _REG_SZ:
                # never let a Unicode string escape into the wild
                return encoding.tolocal(buf.value.encode('UTF-8'))
            elif type.value == _REG_DWORD:
                fmt = '<L'
                s = ctypes.string_at(byref(buf), struct.calcsize(fmt))
                return struct.unpack(fmt, s)[0]
        finally:
            _advapi32.RegCloseKey(kh.value)

def executablepath():
    '''return full path of hg.exe'''
    size = 600
    buf = ctypes.create_string_buffer(size + 1)
    len = _kernel32.GetModuleFileNameA(None, ctypes.byref(buf), size)
    if len == 0:
        raise ctypes.WinError()
    elif len == size:
        raise ctypes.WinError(_ERROR_INSUFFICIENT_BUFFER)
    return buf.value

def getuser():
    '''return name of current user'''
    size = _DWORD(300)
    buf = ctypes.create_string_buffer(size.value + 1)
    if not _advapi32.GetUserNameA(ctypes.byref(buf), ctypes.byref(size)):
        raise ctypes.WinError()
    return buf.value

_signalhandler = []

def setsignalhandler():
    '''Register a termination handler for console events including
    CTRL+C. python signal handlers do not work well with socket
    operations.
    '''
    def handler(event):
        _kernel32.ExitProcess(1)

    if _signalhandler:
        return # already registered
    h = _SIGNAL_HANDLER(handler)
    _signalhandler.append(h) # needed to prevent garbage collection
    if not _kernel32.SetConsoleCtrlHandler(h, True):
        raise ctypes.WinError()

def hidewindow():

    def callback(hwnd, pid):
        wpid = _DWORD()
        _user32.GetWindowThreadProcessId(hwnd, ctypes.byref(wpid))
        if pid == wpid.value:
            _user32.ShowWindow(hwnd, _SW_HIDE)
            return False # stop enumerating windows
        return True

    pid = _kernel32.GetCurrentProcessId()
    _user32.EnumWindows(_WNDENUMPROC(callback), pid)

def termwidth():
    # cmd.exe does not handle CR like a unix console, the CR is
    # counted in the line length. On 80 columns consoles, if 80
    # characters are written, the following CR won't apply on the
    # current line but on the new one. Keep room for it.
    width = 79
    # Query stderr to avoid problems with redirections
    screenbuf = _kernel32.GetStdHandle(
                  _STD_ERROR_HANDLE) # don't close the handle returned
    if screenbuf is None or screenbuf == _INVALID_HANDLE_VALUE:
        return width
    csbi = _CONSOLE_SCREEN_BUFFER_INFO()
    if not _kernel32.GetConsoleScreenBufferInfo(
                        screenbuf, ctypes.byref(csbi)):
        return width
    width = csbi.srWindow.Right - csbi.srWindow.Left
    return width

def spawndetached(args):
    # No standard library function really spawns a fully detached
    # process under win32 because they allocate pipes or other objects
    # to handle standard streams communications. Passing these objects
    # to the child process requires handle inheritance to be enabled
    # which makes really detached processes impossible.
    si = _STARTUPINFO()
    si.cb = ctypes.sizeof(_STARTUPINFO)
    si.dwFlags = _STARTF_USESHOWWINDOW
    si.wShowWindow = _SW_HIDE

    pi = _PROCESS_INFORMATION()

    env = ''
    for k in os.environ:
        env += "%s=%s\0" % (k, os.environ[k])
    if not env:
        env = '\0'
    env += '\0'

    args = subprocess.list2cmdline(args)
    # Not running the command in shell mode makes python26 hang when
    # writing to hgweb output socket.
    comspec = os.environ.get("COMSPEC", "cmd.exe")
    args = comspec + " /c " + args

    res = _kernel32.CreateProcessA(
        None, args, None, None, False, _DETACHED_PROCESS,
        env, os.getcwd(), ctypes.byref(si), ctypes.byref(pi))
    if not res:
        raise ctypes.WinError()

    return pi.dwProcessId

def unlink(f):
    '''try to implement POSIX' unlink semantics on Windows'''

    # POSIX allows to unlink and rename open files. Windows has serious
    # problems with doing that:
    # - Calling os.unlink (or os.rename) on a file f fails if f or any
    #   hardlinked copy of f has been opened with Python's open(). There is no
    #   way such a file can be deleted or renamed on Windows (other than
    #   scheduling the delete or rename for the next reboot).
    # - Calling os.unlink on a file that has been opened with Mercurial's
    #   posixfile (or comparable methods) will delay the actual deletion of
    #   the file for as long as the file is held open. The filename is blocked
    #   during that time and cannot be used for recreating a new file under
    #   that same name ("zombie file"). Directories containing such zombie files
    #   cannot be removed or moved.
    # A file that has been opened with posixfile can be renamed, so we rename
    # f to a random temporary name before calling os.unlink on it. This allows
    # callers to recreate f immediately while having other readers do their
    # implicit zombie filename blocking on a temporary name.

    for tries in xrange(10):
        temp = '%s-%08x' % (f, random.randint(0, 0xffffffff))
        try:
            os.rename(f, temp)  # raises OSError EEXIST if temp exists
            break
        except OSError, e:
            if e.errno != errno.EEXIST:
                raise
    else:
        raise IOError, (errno.EEXIST, "No usable temporary filename found")

    try:
        os.unlink(temp)
    except OSError:
        # The unlink might have failed because the READONLY attribute may heave
        # been set on the original file. Rename works fine with READONLY set,
        # but not os.unlink. Reset all attributes and try again.
        _kernel32.SetFileAttributesA(temp, _FILE_ATTRIBUTE_NORMAL)
        try:
            os.unlink(temp)
        except OSError:
            # The unlink might have failed due to some very rude AV-Scanners.
            # Leaking a tempfile is the lesser evil than aborting here and
            # leaving some potentially serious inconsistencies.
            pass

def makedir(path, notindexed):
    os.mkdir(path)
    if notindexed:
        _kernel32.SetFileAttributesA(path, _FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)