fsmonitor: refresh pywatchman to upstream
Update to upstream version c77452. The refresh includes fixes to improve
Windows compatibility.
There is a minor update to 'test-check-py3-compat.t' as c77452 no longer has
the py3 compatibility issues the previous version had.
# no-check-commit
--- a/hgext/fsmonitor/pywatchman/__init__.py Thu Dec 22 11:07:59 2016 -0800
+++ b/hgext/fsmonitor/pywatchman/__init__.py Thu Dec 22 11:22:32 2016 -0800
@@ -26,9 +26,14 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import inspect
+import math
import os
-import errno
-import math
import socket
import subprocess
import time
@@ -36,11 +41,20 @@
# Sometimes it's really hard to get Python extensions to compile,
# so fall back to a pure Python implementation.
try:
- import bser
+ from . import bser
+ # Demandimport causes modules to be loaded lazily. Force the load now
+ # so that we can fall back on pybser if bser doesn't exist
+ bser.pdu_info
except ImportError:
- import pybser as bser
+ from . import pybser as bser
-import capabilities
+from . import (
+ capabilities,
+ compat,
+ encoding,
+ load,
+)
+
if os.name == 'nt':
import ctypes
@@ -55,18 +69,29 @@
FORMAT_MESSAGE_FROM_SYSTEM = 0x00001000
FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100
FORMAT_MESSAGE_IGNORE_INSERTS = 0x00000200
+ WAIT_FAILED = 0xFFFFFFFF
WAIT_TIMEOUT = 0x00000102
WAIT_OBJECT_0 = 0x00000000
- ERROR_IO_PENDING = 997
+ WAIT_IO_COMPLETION = 0x000000C0
+ INFINITE = 0xFFFFFFFF
+
+ # Overlapped I/O operation is in progress. (997)
+ ERROR_IO_PENDING = 0x000003E5
+
+ # The pointer size follows the architecture
+ # We use WPARAM since this type is already conditionally defined
+ ULONG_PTR = ctypes.wintypes.WPARAM
class OVERLAPPED(ctypes.Structure):
_fields_ = [
- ("Internal", wintypes.ULONG), ("InternalHigh", wintypes.ULONG),
+ ("Internal", ULONG_PTR), ("InternalHigh", ULONG_PTR),
("Offset", wintypes.DWORD), ("OffsetHigh", wintypes.DWORD),
("hEvent", wintypes.HANDLE)
]
def __init__(self):
+ self.Internal = 0
+ self.InternalHigh = 0
self.Offset = 0
self.OffsetHigh = 0
self.hEvent = 0
@@ -97,6 +122,10 @@
GetLastError.argtypes = []
GetLastError.restype = wintypes.DWORD
+ SetLastError = ctypes.windll.kernel32.SetLastError
+ SetLastError.argtypes = [wintypes.DWORD]
+ SetLastError.restype = None
+
FormatMessage = ctypes.windll.kernel32.FormatMessageA
FormatMessage.argtypes = [wintypes.DWORD, wintypes.LPVOID, wintypes.DWORD,
wintypes.DWORD, ctypes.POINTER(wintypes.LPSTR),
@@ -105,12 +134,30 @@
LocalFree = ctypes.windll.kernel32.LocalFree
- GetOverlappedResultEx = ctypes.windll.kernel32.GetOverlappedResultEx
- GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
- ctypes.POINTER(OVERLAPPED), LPDWORD,
- wintypes.DWORD, wintypes.BOOL]
- GetOverlappedResultEx.restype = wintypes.BOOL
+ GetOverlappedResult = ctypes.windll.kernel32.GetOverlappedResult
+ GetOverlappedResult.argtypes = [wintypes.HANDLE,
+ ctypes.POINTER(OVERLAPPED), LPDWORD,
+ wintypes.BOOL]
+ GetOverlappedResult.restype = wintypes.BOOL
+ GetOverlappedResultEx = getattr(ctypes.windll.kernel32,
+ 'GetOverlappedResultEx', None)
+ if GetOverlappedResultEx is not None:
+ GetOverlappedResultEx.argtypes = [wintypes.HANDLE,
+ ctypes.POINTER(OVERLAPPED), LPDWORD,
+ wintypes.DWORD, wintypes.BOOL]
+ GetOverlappedResultEx.restype = wintypes.BOOL
+
+ WaitForSingleObjectEx = ctypes.windll.kernel32.WaitForSingleObjectEx
+ WaitForSingleObjectEx.argtypes = [wintypes.HANDLE, wintypes.DWORD, wintypes.BOOL]
+ WaitForSingleObjectEx.restype = wintypes.DWORD
+
+ CreateEvent = ctypes.windll.kernel32.CreateEventA
+ CreateEvent.argtypes = [LPDWORD, wintypes.BOOL, wintypes.BOOL,
+ wintypes.LPSTR]
+ CreateEvent.restype = wintypes.HANDLE
+
+ # Windows Vista is the minimum supported client for CancelIoEx.
CancelIoEx = ctypes.windll.kernel32.CancelIoEx
CancelIoEx.argtypes = [wintypes.HANDLE, ctypes.POINTER(OVERLAPPED)]
CancelIoEx.restype = wintypes.BOOL
@@ -132,8 +179,47 @@
pass
+def _win32_strerror(err):
+ """ expand a win32 error code into a human readable message """
+
+ # FormatMessage will allocate memory and assign it here
+ buf = ctypes.c_char_p()
+ FormatMessage(
+ FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
+ | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
+ try:
+ return buf.value
+ finally:
+ LocalFree(buf)
+
+
class WatchmanError(Exception):
- pass
+ def __init__(self, msg=None, cmd=None):
+ self.msg = msg
+ self.cmd = cmd
+
+ def setCommand(self, cmd):
+ self.cmd = cmd
+
+ def __str__(self):
+ if self.cmd:
+ return '%s, while executing %s' % (self.msg, self.cmd)
+ return self.msg
+
+
+class WatchmanEnvironmentError(WatchmanError):
+ def __init__(self, msg, errno, errmsg, cmd=None):
+ super(WatchmanEnvironmentError, self).__init__(
+ '{0}: errno={1} errmsg={2}'.format(msg, errno, errmsg),
+ cmd)
+
+
+class SocketConnectError(WatchmanError):
+ def __init__(self, sockpath, exc):
+ super(SocketConnectError, self).__init__(
+ 'unable to connect to %s: %s' % (sockpath, exc))
+ self.sockpath = sockpath
+ self.exc = exc
class SocketTimeout(WatchmanError):
@@ -151,19 +237,11 @@
self.msg is the message returned by watchman.
"""
-
def __init__(self, msg, cmd=None):
- self.msg = msg
- self.cmd = cmd
- super(CommandError, self).__init__('watchman command error: %s' % msg)
-
- def setCommand(self, cmd):
- self.cmd = cmd
-
- def __str__(self):
- if self.cmd:
- return '%s, while executing %s' % (self.msg, self.cmd)
- return self.msg
+ super(CommandError, self).__init__(
+ 'watchman command error: %s' % (msg, ),
+ cmd,
+ )
class Transport(object):
@@ -195,16 +273,16 @@
# Buffer may already have a line if we've received unilateral
# response(s) from the server
- if len(self.buf) == 1 and "\n" in self.buf[0]:
- (line, b) = self.buf[0].split("\n", 1)
+ if len(self.buf) == 1 and b"\n" in self.buf[0]:
+ (line, b) = self.buf[0].split(b"\n", 1)
self.buf = [b]
return line
while True:
b = self.readBytes(4096)
- if "\n" in b:
- result = ''.join(self.buf)
- (line, b) = b.split("\n", 1)
+ if b"\n" in b:
+ result = b''.join(self.buf)
+ (line, b) = b.split(b"\n", 1)
self.buf = [b]
return result + line
self.buf.append(b)
@@ -241,8 +319,8 @@
sock.connect(self.sockpath)
self.sock = sock
except socket.error as e:
- raise WatchmanError('unable to connect to %s: %s' %
- (self.sockpath, e))
+ sock.close()
+ raise SocketConnectError(self.sockpath, e)
def close(self):
self.sock.close()
@@ -268,6 +346,46 @@
raise SocketTimeout('timed out sending query command')
+def _get_overlapped_result_ex_impl(pipe, olap, nbytes, millis, alertable):
+ """ Windows 7 and earlier does not support GetOverlappedResultEx. The
+ alternative is to use GetOverlappedResult and wait for the read or write
+ operation to complete. This is done by using CreateEvent and
+ WaitForSingleObjectEx. CreateEvent, WaitForSingleObjectEx
+ and GetOverlappedResult have all been part of the Windows API since Windows XP.
+ This is the exact same implementation that can be found in the watchman
+ source code (see get_overlapped_result_ex_impl in stream_win.c). This
+ way, maintenance should be simplified.
+ """
+ log('Preparing to wait for maximum %dms', millis )
+ if millis != 0:
+ waitReturnCode = WaitForSingleObjectEx(olap.hEvent, millis, alertable)
+ if waitReturnCode == WAIT_OBJECT_0:
+ # Event is signaled, overlapped IO operation result should be available.
+ pass
+ elif waitReturnCode == WAIT_IO_COMPLETION:
+ # WaitForSingleObjectEx returned because the system added an I/O completion
+ # routine or an asynchronous procedure call (APC) to the thread queue.
+ SetLastError(WAIT_IO_COMPLETION)
+ pass
+ elif waitReturnCode == WAIT_TIMEOUT:
+ # We reached the maximum allowed wait time, the IO operation failed
+ # to complete in a timely fashion.
+ SetLastError(WAIT_TIMEOUT)
+ return False
+ elif waitReturnCode == WAIT_FAILED:
+ # something went wrong calling WaitForSingleObjectEx
+ err = GetLastError()
+ log('WaitForSingleObjectEx failed: %s', _win32_strerror(err))
+ return False
+ else:
+ # unexpected situation deserving investigation.
+ err = GetLastError()
+ log('Unexpected error: %s', _win32_strerror(err))
+ return False
+
+ return GetOverlappedResult(pipe, olap, nbytes, False)
+
+
class WindowsNamedPipeTransport(Transport):
""" connect to a named pipe """
@@ -284,28 +402,35 @@
self._raise_win_err('failed to open pipe %s' % sockpath,
GetLastError())
- def _win32_strerror(self, err):
- """ expand a win32 error code into a human readable message """
+ # event for the overlapped I/O operations
+ self._waitable = CreateEvent(None, True, False, None)
+ if self._waitable is None:
+ self._raise_win_err('CreateEvent failed', GetLastError())
- # FormatMessage will allocate memory and assign it here
- buf = ctypes.c_char_p()
- FormatMessage(
- FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER
- | FORMAT_MESSAGE_IGNORE_INSERTS, None, err, 0, buf, 0, None)
- try:
- return buf.value
- finally:
- LocalFree(buf)
+ self._get_overlapped_result_ex = GetOverlappedResultEx
+ if (os.getenv('WATCHMAN_WIN7_COMPAT') == '1' or
+ self._get_overlapped_result_ex is None):
+ self._get_overlapped_result_ex = _get_overlapped_result_ex_impl
def _raise_win_err(self, msg, err):
raise IOError('%s win32 error code: %d %s' %
- (msg, err, self._win32_strerror(err)))
+ (msg, err, _win32_strerror(err)))
def close(self):
if self.pipe:
+ log('Closing pipe')
CloseHandle(self.pipe)
self.pipe = None
+ if self._waitable is not None:
+ # We release the handle for the event
+ CloseHandle(self._waitable)
+ self._waitable = None
+
+ def setTimeout(self, value):
+ # convert to milliseconds
+ self.timeout = int(value * 1000)
+
def readBytes(self, size):
""" A read can block for an unbounded amount of time, even if the
kernel reports that the pipe handle is signalled, so we need to
@@ -325,6 +450,7 @@
# We need to initiate a read
buf = ctypes.create_string_buffer(size)
olap = OVERLAPPED()
+ olap.hEvent = self._waitable
log('made read buff of size %d', size)
@@ -339,8 +465,9 @@
GetLastError())
nread = wintypes.DWORD()
- if not GetOverlappedResultEx(self.pipe, olap, nread,
- 0 if immediate else self.timeout, True):
+ if not self._get_overlapped_result_ex(self.pipe, olap, nread,
+ 0 if immediate else self.timeout,
+ True):
err = GetLastError()
CancelIoEx(self.pipe, olap)
@@ -374,6 +501,8 @@
def write(self, data):
olap = OVERLAPPED()
+ olap.hEvent = self._waitable
+
immediate = WriteFile(self.pipe, ctypes.c_char_p(data), len(data),
None, olap)
@@ -385,8 +514,10 @@
# Obtain results, waiting if needed
nwrote = wintypes.DWORD()
- if GetOverlappedResultEx(self.pipe, olap, nwrote, 0 if immediate else
- self.timeout, True):
+ if self._get_overlapped_result_ex(self.pipe, olap, nwrote,
+ 0 if immediate else self.timeout,
+ True):
+ log('made write of %d bytes', nwrote.value)
return nwrote.value
err = GetLastError()
@@ -430,7 +561,10 @@
def close(self):
if self.proc:
- self.proc.kill()
+ if self.proc.pid is not None:
+ self.proc.kill()
+ self.proc.stdin.close()
+ self.proc.stdout.close()
self.proc = None
def _connect(self):
@@ -438,7 +572,7 @@
return self.proc
args = [
'watchman',
- '--sockname={}'.format(self.sockpath),
+ '--sockname={0}'.format(self.sockpath),
'--logfile=/BOGUS',
'--statefile=/BOGUS',
'--no-spawn',
@@ -460,8 +594,8 @@
def write(self, data):
if self.closed:
+ self.close()
self.closed = False
- self.proc = None
self._connect()
res = self.proc.stdin.write(data)
self.proc.stdin.close()
@@ -473,21 +607,21 @@
""" use the BSER encoding. This is the default, preferred codec """
def _loads(self, response):
- return bser.loads(response)
+ return bser.loads(response) # Defaults to BSER v1
def receive(self):
buf = [self.transport.readBytes(sniff_len)]
if not buf[0]:
raise WatchmanError('empty watchman response')
- elen = bser.pdu_len(buf[0])
+ _1, _2, elen = bser.pdu_info(buf[0])
rlen = len(buf[0])
while elen > rlen:
buf.append(self.transport.readBytes(elen - rlen))
rlen += len(buf[-1])
- response = ''.join(buf)
+ response = b''.join(buf)
try:
res = self._loads(response)
return res
@@ -495,7 +629,7 @@
raise WatchmanError('watchman response decode error: %s' % e)
def send(self, *args):
- cmd = bser.dumps(*args)
+ cmd = bser.dumps(*args) # Defaults to BSER v1
self.transport.write(cmd)
@@ -504,7 +638,64 @@
immutable object support """
def _loads(self, response):
- return bser.loads(response, False)
+ return bser.loads(response, False) # Defaults to BSER v1
+
+
+class Bser2WithFallbackCodec(BserCodec):
+ """ use BSER v2 encoding """
+
+ def __init__(self, transport):
+ super(Bser2WithFallbackCodec, self).__init__(transport)
+ # Once the server advertises support for bser-v2 we should switch this
+ # to 'required' on Python 3.
+ self.send(["version", {"optional": ["bser-v2"]}])
+
+ capabilities = self.receive()
+
+ if 'error' in capabilities:
+ raise Exception('Unsupported BSER version')
+
+ if capabilities['capabilities']['bser-v2']:
+ self.bser_version = 2
+ self.bser_capabilities = 0
+ else:
+ self.bser_version = 1
+ self.bser_capabilities = 0
+
+ def _loads(self, response):
+ return bser.loads(response)
+
+ def receive(self):
+ buf = [self.transport.readBytes(sniff_len)]
+ if not buf[0]:
+ raise WatchmanError('empty watchman response')
+
+ recv_bser_version, recv_bser_capabilities, elen = bser.pdu_info(buf[0])
+
+ if hasattr(self, 'bser_version'):
+ # Readjust BSER version and capabilities if necessary
+ self.bser_version = max(self.bser_version, recv_bser_version)
+ self.capabilities = self.bser_capabilities & recv_bser_capabilities
+
+ rlen = len(buf[0])
+ while elen > rlen:
+ buf.append(self.transport.readBytes(elen - rlen))
+ rlen += len(buf[-1])
+
+ response = b''.join(buf)
+ try:
+ res = self._loads(response)
+ return res
+ except ValueError as e:
+ raise WatchmanError('watchman response decode error: %s' % e)
+
+ def send(self, *args):
+ if hasattr(self, 'bser_version'):
+ cmd = bser.dumps(*args, version=self.bser_version,
+ capabilities=self.bser_capabilities)
+ else:
+ cmd = bser.dumps(*args)
+ self.transport.write(cmd)
class JsonCodec(Codec):
@@ -520,6 +711,13 @@
def receive(self):
line = self.transport.readLine()
try:
+ # In Python 3, json.loads is a transformation from Unicode string to
+ # objects possibly containing Unicode strings. We typically expect
+ # the JSON blob to be ASCII-only with non-ASCII characters escaped,
+ # but it's possible we might get non-ASCII bytes that are valid
+ # UTF-8.
+ if compat.PYTHON3:
+ line = line.decode('utf-8')
return self.json.loads(line)
except Exception as e:
print(e, line)
@@ -527,7 +725,12 @@
def send(self, *args):
cmd = self.json.dumps(*args)
- self.transport.write(cmd + "\n")
+ # In Python 3, json.dumps is a transformation from objects possibly
+ # containing Unicode strings to Unicode string. Even with (the default)
+ # ensure_ascii=True, dumps returns a Unicode string.
+ if compat.PYTHON3:
+ cmd = cmd.encode('ascii')
+ self.transport.write(cmd + b"\n")
class client(object):
@@ -556,22 +759,27 @@
self.timeout = timeout
self.useImmutableBser = useImmutableBser
- transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
- if transport == 'local' and os.name == 'nt':
- self.transport = WindowsNamedPipeTransport
- elif transport == 'local':
- self.transport = UnixSocketTransport
- elif transport == 'cli':
- self.transport = CLIProcessTransport
- if sendEncoding is None:
- sendEncoding = 'json'
- if recvEncoding is None:
- recvEncoding = sendEncoding
+ if inspect.isclass(transport) and issubclass(transport, Transport):
+ self.transport = transport
else:
- raise WatchmanError('invalid transport %s' % transport)
+ transport = transport or os.getenv('WATCHMAN_TRANSPORT') or 'local'
+ if transport == 'local' and os.name == 'nt':
+ self.transport = WindowsNamedPipeTransport
+ elif transport == 'local':
+ self.transport = UnixSocketTransport
+ elif transport == 'cli':
+ self.transport = CLIProcessTransport
+ if sendEncoding is None:
+ sendEncoding = 'json'
+ if recvEncoding is None:
+ recvEncoding = sendEncoding
+ else:
+ raise WatchmanError('invalid transport %s' % transport)
- sendEncoding = sendEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
- recvEncoding = recvEncoding or os.getenv('WATCHMAN_ENCODING') or 'bser'
+ sendEncoding = str(sendEncoding or os.getenv('WATCHMAN_ENCODING') or
+ 'bser')
+ recvEncoding = str(recvEncoding or os.getenv('WATCHMAN_ENCODING') or
+ 'bser')
self.recvCodec = self._parseEncoding(recvEncoding)
self.sendCodec = self._parseEncoding(sendEncoding)
@@ -581,6 +789,8 @@
if self.useImmutableBser:
return ImmutableBserCodec
return BserCodec
+ elif enc == 'experimental-bser-v2':
+ return Bser2WithFallbackCodec
elif enc == 'json':
return JsonCodec
else:
@@ -600,10 +810,20 @@
cmd = ['watchman', '--output-encoding=bser', 'get-sockname']
try:
- p = subprocess.Popen(cmd,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=os.name != 'nt')
+ args = dict(stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ close_fds=os.name != 'nt')
+
+ if os.name == 'nt':
+ # If invoked via an application with a graphical user interface,
+ # this call will cause a brief command window pop-up.
+ # Use the STARTF_USESHOWWINDOW flag to avoid this behavior.
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ args['startupinfo'] = startupinfo
+
+ p = subprocess.Popen(cmd, **args)
+
except OSError as e:
raise WatchmanError('"watchman" executable not in PATH (%s)', e)
@@ -614,10 +834,10 @@
raise WatchmanError("watchman exited with code %d" % exitcode)
result = bser.loads(stdout)
- if 'error' in result:
+ if b'error' in result:
raise WatchmanError('get-sockname error: %s' % result['error'])
- return result['sockname']
+ return result[b'sockname']
def _connect(self):
""" establish transport connection """
@@ -660,10 +880,16 @@
self._connect()
result = self.recvConn.receive()
if self._hasprop(result, 'error'):
- raise CommandError(result['error'])
+ error = result['error']
+ if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
+ error = result['error'].decode('utf-8', 'surrogateescape')
+ raise CommandError(error)
if self._hasprop(result, 'log'):
- self.logs.append(result['log'])
+ log = result['log']
+ if compat.PYTHON3 and isinstance(self.recvConn, BserCodec):
+ log = log.decode('utf-8', 'surrogateescape')
+ self.logs.append(log)
if self._hasprop(result, 'subscription'):
sub = result['subscription']
@@ -682,6 +908,9 @@
return result
def isUnilateralResponse(self, res):
+ if 'unilateral' in res and res['unilateral']:
+ return True
+ # Fall back to checking for known unilateral responses
for k in self.unilateral:
if k in res:
return True
@@ -712,6 +941,13 @@
remove processing impacts both the unscoped and scoped stores
for the subscription data.
"""
+ if compat.PYTHON3 and issubclass(self.recvCodec, BserCodec):
+ # People may pass in Unicode strings here -- but currently BSER only
+ # returns bytestrings. Deal with that.
+ if isinstance(root, str):
+ root = encoding.encode_local(root)
+ if isinstance(name, str):
+ name = name.encode('utf-8')
if root is not None:
if not root in self.sub_by_root:
@@ -752,9 +988,17 @@
res = self.receive()
return res
- except CommandError as ex:
+ except EnvironmentError as ee:
+ # When we can depend on Python 3, we can use PEP 3134
+ # exception chaining here.
+ raise WatchmanEnvironmentError(
+ 'I/O error communicating with watchman daemon',
+ ee.errno,
+ ee.strerror,
+ args)
+ except WatchmanError as ex:
ex.setCommand(args)
- raise ex
+ raise
def capabilityCheck(self, optional=None, required=None):
""" Perform a server capability check """
@@ -775,5 +1019,3 @@
def setTimeout(self, value):
self.recvConn.setTimeout(value)
self.sendConn.setTimeout(value)
-
-# no-check-code -- this is a 3rd party library
--- a/hgext/fsmonitor/pywatchman/bser.c Thu Dec 22 11:07:59 2016 -0800
+++ b/hgext/fsmonitor/pywatchman/bser.c Thu Dec 22 11:22:32 2016 -0800
@@ -29,11 +29,27 @@
*/
#include <Python.h>
+#include <bytesobject.h>
#ifdef _MSC_VER
#define inline __inline
-#include "msc_stdint.h"
+#if _MSC_VER >= 1800
+#include <stdint.h>
+#else
+// The compiler associated with Python 2.7 on Windows doesn't ship
+// with stdint.h, so define the small subset that we use here.
+typedef __int8 int8_t;
+typedef __int16 int16_t;
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#define UINT32_MAX 4294967295U
+#endif
#endif
+// clang-format off
/* Return the smallest size int that can store the value */
#define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \
((x) == ((int16_t)x)) ? 2 : \
@@ -41,7 +57,7 @@
#define BSER_ARRAY 0x00
#define BSER_OBJECT 0x01
-#define BSER_STRING 0x02
+#define BSER_BYTESTRING 0x02
#define BSER_INT8 0x03
#define BSER_INT16 0x04
#define BSER_INT32 0x05
@@ -52,6 +68,8 @@
#define BSER_NULL 0x0a
#define BSER_TEMPLATE 0x0b
#define BSER_SKIP 0x0c
+#define BSER_UTF8STRING 0x0d
+// clang-format on
// An immutable object representation of BSER_OBJECT.
// Rather than build a hash table, key -> value are obtained
@@ -64,24 +82,27 @@
// approach, this is still faster for the mercurial use case
// as it helps to eliminate creating N other objects to
// represent the stat information in the hgwatchman extension
+// clang-format off
typedef struct {
PyObject_HEAD
PyObject *keys; // tuple of field names
PyObject *values; // tuple of values
} bserObject;
+// clang-format on
-static Py_ssize_t bserobj_tuple_length(PyObject *o) {
- bserObject *obj = (bserObject*)o;
+static Py_ssize_t bserobj_tuple_length(PyObject* o) {
+ bserObject* obj = (bserObject*)o;
return PySequence_Length(obj->keys);
}
-static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) {
- bserObject *obj = (bserObject*)o;
+static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) {
+ bserObject* obj = (bserObject*)o;
return PySequence_GetItem(obj->values, i);
}
+// clang-format off
static PySequenceMethods bserobj_sq = {
bserobj_tuple_length, /* sq_length */
0, /* sq_concat */
@@ -92,49 +113,72 @@
0, /* sq_inplace_concat */
0 /* sq_inplace_repeat */
};
+// clang-format on
-static void bserobj_dealloc(PyObject *o) {
- bserObject *obj = (bserObject*)o;
+static void bserobj_dealloc(PyObject* o) {
+ bserObject* obj = (bserObject*)o;
Py_CLEAR(obj->keys);
Py_CLEAR(obj->values);
PyObject_Del(o);
}
-static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
- bserObject *obj = (bserObject*)o;
+static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) {
+ bserObject* obj = (bserObject*)o;
Py_ssize_t i, n;
- const char *namestr;
+ PyObject* name_bytes = NULL;
+ PyObject* ret = NULL;
+ const char* namestr;
if (PyIndex_Check(name)) {
i = PyNumber_AsSsize_t(name, PyExc_IndexError);
if (i == -1 && PyErr_Occurred()) {
- return NULL;
+ goto bail;
}
- return PySequence_GetItem(obj->values, i);
+ ret = PySequence_GetItem(obj->values, i);
+ goto bail;
}
+ // We can be passed in Unicode objects here -- we don't support anything other
+ // than UTF-8 for keys.
+ if (PyUnicode_Check(name)) {
+ name_bytes = PyUnicode_AsUTF8String(name);
+ if (name_bytes == NULL) {
+ goto bail;
+ }
+ namestr = PyBytes_AsString(name_bytes);
+ } else {
+ namestr = PyBytes_AsString(name);
+ }
+
+ if (namestr == NULL) {
+ goto bail;
+ }
// hack^Wfeature to allow mercurial to use "st_size" to reference "size"
- namestr = PyString_AsString(name);
if (!strncmp(namestr, "st_", 3)) {
namestr += 3;
}
n = PyTuple_GET_SIZE(obj->keys);
for (i = 0; i < n; i++) {
- const char *item_name = NULL;
- PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
+ const char* item_name = NULL;
+ PyObject* key = PyTuple_GET_ITEM(obj->keys, i);
- item_name = PyString_AsString(key);
+ item_name = PyBytes_AsString(key);
if (!strcmp(item_name, namestr)) {
- return PySequence_GetItem(obj->values, i);
+ ret = PySequence_GetItem(obj->values, i);
+ goto bail;
}
}
- PyErr_Format(PyExc_AttributeError,
- "bserobject has no attribute '%.400s'", namestr);
- return NULL;
+
+ PyErr_Format(
+ PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
+bail:
+ Py_XDECREF(name_bytes);
+ return ret;
}
+// clang-format off
static PyMappingMethods bserobj_map = {
bserobj_tuple_length, /* mp_length */
bserobj_getattrro, /* mp_subscript */
@@ -181,20 +225,27 @@
0, /* tp_alloc */
0, /* tp_new */
};
-
+// clang-format on
-static PyObject *bser_loads_recursive(const char **ptr, const char *end,
- int mutable);
+typedef struct loads_ctx {
+ int mutable;
+ const char* value_encoding;
+ const char* value_errors;
+ uint32_t bser_version;
+ uint32_t bser_capabilities;
+} unser_ctx_t;
+
+static PyObject*
+bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
static const char bser_true = BSER_TRUE;
static const char bser_false = BSER_FALSE;
static const char bser_null = BSER_NULL;
-static const char bser_string_hdr = BSER_STRING;
+static const char bser_bytestring_hdr = BSER_BYTESTRING;
static const char bser_array_hdr = BSER_ARRAY;
static const char bser_object_hdr = BSER_OBJECT;
-static inline uint32_t next_power_2(uint32_t n)
-{
+static inline uint32_t next_power_2(uint32_t n) {
n |= (n >> 16);
n |= (n >> 8);
n |= (n >> 4);
@@ -205,16 +256,17 @@
// A buffer we use for building up the serialized result
struct bser_buffer {
- char *buf;
+ char* buf;
int wpos, allocd;
+ uint32_t bser_version;
+ uint32_t capabilities;
};
typedef struct bser_buffer bser_t;
-static int bser_append(bser_t *bser, const char *data, uint32_t len)
-{
+static int bser_append(bser_t* bser, const char* data, uint32_t len) {
int newlen = next_power_2(bser->wpos + len);
if (newlen > bser->allocd) {
- char *nbuf = realloc(bser->buf, newlen);
+ char* nbuf = realloc(bser->buf, newlen);
if (!nbuf) {
return 0;
}
@@ -228,40 +280,46 @@
return 1;
}
-static int bser_init(bser_t *bser)
-{
+static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
bser->allocd = 8192;
bser->wpos = 0;
bser->buf = malloc(bser->allocd);
-
+ bser->bser_version = version;
+ bser->capabilities = capabilities;
if (!bser->buf) {
return 0;
}
- // Leave room for the serialization header, which includes
- // our overall length. To make things simpler, we'll use an
- // int32 for the header
+// Leave room for the serialization header, which includes
+// our overall length. To make things simpler, we'll use an
+// int32 for the header
#define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
- bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
+
+// Version 2 also carries an integer indicating the capabilities. The
+// capabilities integer comes before the PDU size.
+#define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
+ if (version == 2) {
+ bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
+ } else {
+ bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1);
+ }
return 1;
}
-static void bser_dtor(bser_t *bser)
-{
+static void bser_dtor(bser_t* bser) {
free(bser->buf);
bser->buf = NULL;
}
-static int bser_long(bser_t *bser, int64_t val)
-{
+static int bser_long(bser_t* bser, int64_t val) {
int8_t i8;
int16_t i16;
int32_t i32;
int64_t i64;
char sz;
int size = INT_SIZE(val);
- char *iptr;
+ char* iptr;
switch (size) {
case 1:
@@ -285,8 +343,7 @@
iptr = (char*)&i64;
break;
default:
- PyErr_SetString(PyExc_RuntimeError,
- "Cannot represent this long value!?");
+ PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
return 0;
}
@@ -297,25 +354,24 @@
return bser_append(bser, iptr, size);
}
-static int bser_string(bser_t *bser, PyObject *sval)
-{
- char *buf = NULL;
+static int bser_bytestring(bser_t* bser, PyObject* sval) {
+ char* buf = NULL;
Py_ssize_t len;
int res;
- PyObject *utf = NULL;
+ PyObject* utf = NULL;
if (PyUnicode_Check(sval)) {
utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
sval = utf;
}
- res = PyString_AsStringAndSize(sval, &buf, &len);
+ res = PyBytes_AsStringAndSize(sval, &buf, &len);
if (res == -1) {
res = 0;
goto out;
}
- if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
+ if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
res = 0;
goto out;
}
@@ -341,8 +397,7 @@
return res;
}
-static int bser_recursive(bser_t *bser, PyObject *val)
-{
+static int bser_recursive(bser_t* bser, PyObject* val) {
if (PyBool_Check(val)) {
if (val == Py_True) {
return bser_append(bser, &bser_true, sizeof(bser_true));
@@ -354,19 +409,21 @@
return bser_append(bser, &bser_null, sizeof(bser_null));
}
+// Python 3 has one integer type.
+#if PY_MAJOR_VERSION < 3
if (PyInt_Check(val)) {
return bser_long(bser, PyInt_AS_LONG(val));
}
+#endif // PY_MAJOR_VERSION < 3
if (PyLong_Check(val)) {
return bser_long(bser, PyLong_AsLongLong(val));
}
- if (PyString_Check(val) || PyUnicode_Check(val)) {
- return bser_string(bser, val);
+ if (PyBytes_Check(val) || PyUnicode_Check(val)) {
+ return bser_bytestring(bser, val);
}
-
if (PyFloat_Check(val)) {
double dval = PyFloat_AS_DOUBLE(val);
char sz = BSER_REAL;
@@ -390,7 +447,7 @@
}
for (i = 0; i < len; i++) {
- PyObject *ele = PyList_GET_ITEM(val, i);
+ PyObject* ele = PyList_GET_ITEM(val, i);
if (!bser_recursive(bser, ele)) {
return 0;
@@ -412,7 +469,7 @@
}
for (i = 0; i < len; i++) {
- PyObject *ele = PyTuple_GET_ITEM(val, i);
+ PyObject* ele = PyTuple_GET_ITEM(val, i);
if (!bser_recursive(bser, ele)) {
return 0;
@@ -436,7 +493,7 @@
}
while (PyDict_Next(val, &pos, &key, &ele)) {
- if (!bser_string(bser, key)) {
+ if (!bser_bytestring(bser, key)) {
return 0;
}
if (!bser_recursive(bser, ele)) {
@@ -451,17 +508,25 @@
return 0;
}
-static PyObject *bser_dumps(PyObject *self, PyObject *args)
-{
+static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
PyObject *val = NULL, *res;
bser_t bser;
- uint32_t len;
+ uint32_t len, bser_version = 1, bser_capabilities = 0;
+
+ static char* kw_list[] = {"val", "version", "capabilities", NULL};
- if (!PyArg_ParseTuple(args, "O", &val)) {
+ if (!PyArg_ParseTupleAndKeywords(
+ args,
+ kw,
+ "O|ii:dumps",
+ kw_list,
+ &val,
+ &bser_version,
+ &bser_capabilities)) {
return NULL;
}
- if (!bser_init(&bser)) {
+ if (!bser_init(&bser, bser_version, bser_capabilities)) {
return PyErr_NoMemory();
}
@@ -475,19 +540,25 @@
}
// Now fill in the overall length
- len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
- memcpy(bser.buf + 3, &len, sizeof(len));
+ if (bser_version == 1) {
+ len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
+ memcpy(bser.buf + 3, &len, sizeof(len));
+ } else {
+ len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
+ // The BSER capabilities block comes before the PDU length
+ memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
+ memcpy(bser.buf + 7, &len, sizeof(len));
+ }
- res = PyString_FromStringAndSize(bser.buf, bser.wpos);
+ res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
bser_dtor(&bser);
return res;
}
-int bunser_int(const char **ptr, const char *end, int64_t *val)
-{
+int bunser_int(const char** ptr, const char* end, int64_t* val) {
int needed;
- const char *buf = *ptr;
+ const char* buf = *ptr;
int8_t i8;
int16_t i16;
int32_t i32;
@@ -507,8 +578,8 @@
needed = 9;
break;
default:
- PyErr_Format(PyExc_ValueError,
- "invalid bser int encoding 0x%02x", buf[0]);
+ PyErr_Format(
+ PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
return 0;
}
if (end - buf < needed) {
@@ -538,10 +609,12 @@
}
}
-static int bunser_string(const char **ptr, const char *end,
- const char **start, int64_t *len)
-{
- const char *buf = *ptr;
+static int bunser_bytestring(
+ const char** ptr,
+ const char* end,
+ const char** start,
+ int64_t* len) {
+ const char* buf = *ptr;
// skip string marker
buf++;
@@ -559,11 +632,12 @@
return 1;
}
-static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
-{
- const char *buf = *ptr;
+static PyObject*
+bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+ const char* buf = *ptr;
int64_t nitems, i;
- PyObject *res;
+ int mutable = ctx->mutable;
+ PyObject* res;
// skip array header
buf++;
@@ -584,7 +658,7 @@
}
for (i = 0; i < nitems; i++) {
- PyObject *ele = bser_loads_recursive(ptr, end, mutable);
+ PyObject* ele = bser_loads_recursive(ptr, end, ctx);
if (!ele) {
Py_DECREF(res);
@@ -602,13 +676,13 @@
return res;
}
-static PyObject *bunser_object(const char **ptr, const char *end,
- int mutable)
-{
- const char *buf = *ptr;
+static PyObject*
+bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+ const char* buf = *ptr;
int64_t nitems, i;
- PyObject *res;
- bserObject *obj;
+ int mutable = ctx->mutable;
+ PyObject* res;
+ bserObject* obj;
// skip array header
buf++;
@@ -627,12 +701,12 @@
}
for (i = 0; i < nitems; i++) {
- const char *keystr;
+ const char* keystr;
int64_t keylen;
- PyObject *key;
- PyObject *ele;
+ PyObject* key;
+ PyObject* ele;
- if (!bunser_string(ptr, end, &keystr, &keylen)) {
+ if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
Py_DECREF(res);
return NULL;
}
@@ -643,13 +717,24 @@
return NULL;
}
- key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+ if (mutable) {
+ // This will interpret the key as UTF-8.
+ key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+ } else {
+ // For immutable objects we'll manage key lookups, so we can avoid going
+ // through the Unicode APIs. This avoids a potentially expensive and
+ // definitely unnecessary conversion to UTF-16 and back for Python 2.
+ // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
+ // Unicode keys there without an appreciable performance loss.
+ key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
+ }
+
if (!key) {
Py_DECREF(res);
return NULL;
}
- ele = bser_loads_recursive(ptr, end, mutable);
+ ele = bser_loads_recursive(ptr, end, ctx);
if (!ele) {
Py_DECREF(key);
@@ -671,14 +756,24 @@
return res;
}
-static PyObject *bunser_template(const char **ptr, const char *end,
- int mutable)
-{
- const char *buf = *ptr;
+static PyObject*
+bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
+ const char* buf = *ptr;
int64_t nitems, i;
- PyObject *arrval;
- PyObject *keys;
+ int mutable = ctx->mutable;
+ PyObject* arrval;
+ PyObject* keys;
Py_ssize_t numkeys, keyidx;
+ unser_ctx_t keys_ctx = {0};
+ if (mutable) {
+ keys_ctx.mutable = 1;
+ // Decode keys as UTF-8 in this case.
+ keys_ctx.value_encoding = "utf-8";
+ keys_ctx.value_errors = "strict";
+ } else {
+ // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
+ // lookup time.
+ }
if (buf[1] != BSER_ARRAY) {
PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
@@ -689,8 +784,9 @@
buf++;
*ptr = buf;
- // Load template keys
- keys = bunser_array(ptr, end, mutable);
+ // Load template keys.
+ // For keys we don't want to do any decoding right now.
+ keys = bunser_array(ptr, end, &keys_ctx);
if (!keys) {
return NULL;
}
@@ -716,8 +812,8 @@
}
for (i = 0; i < nitems; i++) {
- PyObject *dict = NULL;
- bserObject *obj = NULL;
+ PyObject* dict = NULL;
+ bserObject* obj = NULL;
if (mutable) {
dict = PyDict_New();
@@ -731,22 +827,22 @@
dict = (PyObject*)obj;
}
if (!dict) {
-fail:
+ fail:
Py_DECREF(keys);
Py_DECREF(arrval);
return NULL;
}
for (keyidx = 0; keyidx < numkeys; keyidx++) {
- PyObject *key;
- PyObject *ele;
+ PyObject* key;
+ PyObject* ele;
if (**ptr == BSER_SKIP) {
*ptr = *ptr + 1;
ele = Py_None;
Py_INCREF(ele);
} else {
- ele = bser_loads_recursive(ptr, end, mutable);
+ ele = bser_loads_recursive(ptr, end, ctx);
}
if (!ele) {
@@ -772,34 +868,38 @@
return arrval;
}
-static PyObject *bser_loads_recursive(const char **ptr, const char *end,
- int mutable)
-{
- const char *buf = *ptr;
+static PyObject* bser_loads_recursive(
+ const char** ptr,
+ const char* end,
+ const unser_ctx_t* ctx) {
+ const char* buf = *ptr;
switch (buf[0]) {
case BSER_INT8:
case BSER_INT16:
case BSER_INT32:
- case BSER_INT64:
- {
- int64_t ival;
- if (!bunser_int(ptr, end, &ival)) {
- return NULL;
- }
- if (ival < LONG_MIN || ival > LONG_MAX) {
- return PyLong_FromLongLong(ival);
- }
- return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
+ case BSER_INT64: {
+ int64_t ival;
+ if (!bunser_int(ptr, end, &ival)) {
+ return NULL;
}
+// Python 3 has one integer type.
+#if PY_MAJOR_VERSION >= 3
+ return PyLong_FromLongLong(ival);
+#else
+ if (ival < LONG_MIN || ival > LONG_MAX) {
+ return PyLong_FromLongLong(ival);
+ }
+ return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
+#endif // PY_MAJOR_VERSION >= 3
+ }
- case BSER_REAL:
- {
- double dval;
- memcpy(&dval, buf + 1, sizeof(dval));
- *ptr = buf + 1 + sizeof(double);
- return PyFloat_FromDouble(dval);
- }
+ case BSER_REAL: {
+ double dval;
+ memcpy(&dval, buf + 1, sizeof(dval));
+ *ptr = buf + 1 + sizeof(double);
+ return PyFloat_FromDouble(dval);
+ }
case BSER_TRUE:
*ptr = buf + 1;
@@ -816,31 +916,51 @@
Py_INCREF(Py_None);
return Py_None;
- case BSER_STRING:
- {
- const char *start;
- int64_t len;
+ case BSER_BYTESTRING: {
+ const char* start;
+ int64_t len;
- if (!bunser_string(ptr, end, &start, &len)) {
- return NULL;
- }
+ if (!bunser_bytestring(ptr, end, &start, &len)) {
+ return NULL;
+ }
- if (len > LONG_MAX) {
- PyErr_Format(PyExc_ValueError, "string too long for python");
- return NULL;
- }
-
- return PyString_FromStringAndSize(start, (long)len);
+ if (len > LONG_MAX) {
+ PyErr_Format(PyExc_ValueError, "string too long for python");
+ return NULL;
}
+ if (ctx->value_encoding != NULL) {
+ return PyUnicode_Decode(
+ start, (long)len, ctx->value_encoding, ctx->value_errors);
+ } else {
+ return PyBytes_FromStringAndSize(start, (long)len);
+ }
+ }
+
+ case BSER_UTF8STRING: {
+ const char* start;
+ int64_t len;
+
+ if (!bunser_bytestring(ptr, end, &start, &len)) {
+ return NULL;
+ }
+
+ if (len > LONG_MAX) {
+ PyErr_Format(PyExc_ValueError, "string too long for python");
+ return NULL;
+ }
+
+ return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
+ }
+
case BSER_ARRAY:
- return bunser_array(ptr, end, mutable);
+ return bunser_array(ptr, end, ctx);
case BSER_OBJECT:
- return bunser_object(ptr, end, mutable);
+ return bunser_object(ptr, end, ctx);
case BSER_TEMPLATE:
- return bunser_template(ptr, end, mutable);
+ return bunser_template(ptr, end, ctx);
default:
PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
@@ -849,102 +969,244 @@
return NULL;
}
-// Expected use case is to read a packet from the socket and
-// then call bser.pdu_len on the packet. It returns the total
-// length of the entire response that the peer is sending,
-// including the bytes already received. This allows the client
-// to compute the data size it needs to read before it can
-// decode the data
-static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
-{
- const char *start = NULL;
- const char *data = NULL;
- int datalen = 0;
- const char *end;
- int64_t expected_len, total_len;
+// Parse the fixed prefix of a BSER PDU stored in [data, end).
+//
+// Recognizes the v1 and v2 magic bytes, reads the capabilities word that a v2
+// header carries right after the magic, and decodes the integer holding the
+// length of the rest of the PDU.
+//
+// On success returns 1 and fills in the version, the capabilities (0 for a
+// v1 header), the announced length and the number of header bytes consumed.
+// Returns 0 on failure; the header checks performed here set ValueError
+// before returning.
+static int _pdu_info_helper(
+    const char* data,
+    const char* end,
+    uint32_t* bser_version_out,
+    uint32_t* bser_capabilities_out,
+    int64_t* expected_len_out,
+    off_t* position_out) {
+  uint32_t bser_version;
+  uint32_t bser_capabilities = 0;
+  int64_t expected_len;
- if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
- return NULL;
- }
- data = start;
- end = data + datalen;
-
+  const char* start;
+  start = data;
// Validate the header and length
- if (memcmp(data, EMPTY_HEADER, 2) != 0) {
+  if (memcmp(data, EMPTY_HEADER, 2) == 0) {
+    bser_version = 1;
+  } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
+    bser_version = 2;
+  } else {
PyErr_SetString(PyExc_ValueError, "invalid bser header");
- return NULL;
+    return 0;
}
data += 2;
+  if (bser_version == 2) {
+    // Expect an integer telling us what capabilities are supported by the
+    // remote server (currently unused).
+    // The whole capabilities word must lie inside the buffer before it is
+    // copied out.
+    if (end - data < (Py_ssize_t)sizeof(bser_capabilities)) {
+      PyErr_SetString(PyExc_ValueError, "truncated bser v2 header");
+      return 0;
+    }
+    // Copy the bytes that data points at; passing &data here would copy the
+    // bytes of the pointer variable itself.
+    memcpy(&bser_capabilities, data, sizeof(bser_capabilities));
+    data += sizeof(bser_capabilities);
+  }
+
// Expect an integer telling us how big the rest of the data
// should be
if (!bunser_int(&data, end, &expected_len)) {
+    return 0;
+  }
+
+  *bser_version_out = bser_version;
+  *bser_capabilities_out = (uint32_t)bser_capabilities;
+  *expected_len_out = expected_len;
+  *position_out = (off_t)(data - start);
+  return 1;
+}
+
+// Python-facing wrapper around _pdu_info_helper: takes the byte buffer from
+// the positional arguments, parses its PDU header, and reports the version,
+// the capabilities and the total PDU length (header bytes consumed plus the
+// length announced by the header).
+// Returns 1 on success and 0 on failure.
+static int pdu_info_helper(
+    PyObject* self,
+    PyObject* args,
+    uint32_t* bser_version_out,
+    uint32_t* bser_capabilities_out,
+    int64_t* total_len_out) {
+  const char* buf = NULL;
+  int buflen = 0;
+  int64_t body_len;
+  off_t header_len;
+
+  if (!PyArg_ParseTuple(args, "s#", &buf, &buflen)) {
+    return 0;
+  }
+
+  if (_pdu_info_helper(
+          buf,
+          buf + buflen,
+          bser_version_out,
+          bser_capabilities_out,
+          &body_len,
+          &header_len)) {
+    *total_len_out = (int64_t)(body_len + header_len);
+    return 1;
+  }
+  return 0;
+}
+
+// Expected use case is to read a packet from the socket and then call
+// bser.pdu_info on the packet. It returns the BSER version, BSER capabilities,
+// and the total length of the entire response that the peer is sending,
+// including the bytes already received. This allows the client to compute the
+// data size it needs to read before it can decode the data.
+// Returns a new (version, capabilities, total_len) tuple, or NULL with an
+// exception set when the header cannot be parsed.
+static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
+  uint32_t version, capabilities;
+  int64_t total_len;
+  if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
+    return NULL;
+  }
+  // "k" reads an unsigned long and "L" a long long from the variadic
+  // arguments, so convert explicitly instead of passing the fixed-width
+  // types, whose size may differ from what the format consumes.
+  return Py_BuildValue(
+      "kkL",
+      (unsigned long)version,
+      (unsigned long)capabilities,
+      (long long)total_len);
+}
+
+// bser.pdu_len(buf): like bser.pdu_info, but returns only the total PDU
+// length. Returns NULL with an exception set when the header cannot be
+// parsed.
+static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
+  uint32_t version, capabilities;
+  int64_t total_len;
+  if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
+    return NULL;
+  }
+  // "L" reads a long long from the variadic arguments; convert explicitly so
+  // the argument type matches the format unit.
+  return Py_BuildValue("L", (long long)total_len);
+}
+
+// bser.loads(buf, mutable, value_encoding, value_errors): decode one complete
+// BSER PDU held in buf and return the resulting Python object.
+// Only buf is required; the arguments after the "|" in the format string are
+// optional. Returns NULL with an exception set on failure.
+static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
+ const char* data = NULL;
+ int datalen = 0;
+ const char* start;
+ const char* end;
+ int64_t expected_len;
+ off_t position;
+ PyObject* mutable_obj = NULL;
+ const char* value_encoding = NULL;
+ const char* value_errors = NULL;
+ // NOTE(review): this initializer appears to make ctx.mutable default to 1;
+ // confirm against the field order of unser_ctx_t.
+ unser_ctx_t ctx = {1, 0};
+
+ static char* kw_list[] = {
+ "buf", "mutable", "value_encoding", "value_errors", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(
+ args,
+ kw,
+ "s#|Ozz:loads",
+ kw_list,
+ &start,
+ &datalen,
+ &mutable_obj,
+ &value_encoding,
+ &value_errors)) {
return NULL;
}
- total_len = expected_len + (data - start);
- if (total_len > LONG_MAX) {
- return PyLong_FromLongLong(total_len);
+ // Any truthy object enables mutable results; an error from PyObject_IsTrue
+ // (a negative result) is treated the same as false.
+ if (mutable_obj) {
+ ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
}
- return PyInt_FromLong((long)total_len);
-}
-
-static PyObject *bser_loads(PyObject *self, PyObject *args)
-{
- const char *data = NULL;
- int datalen = 0;
- const char *end;
- int64_t expected_len;
- int mutable = 1;
- PyObject *mutable_obj = NULL;
-
- if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
- return NULL;
+ // value_errors only matters when an encoding was requested; in that case it
+ // falls back to "strict" when the caller did not supply one.
+ ctx.value_encoding = value_encoding;
+ if (value_encoding == NULL) {
+ ctx.value_errors = NULL;
+ } else if (value_errors == NULL) {
+ ctx.value_errors = "strict";
+ } else {
+ ctx.value_errors = value_errors;
}
- if (mutable_obj) {
- mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
- }
-
+ data = start;
end = data + datalen;
- // Validate the header and length
- if (memcmp(data, EMPTY_HEADER, 2) != 0) {
- PyErr_SetString(PyExc_ValueError, "invalid bser header");
+ // Parse the PDU header; position receives the number of header bytes.
+ if (!_pdu_info_helper(
+ data,
+ end,
+ &ctx.bser_version,
+ &ctx.bser_capabilities,
+ &expected_len,
+ &position)) {
return NULL;
}
- data += 2;
-
- // Expect an integer telling us how big the rest of the data
- // should be
- if (!bunser_int(&data, end, &expected_len)) {
- return NULL;
- }
-
+ // Skip the header; the length it announced must end exactly at the end of
+ // the supplied buffer.
+ data = start + position;
// Verify
if (expected_len + data != end) {
PyErr_SetString(PyExc_ValueError, "bser data len != header len");
return NULL;
}
- return bser_loads_recursive(&data, end, mutable);
+ return bser_loads_recursive(&data, end, &ctx);
}
+// bser.load(fp, mutable, value_encoding, value_errors): forward the call to
+// the load() function of the Python-level load module and return its result.
+// The format string "OOzz" contains no "|", so all four arguments must be
+// supplied by the caller. Returns NULL with an exception set on failure.
+static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
+ PyObject *load, *string;
+ PyObject* fp = NULL;
+ PyObject* mutable_obj = NULL;
+ const char* value_encoding = NULL;
+ const char* value_errors = NULL;
+
+ static char* kw_list[] = {
+ "fp", "mutable", "value_encoding", "value_errors", NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(
+ args,
+ kw,
+ "OOzz:load",
+ kw_list,
+ &fp,
+ &mutable_obj,
+ &value_encoding,
+ &value_errors)) {
+ return NULL;
+ }
+
+ // NOTE(review): the module is imported by the absolute name
+ // "pywatchman.load"; confirm this resolves when the package is vendored
+ // under a different parent package.
+ load = PyImport_ImportModule("pywatchman.load");
+ if (load == NULL) {
+ return NULL;
+ }
+ // Despite its name, string holds whatever object load.load() returned
+ // (NULL if the call raised).
+ string = PyObject_CallMethod(
+ load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
+ Py_DECREF(load);
+ return string;
+}
+
+// Functions exported by the bser module. loads, load and dumps are registered
+// with METH_KEYWORDS and therefore accept keyword arguments; pdu_info and
+// pdu_len take positional arguments only. The all-NULL entry terminates the
+// table.
+// clang-format off
static PyMethodDef bser_methods[] = {
- {"loads", bser_loads, METH_VARARGS, "Deserialize string."},
- {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
- {"dumps", bser_dumps, METH_VARARGS, "Serialize string."},
+ {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
+ "Deserialize string."},
+ {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
+ "Deserialize a file object"},
+ {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
+ "Extract PDU information."},
+ {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
+ "Extract total PDU length."},
+ {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
+ "Serialize string."},
{NULL, NULL, 0, NULL}
};
-PyMODINIT_FUNC initbser(void)
-{
+#if PY_MAJOR_VERSION >= 3
+// Python 3 module definition. The initializers are positional: base header,
+// module name, module docstring, per-module state size (-1) and the method
+// table.
+static struct PyModuleDef bser_module = {
+ PyModuleDef_HEAD_INIT,
+ "bser",
+ "Efficient encoding and decoding of BSER.",
+ -1,
+ bser_methods
+};
+// clang-format on
+
+// Python 3 module entry point. Returns the new module object, or NULL with an
+// exception set if the bserObject type or the module cannot be initialised.
+PyMODINIT_FUNC PyInit_bser(void) {
+  PyObject* mod;
+
+  // Finalize the type before handing out the module, and propagate the
+  // failure instead of returning a module alongside a pending exception.
+  if (PyType_Ready(&bserObjectType) < 0) {
+    return NULL;
+  }
+
+  mod = PyModule_Create(&bser_module);
+
+  return mod;
+}
+#else
+
+// Python 2 module entry point: registers the method table under the name
+// "bser" and finalizes the bserObject type. The results of both calls are
+// not checked.
+PyMODINIT_FUNC initbser(void) {
(void)Py_InitModule("bser", bser_methods);
PyType_Ready(&bserObjectType);
}
+#endif // PY_MAJOR_VERSION >= 3
/* vim:ts=2:sw=2:et:
*/
-
-// no-check-code -- this is a 3rd party library
--- a/hgext/fsmonitor/pywatchman/capabilities.py Thu Dec 22 11:07:59 2016 -0800
+++ b/hgext/fsmonitor/pywatchman/capabilities.py Thu Dec 22 11:22:32 2016 -0800
@@ -26,6 +26,11 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
import re
def parse_version(vstr):
@@ -65,5 +70,3 @@
vers['error'] = 'client required capability `' + name + \
'` is not supported by this server'
return vers
-
-# no-check-code -- this is a 3rd party library
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fsmonitor/pywatchman/compat.py Thu Dec 22 11:22:32 2016 -0800
@@ -0,0 +1,65 @@
+# Copyright 2016-present Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name Facebook nor the names of its contributors may be used to
+# endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+'''Compatibility module across Python 2 and 3.'''
+
+import sys
+
+PYTHON3 = sys.version_info >= (3, 0)
+
+# This is adapted from https://bitbucket.org/gutworth/six, and used under the
+# MIT license. See LICENSE for a full copyright notice.
+# reraise(tp, value, tb=None) re-raises an exception, optionally attaching the
+# traceback tb. One definition is provided per major Python version.
+if PYTHON3:
+ def reraise(tp, value, tb=None):
+ try:
+ # No instance supplied: build one from the exception class.
+ if value is None:
+ value = tp()
+ # Attach tb only when it differs from the traceback already on value.
+ if value.__traceback__ is not tb:
+ raise value.with_traceback(tb)
+ raise value
+ finally:
+ # Drop the local references on the way out (presumably to avoid keeping
+ # a traceback reference cycle alive -- confirm against six).
+ value = None
+ tb = None
+else:
+ # The three-argument raise statement is Python 2-only syntax, so the
+ # definition is handed to exec() as a string rather than written inline.
+ exec('''
+def reraise(tp, value, tb=None):
+ try:
+ raise tp, value, tb
+ finally:
+ tb = None
+'''.strip())
+
+# UNICODE names the text string type of the running interpreter.
+if PYTHON3:
+ UNICODE = str
+else:
+ UNICODE = unicode
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fsmonitor/pywatchman/encoding.py Thu Dec 22 11:22:32 2016 -0800
@@ -0,0 +1,73 @@
+# Copyright 2016-present Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name Facebook nor the names of its contributors may be used to
+# endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+'''Module to deal with filename encoding on the local system, as returned by
+Watchman.'''
+
+import sys
+
+from . import (
+ compat,
+)
+
+# Selects default_local_errors (the codec error handler used by encode_local
+# and decode_local) and defines get_local_encoding() for the running Python
+# version.
+if compat.PYTHON3:
+ default_local_errors = 'surrogateescape'
+
+ def get_local_encoding():
+ # Returns the name of the codec to use for filenames: 'utf-8' on win32,
+ # otherwise the interpreter's filesystem encoding.
+ if sys.platform == 'win32':
+ # Watchman always returns UTF-8 encoded strings on Windows.
+ return 'utf-8'
+ # On the Python 3 versions we support, sys.getfilesystemencoding never
+ # returns None.
+ return sys.getfilesystemencoding()
+else:
+ # Python 2 doesn't support surrogateescape, so use 'strict' by
+ # default. Users can register a custom surrogateescape error handler and use
+ # that if they so desire.
+ default_local_errors = 'strict'
+
+ def get_local_encoding():
+ # Same contract as the Python 3 variant, with a 'utf-8' fallback when
+ # the interpreter reports no filesystem encoding.
+ if sys.platform == 'win32':
+ # Watchman always returns UTF-8 encoded strings on Windows.
+ return 'utf-8'
+ fsencoding = sys.getfilesystemencoding()
+ if fsencoding is None:
+ # This is very unlikely to happen, but if it does, just use UTF-8
+ fsencoding = 'utf-8'
+ return fsencoding
+
+def encode_local(s):
+    """Encode the text string s with the local filename encoding.
+
+    The codec comes from get_local_encoding() and errors are handled with
+    default_local_errors.
+    """
+    codec = get_local_encoding()
+    return s.encode(codec, default_local_errors)
+
+def decode_local(bs):
+    """Decode the bytestring bs with the local filename encoding.
+
+    The codec comes from get_local_encoding() and errors are handled with
+    default_local_errors.
+    """
+    codec = get_local_encoding()
+    return bs.decode(codec, default_local_errors)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/fsmonitor/pywatchman/load.py Thu Dec 22 11:22:32 2016 -0800
@@ -0,0 +1,107 @@
+# Copyright 2016 Facebook, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name Facebook nor the names of its contributors may be used to
+# endorse or promote products derived from this software without specific
+# prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+try:
+ from . import bser
+except ImportError:
+ from . import pybser as bser
+
+import ctypes
+
+EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
+
+
+def _read_bytes(fp, buf):
+ """Fill a buffer with bytes read from a file-like object
+
+ @param fp: File-like object that implements readinto()
+ @type fp: file
+
+ @param buf: Writable buffer to read into; the callers in this module pass
+ ctypes char arrays
+ @type buf: ctypes array
+
+ @return: number of bytes stored in buf; this is less than len(buf) when
+ fp stops producing data before the buffer is full
+ """
+
+ # Keep reading into the still-unfilled tail of buf until it is full or
+ # readinto() reports that no more data is available (None or 0).
+ offset = 0
+ remaining = len(buf)
+ while remaining > 0:
+ l = fp.readinto((ctypes.c_char * remaining).from_buffer(buf, offset))
+ if l is None or l == 0:
+ return offset
+ offset += l
+ remaining -= l
+ return offset
+
+
+def load(fp, mutable=True, value_encoding=None, value_errors=None):
+ """Deserialize a BSER-encoded blob.
+
+ @param fp: The file-object to deserialize.
+ @type fp: file
+
+ @param mutable: Whether to return mutable results.
+ @type mutable: bool
+
+ @param value_encoding: Optional codec to use to decode values. If
+ unspecified or None, return values as bytestrings.
+ @type value_encoding: str
+
+ @param value_errors: Optional error handler for codec. 'strict' by default.
+ The other most common argument is 'surrogateescape' on
+ Python 3. If value_encoding is None, this is ignored.
+ @type value_errors: str
+
+ @return: the decoded object, or None when fp yields fewer bytes than the
+ header sniff needs
+
+ @raise RuntimeError: if fp ends before the whole PDU has been read
+ """
+ buf = ctypes.create_string_buffer(8192)
+ # Sniff just enough bytes to hold a version 1 header.
+ # NOTE(review): the sniff size is len(EMPTY_HEADER); confirm that PDUs with
+ # the longer version 2 header are handled correctly by this path.
+ SNIFF_BUFFER_SIZE = len(EMPTY_HEADER)
+ header = (ctypes.c_char * SNIFF_BUFFER_SIZE).from_buffer(buf)
+ read_len = _read_bytes(fp, header)
+ if read_len < len(header):
+ return None
+
+ # Grow the buffer when the announced PDU does not fit in the initial one.
+ total_len = bser.pdu_len(buf)
+ if total_len > len(buf):
+ ctypes.resize(buf, total_len)
+
+ # Read the rest of the PDU directly behind the header bytes.
+ body = (ctypes.c_char * (total_len - len(header))).from_buffer(
+ buf, len(header))
+ read_len = _read_bytes(fp, body)
+ if read_len < len(body):
+ raise RuntimeError('bser data ended early')
+
+ return bser.loads(
+ (ctypes.c_char * total_len).from_buffer(buf, 0),
+ mutable,
+ value_encoding,
+ value_errors)
--- a/hgext/fsmonitor/pywatchman/msc_stdint.h Thu Dec 22 11:07:59 2016 -0800
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,260 +0,0 @@
-// no-check-code
-// ISO C9x compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-// Copyright (c) 2006-2013 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// 1. Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the product nor the names of its contributors may
-// be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#if _MSC_VER >= 1600 // [
-#include <stdint.h>
-#else // ] _MSC_VER >= 1600 [
-
-#include <limits.h>
-
-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler give many errors like this:
-// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
-extern "C" {
-#endif
-# include <wchar.h>
-#ifdef __cplusplus
-}
-#endif
-
-// Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
-# define _W64 __w64
-# else
-# define _W64
-# endif
-#endif
-
-
-// 7.18.1 Integer types
-
-// 7.18.1.1 Exact-width integer types
-
-// Visual Studio 6 and Embedded Visual C++ 4 doesn't
-// realize that, e.g. char has the same size as __int8
-// so we give up on __intX for them.
-#if (_MSC_VER < 1300)
- typedef signed char int8_t;
- typedef signed short int16_t;
- typedef signed int int32_t;
- typedef unsigned char uint8_t;
- typedef unsigned short uint16_t;
- typedef unsigned int uint32_t;
-#else
- typedef signed __int8 int8_t;
- typedef signed __int16 int16_t;
- typedef signed __int32 int32_t;
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int16 uint16_t;
- typedef unsigned __int32 uint32_t;
-#endif
-typedef signed __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-
-
-// 7.18.1.2 Minimum-width integer types
-typedef int8_t int_least8_t;
-typedef int16_t int_least16_t;
-typedef int32_t int_least32_t;
-typedef int64_t int_least64_t;
-typedef uint8_t uint_least8_t;
-typedef uint16_t uint_least16_t;
-typedef uint32_t uint_least32_t;
-typedef uint64_t uint_least64_t;
-
-// 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t int_fast8_t;
-typedef int16_t int_fast16_t;
-typedef int32_t int_fast32_t;
-typedef int64_t int_fast64_t;
-typedef uint8_t uint_fast8_t;
-typedef uint16_t uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-typedef uint64_t uint_fast64_t;
-
-// 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
- typedef signed __int64 intptr_t;
- typedef unsigned __int64 uintptr_t;
-#else // _WIN64 ][
- typedef _W64 signed int intptr_t;
- typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
-
-// 7.18.1.5 Greatest-width integer types
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
-
-
-// 7.18.2 Limits of specified-width integer types
-
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
-
-// 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN ((int8_t)_I8_MIN)
-#define INT8_MAX _I8_MAX
-#define INT16_MIN ((int16_t)_I16_MIN)
-#define INT16_MAX _I16_MAX
-#define INT32_MIN ((int32_t)_I32_MIN)
-#define INT32_MAX _I32_MAX
-#define INT64_MIN ((int64_t)_I64_MIN)
-#define INT64_MAX _I64_MAX
-#define UINT8_MAX _UI8_MAX
-#define UINT16_MAX _UI16_MAX
-#define UINT32_MAX _UI32_MAX
-#define UINT64_MAX _UI64_MAX
-
-// 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN INT8_MIN
-#define INT_LEAST8_MAX INT8_MAX
-#define INT_LEAST16_MIN INT16_MIN
-#define INT_LEAST16_MAX INT16_MAX
-#define INT_LEAST32_MIN INT32_MIN
-#define INT_LEAST32_MAX INT32_MAX
-#define INT_LEAST64_MIN INT64_MIN
-#define INT_LEAST64_MAX INT64_MAX
-#define UINT_LEAST8_MAX UINT8_MAX
-#define UINT_LEAST16_MAX UINT16_MAX
-#define UINT_LEAST32_MAX UINT32_MAX
-#define UINT_LEAST64_MAX UINT64_MAX
-
-// 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN INT8_MIN
-#define INT_FAST8_MAX INT8_MAX
-#define INT_FAST16_MIN INT16_MIN
-#define INT_FAST16_MAX INT16_MAX
-#define INT_FAST32_MIN INT32_MIN
-#define INT_FAST32_MAX INT32_MAX
-#define INT_FAST64_MIN INT64_MIN
-#define INT_FAST64_MAX INT64_MAX
-#define UINT_FAST8_MAX UINT8_MAX
-#define UINT_FAST16_MAX UINT16_MAX
-#define UINT_FAST32_MAX UINT32_MAX
-#define UINT_FAST64_MAX UINT64_MAX
-
-// 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-# define INTPTR_MIN INT64_MIN
-# define INTPTR_MAX INT64_MAX
-# define UINTPTR_MAX UINT64_MAX
-#else // _WIN64 ][
-# define INTPTR_MIN INT32_MIN
-# define INTPTR_MAX INT32_MAX
-# define UINTPTR_MAX UINT32_MAX
-#endif // _WIN64 ]
-
-// 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN INT64_MIN
-#define INTMAX_MAX INT64_MAX
-#define UINTMAX_MAX UINT64_MAX
-
-// 7.18.3 Limits of other integer types
-
-#ifdef _WIN64 // [
-# define PTRDIFF_MIN _I64_MIN
-# define PTRDIFF_MAX _I64_MAX
-#else // _WIN64 ][
-# define PTRDIFF_MIN _I32_MIN
-# define PTRDIFF_MAX _I32_MAX
-#endif // _WIN64 ]
-
-#define SIG_ATOMIC_MIN INT_MIN
-#define SIG_ATOMIC_MAX INT_MAX
-
-#ifndef SIZE_MAX // [
-# ifdef _WIN64 // [
-# define SIZE_MAX _UI64_MAX
-# else // _WIN64 ][
-# define SIZE_MAX _UI32_MAX
-# endif // _WIN64 ]
-#endif // SIZE_MAX ]
-
-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-# define WCHAR_MIN 0
-#endif // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-# define WCHAR_MAX _UI16_MAX
-#endif // WCHAR_MAX ]
-
-#define WINT_MIN 0
-#define WINT_MAX _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
-
-
-// 7.18.4 Limits of other integer types
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val) val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val) val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
-// Check out Issue 9 for the details.
-#ifndef INTMAX_C // [
-# define INTMAX_C INT64_C
-#endif // INTMAX_C ]
-#ifndef UINTMAX_C // [
-# define UINTMAX_C UINT64_C
-#endif // UINTMAX_C ]
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-#endif // _MSC_VER >= 1600 ]
-
-#endif // _MSC_STDINT_H_ ]
--- a/hgext/fsmonitor/pywatchman/pybser.py Thu Dec 22 11:07:59 2016 -0800
+++ b/hgext/fsmonitor/pywatchman/pybser.py Thu Dec 22 11:22:32 2016 -0800
@@ -26,33 +26,51 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# no unicode literals
+
+import binascii
import collections
import ctypes
import struct
import sys
-BSER_ARRAY = '\x00'
-BSER_OBJECT = '\x01'
-BSER_STRING = '\x02'
-BSER_INT8 = '\x03'
-BSER_INT16 = '\x04'
-BSER_INT32 = '\x05'
-BSER_INT64 = '\x06'
-BSER_REAL = '\x07'
-BSER_TRUE = '\x08'
-BSER_FALSE = '\x09'
-BSER_NULL = '\x0a'
-BSER_TEMPLATE = '\x0b'
-BSER_SKIP = '\x0c'
+from . import (
+ compat,
+)
+
+BSER_ARRAY = b'\x00'
+BSER_OBJECT = b'\x01'
+BSER_BYTESTRING = b'\x02'
+BSER_INT8 = b'\x03'
+BSER_INT16 = b'\x04'
+BSER_INT32 = b'\x05'
+BSER_INT64 = b'\x06'
+BSER_REAL = b'\x07'
+BSER_TRUE = b'\x08'
+BSER_FALSE = b'\x09'
+BSER_NULL = b'\x0a'
+BSER_TEMPLATE = b'\x0b'
+BSER_SKIP = b'\x0c'
+BSER_UTF8STRING = b'\x0d'
+
+if compat.PYTHON3:
+ STRING_TYPES = (str, bytes)
+ unicode = str
+ def tobytes(i):
+ return str(i).encode('ascii')
+ long = int
+else:
+ STRING_TYPES = (unicode, str)
+ tobytes = bytes
# Leave room for the serialization header, which includes
# our overall length. To make things simpler, we'll use an
# int32 for the header
-EMPTY_HEADER = "\x00\x01\x05\x00\x00\x00\x00"
-
-# Python 3 conditional for supporting Python 2's int/long types
-if sys.version_info > (3,):
- long = int
+EMPTY_HEADER = b"\x00\x01\x05\x00\x00\x00\x00"
+EMPTY_HEADER_V2 = b"\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
def _int_size(x):
"""Return the smallest size int that can store the value"""
@@ -67,13 +85,28 @@
else:
raise RuntimeError('Cannot represent value: ' + str(x))
+def _buf_pos(buf, pos):
+ ret = buf[pos]
+ # In Python 2, buf is a str array so buf[pos] is a string. In Python 3, buf
+ # is a bytes array and buf[pos] is an integer.
+ if compat.PYTHON3:
+ ret = bytes((ret,))
+ return ret
class _bser_buffer(object):
- def __init__(self):
+ def __init__(self, version):
+ self.bser_version = version
self.buf = ctypes.create_string_buffer(8192)
- struct.pack_into(str(len(EMPTY_HEADER)) + 's', self.buf, 0, EMPTY_HEADER)
- self.wpos = len(EMPTY_HEADER)
+ if self.bser_version == 1:
+ struct.pack_into(tobytes(len(EMPTY_HEADER)) + b's', self.buf, 0,
+ EMPTY_HEADER)
+ self.wpos = len(EMPTY_HEADER)
+ else:
+ assert self.bser_version == 2
+ struct.pack_into(tobytes(len(EMPTY_HEADER_V2)) + b's', self.buf, 0,
+ EMPTY_HEADER_V2)
+ self.wpos = len(EMPTY_HEADER_V2)
def ensure_size(self, size):
while ctypes.sizeof(self.buf) - self.wpos < size:
@@ -84,13 +117,13 @@
to_write = size + 1
self.ensure_size(to_write)
if size == 1:
- struct.pack_into('=cb', self.buf, self.wpos, BSER_INT8, val)
+ struct.pack_into(b'=cb', self.buf, self.wpos, BSER_INT8, val)
elif size == 2:
- struct.pack_into('=ch', self.buf, self.wpos, BSER_INT16, val)
+ struct.pack_into(b'=ch', self.buf, self.wpos, BSER_INT16, val)
elif size == 4:
- struct.pack_into('=ci', self.buf, self.wpos, BSER_INT32, val)
+ struct.pack_into(b'=ci', self.buf, self.wpos, BSER_INT32, val)
elif size == 8:
- struct.pack_into('=cq', self.buf, self.wpos, BSER_INT64, val)
+ struct.pack_into(b'=cq', self.buf, self.wpos, BSER_INT64, val)
else:
raise RuntimeError('Cannot represent this long value')
self.wpos += to_write
@@ -104,13 +137,17 @@
to_write = 2 + size + s_len
self.ensure_size(to_write)
if size == 1:
- struct.pack_into('=ccb' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT8, s_len, s)
+ struct.pack_into(b'=ccb' + tobytes(s_len) + b's', self.buf,
+ self.wpos, BSER_BYTESTRING, BSER_INT8, s_len, s)
elif size == 2:
- struct.pack_into('=cch' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT16, s_len, s)
+ struct.pack_into(b'=cch' + tobytes(s_len) + b's', self.buf,
+ self.wpos, BSER_BYTESTRING, BSER_INT16, s_len, s)
elif size == 4:
- struct.pack_into('=cci' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT32, s_len, s)
+ struct.pack_into(b'=cci' + tobytes(s_len) + b's', self.buf,
+ self.wpos, BSER_BYTESTRING, BSER_INT32, s_len, s)
elif size == 8:
- struct.pack_into('=ccq' + str(s_len) + 's', self.buf, self.wpos, BSER_STRING, BSER_INT64, s_len, s)
+ struct.pack_into(b'=ccq' + tobytes(s_len) + b's', self.buf,
+ self.wpos, BSER_BYTESTRING, BSER_INT64, s_len, s)
else:
raise RuntimeError('Cannot represent this string value')
self.wpos += to_write
@@ -124,54 +161,68 @@
to_encode = BSER_TRUE
else:
to_encode = BSER_FALSE
- struct.pack_into('=c', self.buf, self.wpos, to_encode)
+ struct.pack_into(b'=c', self.buf, self.wpos, to_encode)
self.wpos += needed
elif val is None:
needed = 1
self.ensure_size(needed)
- struct.pack_into('=c', self.buf, self.wpos, BSER_NULL)
+ struct.pack_into(b'=c', self.buf, self.wpos, BSER_NULL)
self.wpos += needed
elif isinstance(val, (int, long)):
self.append_long(val)
- elif isinstance(val, (str, unicode)):
+ elif isinstance(val, STRING_TYPES):
self.append_string(val)
elif isinstance(val, float):
needed = 9
self.ensure_size(needed)
- struct.pack_into('=cd', self.buf, self.wpos, BSER_REAL, val)
+ struct.pack_into(b'=cd', self.buf, self.wpos, BSER_REAL, val)
self.wpos += needed
- elif isinstance(val, collections.Mapping) and isinstance(val, collections.Sized):
+ elif isinstance(val, collections.Mapping) and \
+ isinstance(val, collections.Sized):
val_len = len(val)
size = _int_size(val_len)
needed = 2 + size
self.ensure_size(needed)
if size == 1:
- struct.pack_into('=ccb', self.buf, self.wpos, BSER_OBJECT, BSER_INT8, val_len)
+ struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_OBJECT,
+ BSER_INT8, val_len)
elif size == 2:
- struct.pack_into('=cch', self.buf, self.wpos, BSER_OBJECT, BSER_INT16, val_len)
+ struct.pack_into(b'=cch', self.buf, self.wpos, BSER_OBJECT,
+ BSER_INT16, val_len)
elif size == 4:
- struct.pack_into('=cci', self.buf, self.wpos, BSER_OBJECT, BSER_INT32, val_len)
+ struct.pack_into(b'=cci', self.buf, self.wpos, BSER_OBJECT,
+ BSER_INT32, val_len)
elif size == 8:
- struct.pack_into('=ccq', self.buf, self.wpos, BSER_OBJECT, BSER_INT64, val_len)
+ struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_OBJECT,
+ BSER_INT64, val_len)
else:
raise RuntimeError('Cannot represent this mapping value')
self.wpos += needed
- for k, v in val.iteritems():
+ if compat.PYTHON3:
+ iteritems = val.items()
+ else:
+ iteritems = val.iteritems()
+ for k, v in iteritems:
self.append_string(k)
self.append_recursive(v)
- elif isinstance(val, collections.Iterable) and isinstance(val, collections.Sized):
+ elif isinstance(val, collections.Iterable) and \
+ isinstance(val, collections.Sized):
val_len = len(val)
size = _int_size(val_len)
needed = 2 + size
self.ensure_size(needed)
if size == 1:
- struct.pack_into('=ccb', self.buf, self.wpos, BSER_ARRAY, BSER_INT8, val_len)
+ struct.pack_into(b'=ccb', self.buf, self.wpos, BSER_ARRAY,
+ BSER_INT8, val_len)
elif size == 2:
- struct.pack_into('=cch', self.buf, self.wpos, BSER_ARRAY, BSER_INT16, val_len)
+ struct.pack_into(b'=cch', self.buf, self.wpos, BSER_ARRAY,
+ BSER_INT16, val_len)
elif size == 4:
- struct.pack_into('=cci', self.buf, self.wpos, BSER_ARRAY, BSER_INT32, val_len)
+ struct.pack_into(b'=cci', self.buf, self.wpos, BSER_ARRAY,
+ BSER_INT32, val_len)
elif size == 8:
- struct.pack_into('=ccq', self.buf, self.wpos, BSER_ARRAY, BSER_INT64, val_len)
+ struct.pack_into(b'=ccq', self.buf, self.wpos, BSER_ARRAY,
+ BSER_INT64, val_len)
else:
raise RuntimeError('Cannot represent this sequence value')
self.wpos += needed
@@ -181,56 +232,18 @@
raise RuntimeError('Cannot represent unknown value type')
-def dumps(obj):
- bser_buf = _bser_buffer()
+def dumps(obj, version=1, capabilities=0):
+ bser_buf = _bser_buffer(version=version)
bser_buf.append_recursive(obj)
# Now fill in the overall length
- obj_len = bser_buf.wpos - len(EMPTY_HEADER)
- struct.pack_into('=i', bser_buf.buf, 3, obj_len)
- return bser_buf.buf.raw[:bser_buf.wpos]
-
-
-def _bunser_int(buf, pos):
- try:
- int_type = buf[pos]
- except IndexError:
- raise ValueError('Invalid bser int encoding, pos out of range')
- if int_type == BSER_INT8:
- needed = 2
- fmt = '=b'
- elif int_type == BSER_INT16:
- needed = 3
- fmt = '=h'
- elif int_type == BSER_INT32:
- needed = 5
- fmt = '=i'
- elif int_type == BSER_INT64:
- needed = 9
- fmt = '=q'
+ if version == 1:
+ obj_len = bser_buf.wpos - len(EMPTY_HEADER)
+ struct.pack_into(b'=i', bser_buf.buf, 3, obj_len)
else:
- raise ValueError('Invalid bser int encoding 0x%02x' % int(int_type))
- int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
- return (int_val, pos + needed)
-
-
-def _bunser_string(buf, pos):
- str_len, pos = _bunser_int(buf, pos + 1)
- str_val = struct.unpack_from(str(str_len) + 's', buf, pos)[0]
- return (str_val, pos + str_len)
-
-
-def _bunser_array(buf, pos, mutable=True):
- arr_len, pos = _bunser_int(buf, pos + 1)
- arr = []
- for i in range(arr_len):
- arr_item, pos = _bser_loads_recursive(buf, pos, mutable)
- arr.append(arr_item)
-
- if not mutable:
- arr = tuple(arr)
-
- return arr, pos
-
+ obj_len = bser_buf.wpos - len(EMPTY_HEADER_V2)
+ struct.pack_into(b'=i', bser_buf.buf, 2, capabilities)
+ struct.pack_into(b'=i', bser_buf.buf, 7, obj_len)
+ return bser_buf.buf.raw[:bser_buf.wpos]
# This is a quack-alike with the bserObjectType in bser.c
# It provides by getattr accessors and getitem for both index
@@ -260,100 +273,212 @@
def __len__(self):
return len(self._keys)
-def _bunser_object(buf, pos, mutable=True):
- obj_len, pos = _bunser_int(buf, pos + 1)
- if mutable:
- obj = {}
- else:
- keys = []
- vals = []
+class Bunser(object):
+ def __init__(self, mutable=True, value_encoding=None, value_errors=None):
+ self.mutable = mutable
+ self.value_encoding = value_encoding
+
+ if value_encoding is None:
+ self.value_errors = None
+ elif value_errors is None:
+ self.value_errors = 'strict'
+ else:
+ self.value_errors = value_errors
- for i in range(obj_len):
- key, pos = _bunser_string(buf, pos)
- val, pos = _bser_loads_recursive(buf, pos, mutable)
- if mutable:
- obj[key] = val
+ @staticmethod
+ def unser_int(buf, pos):
+ try:
+ int_type = _buf_pos(buf, pos)
+ except IndexError:
+ raise ValueError('Invalid bser int encoding, pos out of range')
+ if int_type == BSER_INT8:
+ needed = 2
+ fmt = b'=b'
+ elif int_type == BSER_INT16:
+ needed = 3
+ fmt = b'=h'
+ elif int_type == BSER_INT32:
+ needed = 5
+ fmt = b'=i'
+ elif int_type == BSER_INT64:
+ needed = 9
+ fmt = b'=q'
else:
- keys.append(key)
- vals.append(val)
+ raise ValueError('Invalid bser int encoding 0x%s' %
+ binascii.hexlify(int_type).decode('ascii'))
+ int_val = struct.unpack_from(fmt, buf, pos + 1)[0]
+ return (int_val, pos + needed)
- if not mutable:
- obj = _BunserDict(keys, vals)
-
- return obj, pos
-
+ def unser_utf8_string(self, buf, pos):
+ str_len, pos = self.unser_int(buf, pos + 1)
+ str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
+ return (str_val.decode('utf-8'), pos + str_len)
-def _bunser_template(buf, pos, mutable=True):
- if buf[pos + 1] != BSER_ARRAY:
- raise RuntimeError('Expect ARRAY to follow TEMPLATE')
- keys, pos = _bunser_array(buf, pos + 1)
- nitems, pos = _bunser_int(buf, pos)
- arr = []
- for i in range(nitems):
- if mutable:
+ def unser_bytestring(self, buf, pos):
+ str_len, pos = self.unser_int(buf, pos + 1)
+ str_val = struct.unpack_from(tobytes(str_len) + b's', buf, pos)[0]
+ if self.value_encoding is not None:
+ str_val = str_val.decode(self.value_encoding, self.value_errors)
+ # str_len stays the same because that's the length in bytes
+ return (str_val, pos + str_len)
+
+ def unser_array(self, buf, pos):
+ arr_len, pos = self.unser_int(buf, pos + 1)
+ arr = []
+ for i in range(arr_len):
+ arr_item, pos = self.loads_recursive(buf, pos)
+ arr.append(arr_item)
+
+ if not self.mutable:
+ arr = tuple(arr)
+
+ return arr, pos
+
+ def unser_object(self, buf, pos):
+ obj_len, pos = self.unser_int(buf, pos + 1)
+ if self.mutable:
obj = {}
else:
+ keys = []
vals = []
- for keyidx in range(len(keys)):
- if buf[pos] == BSER_SKIP:
- pos += 1
- ele = None
+ for i in range(obj_len):
+ key, pos = self.unser_utf8_string(buf, pos)
+ val, pos = self.loads_recursive(buf, pos)
+ if self.mutable:
+ obj[key] = val
else:
- ele, pos = _bser_loads_recursive(buf, pos, mutable)
+ keys.append(key)
+ vals.append(val)
- if mutable:
- key = keys[keyidx]
- obj[key] = ele
- else:
- vals.append(ele)
-
- if not mutable:
+ if not self.mutable:
obj = _BunserDict(keys, vals)
- arr.append(obj)
- return arr, pos
+ return obj, pos
+
+ def unser_template(self, buf, pos):
+ val_type = _buf_pos(buf, pos + 1)
+ if val_type != BSER_ARRAY:
+ raise RuntimeError('Expect ARRAY to follow TEMPLATE')
+ # force UTF-8 on keys
+ keys_bunser = Bunser(mutable=self.mutable, value_encoding='utf-8')
+ keys, pos = keys_bunser.unser_array(buf, pos + 1)
+ nitems, pos = self.unser_int(buf, pos)
+ arr = []
+ for i in range(nitems):
+ if self.mutable:
+ obj = {}
+ else:
+ vals = []
+
+ for keyidx in range(len(keys)):
+ if _buf_pos(buf, pos) == BSER_SKIP:
+ pos += 1
+ ele = None
+ else:
+ ele, pos = self.loads_recursive(buf, pos)
+
+ if self.mutable:
+ key = keys[keyidx]
+ obj[key] = ele
+ else:
+ vals.append(ele)
+
+ if not self.mutable:
+ obj = _BunserDict(keys, vals)
+
+ arr.append(obj)
+ return arr, pos
+
+ def loads_recursive(self, buf, pos):
+ val_type = _buf_pos(buf, pos)
+ if (val_type == BSER_INT8 or val_type == BSER_INT16 or
+ val_type == BSER_INT32 or val_type == BSER_INT64):
+ return self.unser_int(buf, pos)
+ elif val_type == BSER_REAL:
+ val = struct.unpack_from(b'=d', buf, pos + 1)[0]
+ return (val, pos + 9)
+ elif val_type == BSER_TRUE:
+ return (True, pos + 1)
+ elif val_type == BSER_FALSE:
+ return (False, pos + 1)
+ elif val_type == BSER_NULL:
+ return (None, pos + 1)
+ elif val_type == BSER_BYTESTRING:
+ return self.unser_bytestring(buf, pos)
+ elif val_type == BSER_UTF8STRING:
+ return self.unser_utf8_string(buf, pos)
+ elif val_type == BSER_ARRAY:
+ return self.unser_array(buf, pos)
+ elif val_type == BSER_OBJECT:
+ return self.unser_object(buf, pos)
+ elif val_type == BSER_TEMPLATE:
+ return self.unser_template(buf, pos)
+ else:
+ raise ValueError('unhandled bser opcode 0x%s' %
+ binascii.hexlify(val_type).decode('ascii'))
-def _bser_loads_recursive(buf, pos, mutable=True):
- val_type = buf[pos]
- if (val_type == BSER_INT8 or val_type == BSER_INT16 or
- val_type == BSER_INT32 or val_type == BSER_INT64):
- return _bunser_int(buf, pos)
- elif val_type == BSER_REAL:
- val = struct.unpack_from('=d', buf, pos + 1)[0]
- return (val, pos + 9)
- elif val_type == BSER_TRUE:
- return (True, pos + 1)
- elif val_type == BSER_FALSE:
- return (False, pos + 1)
- elif val_type == BSER_NULL:
- return (None, pos + 1)
- elif val_type == BSER_STRING:
- return _bunser_string(buf, pos)
- elif val_type == BSER_ARRAY:
- return _bunser_array(buf, pos, mutable)
- elif val_type == BSER_OBJECT:
- return _bunser_object(buf, pos, mutable)
- elif val_type == BSER_TEMPLATE:
- return _bunser_template(buf, pos, mutable)
+def _pdu_info_helper(buf):
+ bser_version = -1
+ if buf[0:2] == EMPTY_HEADER[0:2]:
+ bser_version = 1
+ bser_capabilities = 0
+ expected_len, pos2 = Bunser.unser_int(buf, 2)
+ elif buf[0:2] == EMPTY_HEADER_V2[0:2]:
+ if len(buf) < 8:
+ raise ValueError('Invalid BSER header')
+ bser_version = 2
+ bser_capabilities = struct.unpack_from("I", buf, 2)[0]
+ expected_len, pos2 = Bunser.unser_int(buf, 6)
else:
- raise RuntimeError('unhandled bser opcode 0x%02x' % (val_type,))
+ raise ValueError('Invalid BSER header')
+
+ return bser_version, bser_capabilities, expected_len, pos2
+
+
+def pdu_info(buf):
+ info = _pdu_info_helper(buf)
+ return info[0], info[1], info[2] + info[3]
def pdu_len(buf):
- if buf[0:2] != EMPTY_HEADER[0:2]:
- raise RuntimeError('Invalid BSER header')
- expected_len, pos = _bunser_int(buf, 2)
- return expected_len + pos
+ info = _pdu_info_helper(buf)
+ return info[2] + info[3]
-def loads(buf, mutable=True):
- if buf[0:2] != EMPTY_HEADER[0:2]:
- raise RuntimeError('Invalid BSER header')
- expected_len, pos = _bunser_int(buf, 2)
+def loads(buf, mutable=True, value_encoding=None, value_errors=None):
+ """Deserialize a BSER-encoded blob.
+
+ @param buf: The buffer to deserialize.
+ @type buf: bytes
+
+ @param mutable: Whether to return mutable results.
+ @type mutable: bool
+
+ @param value_encoding: Optional codec to use to decode values. If
+ unspecified or None, return values as bytestrings.
+ @type value_encoding: str
+
+ @param value_errors: Optional error handler for codec. 'strict' by default.
+ The other most common argument is 'surrogateescape' on
+ Python 3. If value_encoding is None, this is ignored.
+ @type value_errors: str
+ """
+
+ info = _pdu_info_helper(buf)
+ expected_len = info[2]
+ pos = info[3]
+
if len(buf) != expected_len + pos:
- raise RuntimeError('bser data len != header len')
- return _bser_loads_recursive(buf, pos, mutable)[0]
+ raise ValueError('bser data len != header len')
+
+ bunser = Bunser(mutable=mutable, value_encoding=value_encoding,
+ value_errors=value_errors)
-# no-check-code -- this is a 3rd party library
+ return bunser.loads_recursive(buf, pos)[0]
+
+
+def load(fp, mutable=True, value_encoding=None, value_errors=None):
+ from . import load
+ return load.load(fp, mutable, value_encoding, value_errors)
--- a/tests/test-check-py3-compat.t Thu Dec 22 11:07:59 2016 -0800
+++ b/tests/test-check-py3-compat.t Thu Dec 22 11:22:32 2016 -0800
@@ -15,10 +15,6 @@
contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import
contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import
contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import
- hgext/fsmonitor/pywatchman/__init__.py not using absolute_import
- hgext/fsmonitor/pywatchman/__init__.py requires print_function
- hgext/fsmonitor/pywatchman/capabilities.py not using absolute_import
- hgext/fsmonitor/pywatchman/pybser.py not using absolute_import
i18n/check-translation.py not using absolute_import
setup.py not using absolute_import
tests/test-demandimport.py not using absolute_import