Mercurial > hg
comparison hgext/fsmonitor/pywatchman/bser.c @ 30656:16f4b341288d
fsmonitor: refresh pywatchman to upstream
Update to upstream version c77452. The refresh includes fixes to improve
Windows compatibility.
There is a minor update to 'test-check-py3-compat.t' as c77452 no longer has
the py3 compatibility issues the previous version had.
# no-check-commit
author | Zack Hricz <zphricz@fb.com> |
---|---|
date | Thu, 22 Dec 2016 11:22:32 -0800 |
parents | a011080fdb7b |
children | b1f62cd39b5c |
comparison
equal
deleted
inserted
replaced
30655:f35397fe0c04 | 30656:16f4b341288d |
---|---|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ | 29 */ |
30 | 30 |
31 #include <Python.h> | 31 #include <Python.h> |
32 #include <bytesobject.h> | |
32 #ifdef _MSC_VER | 33 #ifdef _MSC_VER |
33 #define inline __inline | 34 #define inline __inline |
34 #include "msc_stdint.h" | 35 #if _MSC_VER >= 1800 |
36 #include <stdint.h> | |
37 #else | |
38 // The compiler associated with Python 2.7 on Windows doesn't ship | |
39 // with stdint.h, so define the small subset that we use here. | |
40 typedef __int8 int8_t; | |
41 typedef __int16 int16_t; | |
42 typedef __int32 int32_t; | |
43 typedef __int64 int64_t; | |
44 typedef unsigned __int8 uint8_t; | |
45 typedef unsigned __int16 uint16_t; | |
46 typedef unsigned __int32 uint32_t; | |
47 typedef unsigned __int64 uint64_t; | |
48 #define UINT32_MAX 4294967295U | |
35 #endif | 49 #endif |
36 | 50 #endif |
51 | |
52 // clang-format off | |
37 /* Return the smallest size int that can store the value */ | 53 /* Return the smallest size int that can store the value */ |
38 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \ | 54 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \ |
39 ((x) == ((int16_t)x)) ? 2 : \ | 55 ((x) == ((int16_t)x)) ? 2 : \ |
40 ((x) == ((int32_t)x)) ? 4 : 8) | 56 ((x) == ((int32_t)x)) ? 4 : 8) |
41 | 57 |
42 #define BSER_ARRAY 0x00 | 58 #define BSER_ARRAY 0x00 |
43 #define BSER_OBJECT 0x01 | 59 #define BSER_OBJECT 0x01 |
44 #define BSER_STRING 0x02 | 60 #define BSER_BYTESTRING 0x02 |
45 #define BSER_INT8 0x03 | 61 #define BSER_INT8 0x03 |
46 #define BSER_INT16 0x04 | 62 #define BSER_INT16 0x04 |
47 #define BSER_INT32 0x05 | 63 #define BSER_INT32 0x05 |
48 #define BSER_INT64 0x06 | 64 #define BSER_INT64 0x06 |
49 #define BSER_REAL 0x07 | 65 #define BSER_REAL 0x07 |
50 #define BSER_TRUE 0x08 | 66 #define BSER_TRUE 0x08 |
51 #define BSER_FALSE 0x09 | 67 #define BSER_FALSE 0x09 |
52 #define BSER_NULL 0x0a | 68 #define BSER_NULL 0x0a |
53 #define BSER_TEMPLATE 0x0b | 69 #define BSER_TEMPLATE 0x0b |
54 #define BSER_SKIP 0x0c | 70 #define BSER_SKIP 0x0c |
71 #define BSER_UTF8STRING 0x0d | |
72 // clang-format on | |
55 | 73 |
56 // An immutable object representation of BSER_OBJECT. | 74 // An immutable object representation of BSER_OBJECT. |
57 // Rather than build a hash table, key -> value are obtained | 75 // Rather than build a hash table, key -> value are obtained |
58 // by walking the list of keys to determine the offset into | 76 // by walking the list of keys to determine the offset into |
59 // the values array. The assumption is that the number of | 77 // the values array. The assumption is that the number of |
62 // so that the time overhead for this is small compared to | 80 // so that the time overhead for this is small compared to |
63 // using a proper hash table. Even with this simplistic | 81 // using a proper hash table. Even with this simplistic |
64 // approach, this is still faster for the mercurial use case | 82 // approach, this is still faster for the mercurial use case |
65 // as it helps to eliminate creating N other objects to | 83 // as it helps to eliminate creating N other objects to |
66 // represent the stat information in the hgwatchman extension | 84 // represent the stat information in the hgwatchman extension |
85 // clang-format off | |
67 typedef struct { | 86 typedef struct { |
68 PyObject_HEAD | 87 PyObject_HEAD |
69 PyObject *keys; // tuple of field names | 88 PyObject *keys; // tuple of field names |
70 PyObject *values; // tuple of values | 89 PyObject *values; // tuple of values |
71 } bserObject; | 90 } bserObject; |
72 | 91 // clang-format on |
73 static Py_ssize_t bserobj_tuple_length(PyObject *o) { | 92 |
74 bserObject *obj = (bserObject*)o; | 93 static Py_ssize_t bserobj_tuple_length(PyObject* o) { |
94 bserObject* obj = (bserObject*)o; | |
75 | 95 |
76 return PySequence_Length(obj->keys); | 96 return PySequence_Length(obj->keys); |
77 } | 97 } |
78 | 98 |
79 static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) { | 99 static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) { |
80 bserObject *obj = (bserObject*)o; | 100 bserObject* obj = (bserObject*)o; |
81 | 101 |
82 return PySequence_GetItem(obj->values, i); | 102 return PySequence_GetItem(obj->values, i); |
83 } | 103 } |
84 | 104 |
105 // clang-format off | |
85 static PySequenceMethods bserobj_sq = { | 106 static PySequenceMethods bserobj_sq = { |
86 bserobj_tuple_length, /* sq_length */ | 107 bserobj_tuple_length, /* sq_length */ |
87 0, /* sq_concat */ | 108 0, /* sq_concat */ |
88 0, /* sq_repeat */ | 109 0, /* sq_repeat */ |
89 bserobj_tuple_item, /* sq_item */ | 110 bserobj_tuple_item, /* sq_item */ |
90 0, /* sq_ass_item */ | 111 0, /* sq_ass_item */ |
91 0, /* sq_contains */ | 112 0, /* sq_contains */ |
92 0, /* sq_inplace_concat */ | 113 0, /* sq_inplace_concat */ |
93 0 /* sq_inplace_repeat */ | 114 0 /* sq_inplace_repeat */ |
94 }; | 115 }; |
95 | 116 // clang-format on |
96 static void bserobj_dealloc(PyObject *o) { | 117 |
97 bserObject *obj = (bserObject*)o; | 118 static void bserobj_dealloc(PyObject* o) { |
119 bserObject* obj = (bserObject*)o; | |
98 | 120 |
99 Py_CLEAR(obj->keys); | 121 Py_CLEAR(obj->keys); |
100 Py_CLEAR(obj->values); | 122 Py_CLEAR(obj->values); |
101 PyObject_Del(o); | 123 PyObject_Del(o); |
102 } | 124 } |
103 | 125 |
104 static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) { | 126 static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) { |
105 bserObject *obj = (bserObject*)o; | 127 bserObject* obj = (bserObject*)o; |
106 Py_ssize_t i, n; | 128 Py_ssize_t i, n; |
107 const char *namestr; | 129 PyObject* name_bytes = NULL; |
130 PyObject* ret = NULL; | |
131 const char* namestr; | |
108 | 132 |
109 if (PyIndex_Check(name)) { | 133 if (PyIndex_Check(name)) { |
110 i = PyNumber_AsSsize_t(name, PyExc_IndexError); | 134 i = PyNumber_AsSsize_t(name, PyExc_IndexError); |
111 if (i == -1 && PyErr_Occurred()) { | 135 if (i == -1 && PyErr_Occurred()) { |
112 return NULL; | 136 goto bail; |
113 } | 137 } |
114 return PySequence_GetItem(obj->values, i); | 138 ret = PySequence_GetItem(obj->values, i); |
115 } | 139 goto bail; |
116 | 140 } |
141 | |
142 // We can be passed in Unicode objects here -- we don't support anything other | |
143 // than UTF-8 for keys. | |
144 if (PyUnicode_Check(name)) { | |
145 name_bytes = PyUnicode_AsUTF8String(name); | |
146 if (name_bytes == NULL) { | |
147 goto bail; | |
148 } | |
149 namestr = PyBytes_AsString(name_bytes); | |
150 } else { | |
151 namestr = PyBytes_AsString(name); | |
152 } | |
153 | |
154 if (namestr == NULL) { | |
155 goto bail; | |
156 } | |
117 // hack^Wfeature to allow mercurial to use "st_size" to reference "size" | 157 // hack^Wfeature to allow mercurial to use "st_size" to reference "size" |
118 namestr = PyString_AsString(name); | |
119 if (!strncmp(namestr, "st_", 3)) { | 158 if (!strncmp(namestr, "st_", 3)) { |
120 namestr += 3; | 159 namestr += 3; |
121 } | 160 } |
122 | 161 |
123 n = PyTuple_GET_SIZE(obj->keys); | 162 n = PyTuple_GET_SIZE(obj->keys); |
124 for (i = 0; i < n; i++) { | 163 for (i = 0; i < n; i++) { |
125 const char *item_name = NULL; | 164 const char* item_name = NULL; |
126 PyObject *key = PyTuple_GET_ITEM(obj->keys, i); | 165 PyObject* key = PyTuple_GET_ITEM(obj->keys, i); |
127 | 166 |
128 item_name = PyString_AsString(key); | 167 item_name = PyBytes_AsString(key); |
129 if (!strcmp(item_name, namestr)) { | 168 if (!strcmp(item_name, namestr)) { |
130 return PySequence_GetItem(obj->values, i); | 169 ret = PySequence_GetItem(obj->values, i); |
131 } | 170 goto bail; |
132 } | 171 } |
133 PyErr_Format(PyExc_AttributeError, | 172 } |
134 "bserobject has no attribute '%.400s'", namestr); | 173 |
135 return NULL; | 174 PyErr_Format( |
136 } | 175 PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr); |
137 | 176 bail: |
177 Py_XDECREF(name_bytes); | |
178 return ret; | |
179 } | |
180 | |
181 // clang-format off | |
138 static PyMappingMethods bserobj_map = { | 182 static PyMappingMethods bserobj_map = { |
139 bserobj_tuple_length, /* mp_length */ | 183 bserobj_tuple_length, /* mp_length */ |
140 bserobj_getattrro, /* mp_subscript */ | 184 bserobj_getattrro, /* mp_subscript */ |
141 0 /* mp_ass_subscript */ | 185 0 /* mp_ass_subscript */ |
142 }; | 186 }; |
179 0, /* tp_dictoffset */ | 223 0, /* tp_dictoffset */ |
180 0, /* tp_init */ | 224 0, /* tp_init */ |
181 0, /* tp_alloc */ | 225 0, /* tp_alloc */ |
182 0, /* tp_new */ | 226 0, /* tp_new */ |
183 }; | 227 }; |
184 | 228 // clang-format on |
185 | 229 |
186 static PyObject *bser_loads_recursive(const char **ptr, const char *end, | 230 typedef struct loads_ctx { |
187 int mutable); | 231 int mutable; |
232 const char* value_encoding; | |
233 const char* value_errors; | |
234 uint32_t bser_version; | |
235 uint32_t bser_capabilities; | |
236 } unser_ctx_t; | |
237 | |
238 static PyObject* | |
239 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx); | |
188 | 240 |
189 static const char bser_true = BSER_TRUE; | 241 static const char bser_true = BSER_TRUE; |
190 static const char bser_false = BSER_FALSE; | 242 static const char bser_false = BSER_FALSE; |
191 static const char bser_null = BSER_NULL; | 243 static const char bser_null = BSER_NULL; |
192 static const char bser_string_hdr = BSER_STRING; | 244 static const char bser_bytestring_hdr = BSER_BYTESTRING; |
193 static const char bser_array_hdr = BSER_ARRAY; | 245 static const char bser_array_hdr = BSER_ARRAY; |
194 static const char bser_object_hdr = BSER_OBJECT; | 246 static const char bser_object_hdr = BSER_OBJECT; |
195 | 247 |
196 static inline uint32_t next_power_2(uint32_t n) | 248 static inline uint32_t next_power_2(uint32_t n) { |
197 { | |
198 n |= (n >> 16); | 249 n |= (n >> 16); |
199 n |= (n >> 8); | 250 n |= (n >> 8); |
200 n |= (n >> 4); | 251 n |= (n >> 4); |
201 n |= (n >> 2); | 252 n |= (n >> 2); |
202 n |= (n >> 1); | 253 n |= (n >> 1); |
203 return n + 1; | 254 return n + 1; |
204 } | 255 } |
205 | 256 |
206 // A buffer we use for building up the serialized result | 257 // A buffer we use for building up the serialized result |
207 struct bser_buffer { | 258 struct bser_buffer { |
208 char *buf; | 259 char* buf; |
209 int wpos, allocd; | 260 int wpos, allocd; |
261 uint32_t bser_version; | |
262 uint32_t capabilities; | |
210 }; | 263 }; |
211 typedef struct bser_buffer bser_t; | 264 typedef struct bser_buffer bser_t; |
212 | 265 |
213 static int bser_append(bser_t *bser, const char *data, uint32_t len) | 266 static int bser_append(bser_t* bser, const char* data, uint32_t len) { |
214 { | |
215 int newlen = next_power_2(bser->wpos + len); | 267 int newlen = next_power_2(bser->wpos + len); |
216 if (newlen > bser->allocd) { | 268 if (newlen > bser->allocd) { |
217 char *nbuf = realloc(bser->buf, newlen); | 269 char* nbuf = realloc(bser->buf, newlen); |
218 if (!nbuf) { | 270 if (!nbuf) { |
219 return 0; | 271 return 0; |
220 } | 272 } |
221 | 273 |
222 bser->buf = nbuf; | 274 bser->buf = nbuf; |
226 memcpy(bser->buf + bser->wpos, data, len); | 278 memcpy(bser->buf + bser->wpos, data, len); |
227 bser->wpos += len; | 279 bser->wpos += len; |
228 return 1; | 280 return 1; |
229 } | 281 } |
230 | 282 |
231 static int bser_init(bser_t *bser) | 283 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) { |
232 { | |
233 bser->allocd = 8192; | 284 bser->allocd = 8192; |
234 bser->wpos = 0; | 285 bser->wpos = 0; |
235 bser->buf = malloc(bser->allocd); | 286 bser->buf = malloc(bser->allocd); |
236 | 287 bser->bser_version = version; |
288 bser->capabilities = capabilities; | |
237 if (!bser->buf) { | 289 if (!bser->buf) { |
238 return 0; | 290 return 0; |
239 } | 291 } |
240 | 292 |
241 // Leave room for the serialization header, which includes | 293 // Leave room for the serialization header, which includes |
242 // our overall length. To make things simpler, we'll use an | 294 // our overall length. To make things simpler, we'll use an |
243 // int32 for the header | 295 // int32 for the header |
244 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00" | 296 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00" |
245 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1); | 297 |
298 // Version 2 also carries an integer indicating the capabilities. The | |
299 // capabilities integer comes before the PDU size. | |
300 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00" | |
301 if (version == 2) { | |
302 bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1); | |
303 } else { | |
304 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1); | |
305 } | |
246 | 306 |
247 return 1; | 307 return 1; |
248 } | 308 } |
249 | 309 |
250 static void bser_dtor(bser_t *bser) | 310 static void bser_dtor(bser_t* bser) { |
251 { | |
252 free(bser->buf); | 311 free(bser->buf); |
253 bser->buf = NULL; | 312 bser->buf = NULL; |
254 } | 313 } |
255 | 314 |
256 static int bser_long(bser_t *bser, int64_t val) | 315 static int bser_long(bser_t* bser, int64_t val) { |
257 { | |
258 int8_t i8; | 316 int8_t i8; |
259 int16_t i16; | 317 int16_t i16; |
260 int32_t i32; | 318 int32_t i32; |
261 int64_t i64; | 319 int64_t i64; |
262 char sz; | 320 char sz; |
263 int size = INT_SIZE(val); | 321 int size = INT_SIZE(val); |
264 char *iptr; | 322 char* iptr; |
265 | 323 |
266 switch (size) { | 324 switch (size) { |
267 case 1: | 325 case 1: |
268 sz = BSER_INT8; | 326 sz = BSER_INT8; |
269 i8 = (int8_t)val; | 327 i8 = (int8_t)val; |
283 sz = BSER_INT64; | 341 sz = BSER_INT64; |
284 i64 = (int64_t)val; | 342 i64 = (int64_t)val; |
285 iptr = (char*)&i64; | 343 iptr = (char*)&i64; |
286 break; | 344 break; |
287 default: | 345 default: |
288 PyErr_SetString(PyExc_RuntimeError, | 346 PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?"); |
289 "Cannot represent this long value!?"); | |
290 return 0; | 347 return 0; |
291 } | 348 } |
292 | 349 |
293 if (!bser_append(bser, &sz, sizeof(sz))) { | 350 if (!bser_append(bser, &sz, sizeof(sz))) { |
294 return 0; | 351 return 0; |
295 } | 352 } |
296 | 353 |
297 return bser_append(bser, iptr, size); | 354 return bser_append(bser, iptr, size); |
298 } | 355 } |
299 | 356 |
300 static int bser_string(bser_t *bser, PyObject *sval) | 357 static int bser_bytestring(bser_t* bser, PyObject* sval) { |
301 { | 358 char* buf = NULL; |
302 char *buf = NULL; | |
303 Py_ssize_t len; | 359 Py_ssize_t len; |
304 int res; | 360 int res; |
305 PyObject *utf = NULL; | 361 PyObject* utf = NULL; |
306 | 362 |
307 if (PyUnicode_Check(sval)) { | 363 if (PyUnicode_Check(sval)) { |
308 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore"); | 364 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore"); |
309 sval = utf; | 365 sval = utf; |
310 } | 366 } |
311 | 367 |
312 res = PyString_AsStringAndSize(sval, &buf, &len); | 368 res = PyBytes_AsStringAndSize(sval, &buf, &len); |
313 if (res == -1) { | 369 if (res == -1) { |
314 res = 0; | 370 res = 0; |
315 goto out; | 371 goto out; |
316 } | 372 } |
317 | 373 |
318 if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) { | 374 if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) { |
319 res = 0; | 375 res = 0; |
320 goto out; | 376 goto out; |
321 } | 377 } |
322 | 378 |
323 if (!bser_long(bser, len)) { | 379 if (!bser_long(bser, len)) { |
339 } | 395 } |
340 | 396 |
341 return res; | 397 return res; |
342 } | 398 } |
343 | 399 |
344 static int bser_recursive(bser_t *bser, PyObject *val) | 400 static int bser_recursive(bser_t* bser, PyObject* val) { |
345 { | |
346 if (PyBool_Check(val)) { | 401 if (PyBool_Check(val)) { |
347 if (val == Py_True) { | 402 if (val == Py_True) { |
348 return bser_append(bser, &bser_true, sizeof(bser_true)); | 403 return bser_append(bser, &bser_true, sizeof(bser_true)); |
349 } | 404 } |
350 return bser_append(bser, &bser_false, sizeof(bser_false)); | 405 return bser_append(bser, &bser_false, sizeof(bser_false)); |
352 | 407 |
353 if (val == Py_None) { | 408 if (val == Py_None) { |
354 return bser_append(bser, &bser_null, sizeof(bser_null)); | 409 return bser_append(bser, &bser_null, sizeof(bser_null)); |
355 } | 410 } |
356 | 411 |
412 // Python 3 has one integer type. | |
413 #if PY_MAJOR_VERSION < 3 | |
357 if (PyInt_Check(val)) { | 414 if (PyInt_Check(val)) { |
358 return bser_long(bser, PyInt_AS_LONG(val)); | 415 return bser_long(bser, PyInt_AS_LONG(val)); |
359 } | 416 } |
417 #endif // PY_MAJOR_VERSION < 3 | |
360 | 418 |
361 if (PyLong_Check(val)) { | 419 if (PyLong_Check(val)) { |
362 return bser_long(bser, PyLong_AsLongLong(val)); | 420 return bser_long(bser, PyLong_AsLongLong(val)); |
363 } | 421 } |
364 | 422 |
365 if (PyString_Check(val) || PyUnicode_Check(val)) { | 423 if (PyBytes_Check(val) || PyUnicode_Check(val)) { |
366 return bser_string(bser, val); | 424 return bser_bytestring(bser, val); |
367 } | 425 } |
368 | |
369 | 426 |
370 if (PyFloat_Check(val)) { | 427 if (PyFloat_Check(val)) { |
371 double dval = PyFloat_AS_DOUBLE(val); | 428 double dval = PyFloat_AS_DOUBLE(val); |
372 char sz = BSER_REAL; | 429 char sz = BSER_REAL; |
373 | 430 |
388 if (!bser_long(bser, len)) { | 445 if (!bser_long(bser, len)) { |
389 return 0; | 446 return 0; |
390 } | 447 } |
391 | 448 |
392 for (i = 0; i < len; i++) { | 449 for (i = 0; i < len; i++) { |
393 PyObject *ele = PyList_GET_ITEM(val, i); | 450 PyObject* ele = PyList_GET_ITEM(val, i); |
394 | 451 |
395 if (!bser_recursive(bser, ele)) { | 452 if (!bser_recursive(bser, ele)) { |
396 return 0; | 453 return 0; |
397 } | 454 } |
398 } | 455 } |
410 if (!bser_long(bser, len)) { | 467 if (!bser_long(bser, len)) { |
411 return 0; | 468 return 0; |
412 } | 469 } |
413 | 470 |
414 for (i = 0; i < len; i++) { | 471 for (i = 0; i < len; i++) { |
415 PyObject *ele = PyTuple_GET_ITEM(val, i); | 472 PyObject* ele = PyTuple_GET_ITEM(val, i); |
416 | 473 |
417 if (!bser_recursive(bser, ele)) { | 474 if (!bser_recursive(bser, ele)) { |
418 return 0; | 475 return 0; |
419 } | 476 } |
420 } | 477 } |
434 if (!bser_long(bser, len)) { | 491 if (!bser_long(bser, len)) { |
435 return 0; | 492 return 0; |
436 } | 493 } |
437 | 494 |
438 while (PyDict_Next(val, &pos, &key, &ele)) { | 495 while (PyDict_Next(val, &pos, &key, &ele)) { |
439 if (!bser_string(bser, key)) { | 496 if (!bser_bytestring(bser, key)) { |
440 return 0; | 497 return 0; |
441 } | 498 } |
442 if (!bser_recursive(bser, ele)) { | 499 if (!bser_recursive(bser, ele)) { |
443 return 0; | 500 return 0; |
444 } | 501 } |
449 | 506 |
450 PyErr_SetString(PyExc_ValueError, "Unsupported value type"); | 507 PyErr_SetString(PyExc_ValueError, "Unsupported value type"); |
451 return 0; | 508 return 0; |
452 } | 509 } |
453 | 510 |
454 static PyObject *bser_dumps(PyObject *self, PyObject *args) | 511 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) { |
455 { | |
456 PyObject *val = NULL, *res; | 512 PyObject *val = NULL, *res; |
457 bser_t bser; | 513 bser_t bser; |
458 uint32_t len; | 514 uint32_t len, bser_version = 1, bser_capabilities = 0; |
459 | 515 |
460 if (!PyArg_ParseTuple(args, "O", &val)) { | 516 static char* kw_list[] = {"val", "version", "capabilities", NULL}; |
461 return NULL; | 517 |
462 } | 518 if (!PyArg_ParseTupleAndKeywords( |
463 | 519 args, |
464 if (!bser_init(&bser)) { | 520 kw, |
521 "O|ii:dumps", | |
522 kw_list, | |
523 &val, | |
524 &bser_version, | |
525 &bser_capabilities)) { | |
526 return NULL; | |
527 } | |
528 | |
529 if (!bser_init(&bser, bser_version, bser_capabilities)) { | |
465 return PyErr_NoMemory(); | 530 return PyErr_NoMemory(); |
466 } | 531 } |
467 | 532 |
468 if (!bser_recursive(&bser, val)) { | 533 if (!bser_recursive(&bser, val)) { |
469 bser_dtor(&bser); | 534 bser_dtor(&bser); |
473 // otherwise, we've already set the error to something reasonable | 538 // otherwise, we've already set the error to something reasonable |
474 return NULL; | 539 return NULL; |
475 } | 540 } |
476 | 541 |
477 // Now fill in the overall length | 542 // Now fill in the overall length |
478 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1); | 543 if (bser_version == 1) { |
479 memcpy(bser.buf + 3, &len, sizeof(len)); | 544 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1); |
480 | 545 memcpy(bser.buf + 3, &len, sizeof(len)); |
481 res = PyString_FromStringAndSize(bser.buf, bser.wpos); | 546 } else { |
547 len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1); | |
548 // The BSER capabilities block comes before the PDU length | |
549 memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities)); | |
550 memcpy(bser.buf + 7, &len, sizeof(len)); | |
551 } | |
552 | |
553 res = PyBytes_FromStringAndSize(bser.buf, bser.wpos); | |
482 bser_dtor(&bser); | 554 bser_dtor(&bser); |
483 | 555 |
484 return res; | 556 return res; |
485 } | 557 } |
486 | 558 |
487 int bunser_int(const char **ptr, const char *end, int64_t *val) | 559 int bunser_int(const char** ptr, const char* end, int64_t* val) { |
488 { | |
489 int needed; | 560 int needed; |
490 const char *buf = *ptr; | 561 const char* buf = *ptr; |
491 int8_t i8; | 562 int8_t i8; |
492 int16_t i16; | 563 int16_t i16; |
493 int32_t i32; | 564 int32_t i32; |
494 int64_t i64; | 565 int64_t i64; |
495 | 566 |
505 break; | 576 break; |
506 case BSER_INT64: | 577 case BSER_INT64: |
507 needed = 9; | 578 needed = 9; |
508 break; | 579 break; |
509 default: | 580 default: |
510 PyErr_Format(PyExc_ValueError, | 581 PyErr_Format( |
511 "invalid bser int encoding 0x%02x", buf[0]); | 582 PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]); |
512 return 0; | 583 return 0; |
513 } | 584 } |
514 if (end - buf < needed) { | 585 if (end - buf < needed) { |
515 PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding"); | 586 PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding"); |
516 return 0; | 587 return 0; |
536 default: | 607 default: |
537 return 0; | 608 return 0; |
538 } | 609 } |
539 } | 610 } |
540 | 611 |
541 static int bunser_string(const char **ptr, const char *end, | 612 static int bunser_bytestring( |
542 const char **start, int64_t *len) | 613 const char** ptr, |
543 { | 614 const char* end, |
544 const char *buf = *ptr; | 615 const char** start, |
616 int64_t* len) { | |
617 const char* buf = *ptr; | |
545 | 618 |
546 // skip string marker | 619 // skip string marker |
547 buf++; | 620 buf++; |
548 if (!bunser_int(&buf, end, len)) { | 621 if (!bunser_int(&buf, end, len)) { |
549 return 0; | 622 return 0; |
557 *ptr = buf + *len; | 630 *ptr = buf + *len; |
558 *start = buf; | 631 *start = buf; |
559 return 1; | 632 return 1; |
560 } | 633 } |
561 | 634 |
562 static PyObject *bunser_array(const char **ptr, const char *end, int mutable) | 635 static PyObject* |
563 { | 636 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
564 const char *buf = *ptr; | 637 const char* buf = *ptr; |
565 int64_t nitems, i; | 638 int64_t nitems, i; |
566 PyObject *res; | 639 int mutable = ctx->mutable; |
640 PyObject* res; | |
567 | 641 |
568 // skip array header | 642 // skip array header |
569 buf++; | 643 buf++; |
570 if (!bunser_int(&buf, end, &nitems)) { | 644 if (!bunser_int(&buf, end, &nitems)) { |
571 return 0; | 645 return 0; |
582 } else { | 656 } else { |
583 res = PyTuple_New((Py_ssize_t)nitems); | 657 res = PyTuple_New((Py_ssize_t)nitems); |
584 } | 658 } |
585 | 659 |
586 for (i = 0; i < nitems; i++) { | 660 for (i = 0; i < nitems; i++) { |
587 PyObject *ele = bser_loads_recursive(ptr, end, mutable); | 661 PyObject* ele = bser_loads_recursive(ptr, end, ctx); |
588 | 662 |
589 if (!ele) { | 663 if (!ele) { |
590 Py_DECREF(res); | 664 Py_DECREF(res); |
591 return NULL; | 665 return NULL; |
592 } | 666 } |
600 } | 674 } |
601 | 675 |
602 return res; | 676 return res; |
603 } | 677 } |
604 | 678 |
605 static PyObject *bunser_object(const char **ptr, const char *end, | 679 static PyObject* |
606 int mutable) | 680 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
607 { | 681 const char* buf = *ptr; |
608 const char *buf = *ptr; | |
609 int64_t nitems, i; | 682 int64_t nitems, i; |
610 PyObject *res; | 683 int mutable = ctx->mutable; |
611 bserObject *obj; | 684 PyObject* res; |
685 bserObject* obj; | |
612 | 686 |
613 // skip array header | 687 // skip array header |
614 buf++; | 688 buf++; |
615 if (!bunser_int(&buf, end, &nitems)) { | 689 if (!bunser_int(&buf, end, &nitems)) { |
616 return 0; | 690 return 0; |
625 obj->values = PyTuple_New((Py_ssize_t)nitems); | 699 obj->values = PyTuple_New((Py_ssize_t)nitems); |
626 res = (PyObject*)obj; | 700 res = (PyObject*)obj; |
627 } | 701 } |
628 | 702 |
629 for (i = 0; i < nitems; i++) { | 703 for (i = 0; i < nitems; i++) { |
630 const char *keystr; | 704 const char* keystr; |
631 int64_t keylen; | 705 int64_t keylen; |
632 PyObject *key; | 706 PyObject* key; |
633 PyObject *ele; | 707 PyObject* ele; |
634 | 708 |
635 if (!bunser_string(ptr, end, &keystr, &keylen)) { | 709 if (!bunser_bytestring(ptr, end, &keystr, &keylen)) { |
636 Py_DECREF(res); | 710 Py_DECREF(res); |
637 return NULL; | 711 return NULL; |
638 } | 712 } |
639 | 713 |
640 if (keylen > LONG_MAX) { | 714 if (keylen > LONG_MAX) { |
641 PyErr_Format(PyExc_ValueError, "string too big for python"); | 715 PyErr_Format(PyExc_ValueError, "string too big for python"); |
642 Py_DECREF(res); | 716 Py_DECREF(res); |
643 return NULL; | 717 return NULL; |
644 } | 718 } |
645 | 719 |
646 key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen); | 720 if (mutable) { |
721 // This will interpret the key as UTF-8. | |
722 key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen); | |
723 } else { | |
724 // For immutable objects we'll manage key lookups, so we can avoid going | |
725 // through the Unicode APIs. This avoids a potentially expensive and | |
726 // definitely unnecessary conversion to UTF-16 and back for Python 2. | |
727 // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use | |
728 // Unicode keys there without an appreciable performance loss. | |
729 key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen); | |
730 } | |
731 | |
647 if (!key) { | 732 if (!key) { |
648 Py_DECREF(res); | 733 Py_DECREF(res); |
649 return NULL; | 734 return NULL; |
650 } | 735 } |
651 | 736 |
652 ele = bser_loads_recursive(ptr, end, mutable); | 737 ele = bser_loads_recursive(ptr, end, ctx); |
653 | 738 |
654 if (!ele) { | 739 if (!ele) { |
655 Py_DECREF(key); | 740 Py_DECREF(key); |
656 Py_DECREF(res); | 741 Py_DECREF(res); |
657 return NULL; | 742 return NULL; |
669 } | 754 } |
670 | 755 |
671 return res; | 756 return res; |
672 } | 757 } |
673 | 758 |
674 static PyObject *bunser_template(const char **ptr, const char *end, | 759 static PyObject* |
675 int mutable) | 760 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
676 { | 761 const char* buf = *ptr; |
677 const char *buf = *ptr; | |
678 int64_t nitems, i; | 762 int64_t nitems, i; |
679 PyObject *arrval; | 763 int mutable = ctx->mutable; |
680 PyObject *keys; | 764 PyObject* arrval; |
765 PyObject* keys; | |
681 Py_ssize_t numkeys, keyidx; | 766 Py_ssize_t numkeys, keyidx; |
767 unser_ctx_t keys_ctx = {0}; | |
768 if (mutable) { | |
769 keys_ctx.mutable = 1; | |
770 // Decode keys as UTF-8 in this case. | |
771 keys_ctx.value_encoding = "utf-8"; | |
772 keys_ctx.value_errors = "strict"; | |
773 } else { | |
774 // Treat keys as bytestrings in this case -- we'll do Unicode conversions at | |
775 // lookup time. | |
776 } | |
682 | 777 |
683 if (buf[1] != BSER_ARRAY) { | 778 if (buf[1] != BSER_ARRAY) { |
684 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE"); | 779 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE"); |
685 return NULL; | 780 return NULL; |
686 } | 781 } |
687 | 782 |
688 // skip header | 783 // skip header |
689 buf++; | 784 buf++; |
690 *ptr = buf; | 785 *ptr = buf; |
691 | 786 |
692 // Load template keys | 787 // Load template keys. |
693 keys = bunser_array(ptr, end, mutable); | 788 // For keys we don't want to do any decoding right now. |
789 keys = bunser_array(ptr, end, &keys_ctx); | |
694 if (!keys) { | 790 if (!keys) { |
695 return NULL; | 791 return NULL; |
696 } | 792 } |
697 | 793 |
698 numkeys = PySequence_Length(keys); | 794 numkeys = PySequence_Length(keys); |
714 Py_DECREF(keys); | 810 Py_DECREF(keys); |
715 return NULL; | 811 return NULL; |
716 } | 812 } |
717 | 813 |
718 for (i = 0; i < nitems; i++) { | 814 for (i = 0; i < nitems; i++) { |
719 PyObject *dict = NULL; | 815 PyObject* dict = NULL; |
720 bserObject *obj = NULL; | 816 bserObject* obj = NULL; |
721 | 817 |
722 if (mutable) { | 818 if (mutable) { |
723 dict = PyDict_New(); | 819 dict = PyDict_New(); |
724 } else { | 820 } else { |
725 obj = PyObject_New(bserObject, &bserObjectType); | 821 obj = PyObject_New(bserObject, &bserObjectType); |
729 obj->values = PyTuple_New(numkeys); | 825 obj->values = PyTuple_New(numkeys); |
730 } | 826 } |
731 dict = (PyObject*)obj; | 827 dict = (PyObject*)obj; |
732 } | 828 } |
733 if (!dict) { | 829 if (!dict) { |
734 fail: | 830 fail: |
735 Py_DECREF(keys); | 831 Py_DECREF(keys); |
736 Py_DECREF(arrval); | 832 Py_DECREF(arrval); |
737 return NULL; | 833 return NULL; |
738 } | 834 } |
739 | 835 |
740 for (keyidx = 0; keyidx < numkeys; keyidx++) { | 836 for (keyidx = 0; keyidx < numkeys; keyidx++) { |
741 PyObject *key; | 837 PyObject* key; |
742 PyObject *ele; | 838 PyObject* ele; |
743 | 839 |
744 if (**ptr == BSER_SKIP) { | 840 if (**ptr == BSER_SKIP) { |
745 *ptr = *ptr + 1; | 841 *ptr = *ptr + 1; |
746 ele = Py_None; | 842 ele = Py_None; |
747 Py_INCREF(ele); | 843 Py_INCREF(ele); |
748 } else { | 844 } else { |
749 ele = bser_loads_recursive(ptr, end, mutable); | 845 ele = bser_loads_recursive(ptr, end, ctx); |
750 } | 846 } |
751 | 847 |
752 if (!ele) { | 848 if (!ele) { |
753 goto fail; | 849 goto fail; |
754 } | 850 } |
770 Py_DECREF(keys); | 866 Py_DECREF(keys); |
771 | 867 |
772 return arrval; | 868 return arrval; |
773 } | 869 } |
774 | 870 |
775 static PyObject *bser_loads_recursive(const char **ptr, const char *end, | 871 static PyObject* bser_loads_recursive( |
776 int mutable) | 872 const char** ptr, |
777 { | 873 const char* end, |
778 const char *buf = *ptr; | 874 const unser_ctx_t* ctx) { |
875 const char* buf = *ptr; | |
779 | 876 |
780 switch (buf[0]) { | 877 switch (buf[0]) { |
781 case BSER_INT8: | 878 case BSER_INT8: |
782 case BSER_INT16: | 879 case BSER_INT16: |
783 case BSER_INT32: | 880 case BSER_INT32: |
784 case BSER_INT64: | 881 case BSER_INT64: { |
785 { | 882 int64_t ival; |
786 int64_t ival; | 883 if (!bunser_int(ptr, end, &ival)) { |
787 if (!bunser_int(ptr, end, &ival)) { | 884 return NULL; |
788 return NULL; | 885 } |
789 } | 886 // Python 3 has one integer type. |
790 if (ival < LONG_MIN || ival > LONG_MAX) { | 887 #if PY_MAJOR_VERSION >= 3 |
791 return PyLong_FromLongLong(ival); | 888 return PyLong_FromLongLong(ival); |
792 } | 889 #else |
793 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); | 890 if (ival < LONG_MIN || ival > LONG_MAX) { |
794 } | 891 return PyLong_FromLongLong(ival); |
795 | 892 } |
796 case BSER_REAL: | 893 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); |
797 { | 894 #endif // PY_MAJOR_VERSION >= 3 |
798 double dval; | 895 } |
799 memcpy(&dval, buf + 1, sizeof(dval)); | 896 |
800 *ptr = buf + 1 + sizeof(double); | 897 case BSER_REAL: { |
801 return PyFloat_FromDouble(dval); | 898 double dval; |
802 } | 899 memcpy(&dval, buf + 1, sizeof(dval)); |
900 *ptr = buf + 1 + sizeof(double); | |
901 return PyFloat_FromDouble(dval); | |
902 } | |
803 | 903 |
804 case BSER_TRUE: | 904 case BSER_TRUE: |
805 *ptr = buf + 1; | 905 *ptr = buf + 1; |
806 Py_INCREF(Py_True); | 906 Py_INCREF(Py_True); |
807 return Py_True; | 907 return Py_True; |
814 case BSER_NULL: | 914 case BSER_NULL: |
815 *ptr = buf + 1; | 915 *ptr = buf + 1; |
816 Py_INCREF(Py_None); | 916 Py_INCREF(Py_None); |
817 return Py_None; | 917 return Py_None; |
818 | 918 |
819 case BSER_STRING: | 919 case BSER_BYTESTRING: { |
820 { | 920 const char* start; |
821 const char *start; | 921 int64_t len; |
822 int64_t len; | 922 |
823 | 923 if (!bunser_bytestring(ptr, end, &start, &len)) { |
824 if (!bunser_string(ptr, end, &start, &len)) { | 924 return NULL; |
825 return NULL; | 925 } |
826 } | 926 |
827 | 927 if (len > LONG_MAX) { |
828 if (len > LONG_MAX) { | 928 PyErr_Format(PyExc_ValueError, "string too long for python"); |
829 PyErr_Format(PyExc_ValueError, "string too long for python"); | 929 return NULL; |
830 return NULL; | 930 } |
831 } | 931 |
832 | 932 if (ctx->value_encoding != NULL) { |
833 return PyString_FromStringAndSize(start, (long)len); | 933 return PyUnicode_Decode( |
834 } | 934 start, (long)len, ctx->value_encoding, ctx->value_errors); |
935 } else { | |
936 return PyBytes_FromStringAndSize(start, (long)len); | |
937 } | |
938 } | |
939 | |
940 case BSER_UTF8STRING: { | |
941 const char* start; | |
942 int64_t len; | |
943 | |
944 if (!bunser_bytestring(ptr, end, &start, &len)) { | |
945 return NULL; | |
946 } | |
947 | |
948 if (len > LONG_MAX) { | |
949 PyErr_Format(PyExc_ValueError, "string too long for python"); | |
950 return NULL; | |
951 } | |
952 | |
953 return PyUnicode_Decode(start, (long)len, "utf-8", "strict"); | |
954 } | |
835 | 955 |
836 case BSER_ARRAY: | 956 case BSER_ARRAY: |
837 return bunser_array(ptr, end, mutable); | 957 return bunser_array(ptr, end, ctx); |
838 | 958 |
839 case BSER_OBJECT: | 959 case BSER_OBJECT: |
840 return bunser_object(ptr, end, mutable); | 960 return bunser_object(ptr, end, ctx); |
841 | 961 |
842 case BSER_TEMPLATE: | 962 case BSER_TEMPLATE: |
843 return bunser_template(ptr, end, mutable); | 963 return bunser_template(ptr, end, ctx); |
844 | 964 |
845 default: | 965 default: |
846 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]); | 966 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]); |
847 } | 967 } |
848 | 968 |
849 return NULL; | 969 return NULL; |
850 } | 970 } |
851 | 971 |
852 // Expected use case is to read a packet from the socket and | 972 static int _pdu_info_helper( |
853 // then call bser.pdu_len on the packet. It returns the total | 973 const char* data, |
854 // length of the entire response that the peer is sending, | 974 const char* end, |
855 // including the bytes already received. This allows the client | 975 uint32_t* bser_version_out, |
856 // to compute the data size it needs to read before it can | 976 uint32_t* bser_capabilities_out, |
857 // decode the data | 977 int64_t* expected_len_out, |
858 static PyObject *bser_pdu_len(PyObject *self, PyObject *args) | 978 off_t* position_out) { |
859 { | 979 uint32_t bser_version; |
860 const char *start = NULL; | 980 uint32_t bser_capabilities = 0; |
861 const char *data = NULL; | 981 int64_t expected_len; |
862 int datalen = 0; | 982 |
863 const char *end; | 983 const char* start; |
864 int64_t expected_len, total_len; | 984 start = data; |
865 | |
866 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { | |
867 return NULL; | |
868 } | |
869 data = start; | |
870 end = data + datalen; | |
871 | |
872 // Validate the header and length | 985 // Validate the header and length |
873 if (memcmp(data, EMPTY_HEADER, 2) != 0) { | 986 if (memcmp(data, EMPTY_HEADER, 2) == 0) { |
987 bser_version = 1; | |
988 } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) { | |
989 bser_version = 2; | |
990 } else { | |
874 PyErr_SetString(PyExc_ValueError, "invalid bser header"); | 991 PyErr_SetString(PyExc_ValueError, "invalid bser header"); |
875 return NULL; | 992 return 0; |
876 } | 993 } |
877 | 994 |
878 data += 2; | 995 data += 2; |
996 | |
997 if (bser_version == 2) { | |
998 // Expect an integer telling us what capabilities are supported by the | |
999 // remote server (currently unused). | |
1000 if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) { | |
1001 return 0; | |
1002 } | |
1003 data += sizeof(bser_capabilities); | |
1004 } | |
879 | 1005 |
880 // Expect an integer telling us how big the rest of the data | 1006 // Expect an integer telling us how big the rest of the data |
881 // should be | 1007 // should be |
882 if (!bunser_int(&data, end, &expected_len)) { | 1008 if (!bunser_int(&data, end, &expected_len)) { |
883 return NULL; | 1009 return 0; |
884 } | 1010 } |
885 | 1011 |
886 total_len = expected_len + (data - start); | 1012 *bser_version_out = bser_version; |
887 if (total_len > LONG_MAX) { | 1013 *bser_capabilities_out = (uint32_t)bser_capabilities; |
888 return PyLong_FromLongLong(total_len); | 1014 *expected_len_out = expected_len; |
889 } | 1015 *position_out = (off_t)(data - start); |
890 return PyInt_FromLong((long)total_len); | 1016 return 1; |
891 } | 1017 } |
892 | 1018 |
893 static PyObject *bser_loads(PyObject *self, PyObject *args) | 1019 // This function parses the PDU header and provides info about the packet |
894 { | 1020 // Returns false if unsuccessful |
895 const char *data = NULL; | 1021 static int pdu_info_helper( |
1022 PyObject* self, | |
1023 PyObject* args, | |
1024 uint32_t* bser_version_out, | |
1025 uint32_t* bser_capabilities_out, | |
1026 int64_t* total_len_out) { | |
1027 const char* start = NULL; | |
1028 const char* data = NULL; | |
896 int datalen = 0; | 1029 int datalen = 0; |
897 const char *end; | 1030 const char* end; |
898 int64_t expected_len; | 1031 int64_t expected_len; |
899 int mutable = 1; | 1032 off_t position; |
900 PyObject *mutable_obj = NULL; | 1033 |
901 | 1034 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { |
902 if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) { | 1035 return 0; |
903 return NULL; | 1036 } |
904 } | 1037 data = start; |
1038 end = data + datalen; | |
1039 | |
1040 if (!_pdu_info_helper( | |
1041 data, | |
1042 end, | |
1043 bser_version_out, | |
1044 bser_capabilities_out, | |
1045 &expected_len, | |
1046 &position)) { | |
1047 return 0; | |
1048 } | |
1049 *total_len_out = (int64_t)(expected_len + position); | |
1050 return 1; | |
1051 } | |
1052 | |
1053 // Expected use case is to read a packet from the socket and then call | |
1054 // bser.pdu_info on the packet. It returns the BSER version, BSER capabilities, | |
1055 // and the total length of the entire response that the peer is sending, | |
1056 // including the bytes already received. This allows the client to compute the | |
1057 // data size it needs to read before it can decode the data. | |
1058 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) { | |
1059 uint32_t version, capabilities; | |
1060 int64_t total_len; | |
1061 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { | |
1062 return NULL; | |
1063 } | |
1064 return Py_BuildValue("kkL", version, capabilities, total_len); | |
1065 } | |
1066 | |
1067 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) { | |
1068 uint32_t version, capabilities; | |
1069 int64_t total_len; | |
1070 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { | |
1071 return NULL; | |
1072 } | |
1073 return Py_BuildValue("L", total_len); | |
1074 } | |
1075 | |
1076 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) { | |
1077 const char* data = NULL; | |
1078 int datalen = 0; | |
1079 const char* start; | |
1080 const char* end; | |
1081 int64_t expected_len; | |
1082 off_t position; | |
1083 PyObject* mutable_obj = NULL; | |
1084 const char* value_encoding = NULL; | |
1085 const char* value_errors = NULL; | |
1086 unser_ctx_t ctx = {1, 0}; | |
1087 | |
1088 static char* kw_list[] = { | |
1089 "buf", "mutable", "value_encoding", "value_errors", NULL}; | |
1090 | |
1091 if (!PyArg_ParseTupleAndKeywords( | |
1092 args, | |
1093 kw, | |
1094 "s#|Ozz:loads", | |
1095 kw_list, | |
1096 &start, | |
1097 &datalen, | |
1098 &mutable_obj, | |
1099 &value_encoding, | |
1100 &value_errors)) { | |
1101 return NULL; | |
1102 } | |
1103 | |
905 if (mutable_obj) { | 1104 if (mutable_obj) { |
906 mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0; | 1105 ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0; |
907 } | 1106 } |
908 | 1107 ctx.value_encoding = value_encoding; |
1108 if (value_encoding == NULL) { | |
1109 ctx.value_errors = NULL; | |
1110 } else if (value_errors == NULL) { | |
1111 ctx.value_errors = "strict"; | |
1112 } else { | |
1113 ctx.value_errors = value_errors; | |
1114 } | |
1115 data = start; | |
909 end = data + datalen; | 1116 end = data + datalen; |
910 | 1117 |
911 // Validate the header and length | 1118 if (!_pdu_info_helper( |
912 if (memcmp(data, EMPTY_HEADER, 2) != 0) { | 1119 data, |
913 PyErr_SetString(PyExc_ValueError, "invalid bser header"); | 1120 end, |
914 return NULL; | 1121 &ctx.bser_version, |
915 } | 1122 &ctx.bser_capabilities, |
916 | 1123 &expected_len, |
917 data += 2; | 1124 &position)) { |
918 | 1125 return NULL; |
919 // Expect an integer telling us how big the rest of the data | 1126 } |
920 // should be | 1127 |
921 if (!bunser_int(&data, end, &expected_len)) { | 1128 data = start + position; |
922 return NULL; | |
923 } | |
924 | |
925 // Verify | 1129 // Verify |
926 if (expected_len + data != end) { | 1130 if (expected_len + data != end) { |
927 PyErr_SetString(PyExc_ValueError, "bser data len != header len"); | 1131 PyErr_SetString(PyExc_ValueError, "bser data len != header len"); |
928 return NULL; | 1132 return NULL; |
929 } | 1133 } |
930 | 1134 |
931 return bser_loads_recursive(&data, end, mutable); | 1135 return bser_loads_recursive(&data, end, &ctx); |
932 } | 1136 } |
933 | 1137 |
1138 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) { | |
1139 PyObject *load, *string; | |
1140 PyObject* fp = NULL; | |
1141 PyObject* mutable_obj = NULL; | |
1142 const char* value_encoding = NULL; | |
1143 const char* value_errors = NULL; | |
1144 | |
1145 static char* kw_list[] = { | |
1146 "fp", "mutable", "value_encoding", "value_errors", NULL}; | |
1147 | |
1148 if (!PyArg_ParseTupleAndKeywords( | |
1149 args, | |
1150 kw, | |
1151 "OOzz:load", | |
1152 kw_list, | |
1153 &fp, | |
1154 &mutable_obj, | |
1155 &value_encoding, | |
1156 &value_errors)) { | |
1157 return NULL; | |
1158 } | |
1159 | |
1160 load = PyImport_ImportModule("pywatchman.load"); | |
1161 if (load == NULL) { | |
1162 return NULL; | |
1163 } | |
1164 string = PyObject_CallMethod( | |
1165 load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors); | |
1166 Py_DECREF(load); | |
1167 return string; | |
1168 } | |
1169 | |
1170 // clang-format off | |
934 static PyMethodDef bser_methods[] = { | 1171 static PyMethodDef bser_methods[] = { |
935 {"loads", bser_loads, METH_VARARGS, "Deserialize string."}, | 1172 {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS, |
936 {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."}, | 1173 "Deserialize string."}, |
937 {"dumps", bser_dumps, METH_VARARGS, "Serialize string."}, | 1174 {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS, |
1175 "Deserialize a file object"}, | |
1176 {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS, | |
1177 "Extract PDU information."}, | |
1178 {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS, | |
1179 "Extract total PDU length."}, | |
1180 {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS, | |
1181 "Serialize string."}, | |
938 {NULL, NULL, 0, NULL} | 1182 {NULL, NULL, 0, NULL} |
939 }; | 1183 }; |
940 | 1184 |
941 PyMODINIT_FUNC initbser(void) | 1185 #if PY_MAJOR_VERSION >= 3 |
942 { | 1186 static struct PyModuleDef bser_module = { |
1187 PyModuleDef_HEAD_INIT, | |
1188 "bser", | |
1189 "Efficient encoding and decoding of BSER.", | |
1190 -1, | |
1191 bser_methods | |
1192 }; | |
1193 // clang-format on | |
1194 | |
1195 PyMODINIT_FUNC PyInit_bser(void) { | |
1196 PyObject* mod; | |
1197 | |
1198 mod = PyModule_Create(&bser_module); | |
1199 PyType_Ready(&bserObjectType); | |
1200 | |
1201 return mod; | |
1202 } | |
1203 #else | |
1204 | |
1205 PyMODINIT_FUNC initbser(void) { | |
943 (void)Py_InitModule("bser", bser_methods); | 1206 (void)Py_InitModule("bser", bser_methods); |
944 PyType_Ready(&bserObjectType); | 1207 PyType_Ready(&bserObjectType); |
945 } | 1208 } |
1209 #endif // PY_MAJOR_VERSION >= 3 | |
946 | 1210 |
947 /* vim:ts=2:sw=2:et: | 1211 /* vim:ts=2:sw=2:et: |
948 */ | 1212 */ |
949 | |
950 // no-check-code -- this is a 3rd party library |