# HG changeset patch # User Gregory Szorc # Date 1572729468 25200 # Node ID d359dfc15acaa6fe546f32a5efc8f450cb7af938 # Parent bdebc7b54dcae12e507479b592d2acecbf72cae3 fsmonitor: handle unicode keys in tuples In Python 3, keys in the bser tuple are typically str, not bytes. PyBytes_AsString() would return NULL. But we weren't checking the return value and this would lead to a segfault. This commit makes the code type and Python version aware. The Python version specific code is to allow us to utilize a modern API for converting str -> char* without having to allocate an extra PyObject. FWIW I wanted to assume that keys were always str. However, there appear to be some bytes keys in some cases. I haven't debugged this further. Differential Revision: https://phab.mercurial-scm.org/D7210 diff -r bdebc7b54dca -r d359dfc15aca hgext/fsmonitor/pywatchman/bser.c --- a/hgext/fsmonitor/pywatchman/bser.c Sat Nov 02 13:39:23 2019 -0700 +++ b/hgext/fsmonitor/pywatchman/bser.c Sat Nov 02 14:17:48 2019 -0700 @@ -175,7 +175,22 @@ const char* item_name = NULL; PyObject* key = PyTuple_GET_ITEM(obj->keys, i); - item_name = PyBytes_AsString(key); + if (PyUnicode_Check(key)) { +#if PY_MAJOR_VERSION >= 3 + item_name = PyUnicode_AsUTF8(key); +#else + PyObject* utf = PyUnicode_AsEncodedString(key, "utf-8", "ignore"); + if (utf == NULL) { + goto bail; + } + item_name = PyBytes_AsString(utf); +#endif + } else { + item_name = PyBytes_AsString(key); + } + if (item_name == NULL) { + goto bail; + } if (!strcmp(item_name, namestr)) { ret = PySequence_GetItem(obj->values, i); goto bail;