# HG changeset patch # User Augie Fackler # Date 1570565895 14400 # Node ID 2a0774e9d2a8ea3b452c416307ed1fc006010bce # Parent 843da18386d580779a30b7d103615181a309262c dirs: fix trivial over-read of input data This code, introduced in 8c0a7eeda06d, was intentionally over-reading an input string to avoid getting a shared string object for a one-byte input. Unfortunately with an empty input (like in the case of a fuzzer getting started) this was a trivial over-read and triggered an AddressSanitizer failure. I went out of my way to make sure the code still does the copy-avoidance tricks. I don't think this change will cost us much performance since the one-character strings should be cached aggressively anyway. Differential Revision: https://phab.mercurial-scm.org/D7030 diff -r 843da18386d5 -r 2a0774e9d2a8 mercurial/cext/dirs.c --- a/mercurial/cext/dirs.c Sun Oct 06 23:36:52 2019 -0400 +++ b/mercurial/cext/dirs.c Tue Oct 08 16:18:15 2019 -0400 @@ -68,26 +68,41 @@ while ((pos = _finddir(cpath, pos - 1)) != -1) { PyObject *val; - /* It's likely that every prefix already has an entry - in our dict. Try to avoid allocating and - deallocating a string for each prefix we check. */ - if (key != NULL) - ((PyBytesObject *)key)->ob_shash = -1; - else { - /* Force Python to not reuse a small shared string. */ - key = PyBytes_FromStringAndSize(cpath, - pos < 2 ? 2 : pos); + if (pos < 2) { + key = PyBytes_FromStringAndSize(cpath, pos); if (key == NULL) goto bail; + } else { + /* It's likely that every prefix already has an entry + in our dict. Try to avoid allocating and + deallocating a string for each prefix we check. */ + if (key != NULL) + ((PyBytesObject *)key)->ob_shash = -1; + else { + /* We know pos >= 2, so we won't get a small + * shared string. */ + key = PyBytes_FromStringAndSize(cpath, pos); + if (key == NULL) + goto bail; + } + /* Py_SIZE(o) refers to the ob_size member of + * the struct. Yes, assigning to what looks + * like a function seems wrong. */ + Py_SIZE(key) = pos; + ((PyBytesObject *)key)->ob_sval[pos] = '\0'; } - /* Py_SIZE(o) refers to the ob_size member of the struct. Yes, - * assigning to what looks like a function seems wrong. */ - Py_SIZE(key) = pos; - ((PyBytesObject *)key)->ob_sval[pos] = '\0'; val = PyDict_GetItem(dirs, key); if (val != NULL) { PYLONG_VALUE(val) += 1; + if (pos < 2) { + /* This was a short string, so we + * probably got a small shared string + * we can't mutate on the next loop + * iteration. Clear it. + */ + Py_CLEAR(key); + } break; }