pathencode: new C module with fast encodedir() function
Not yet used (will be enabled in a later patch).
This patch is a stripped-down version of patches originally created by
Bryan O'Sullivan <bryano@fb.com>.
--- a/mercurial/parsers.c Tue Sep 18 07:58:50 2012 +0200
+++ b/mercurial/parsers.c Tue Sep 18 11:43:30 2012 +0200
@@ -1506,11 +1506,14 @@
static char parsers_doc[] = "Efficient content parsing.";
+/* defined in pathencode.c, which is compiled into this module */
+PyObject *encodedir(PyObject *self, PyObject *args);
+
+/* functions exported by this extension module; {NULL, NULL} ends the table */
static PyMethodDef methods[] = {
{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+ {"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
{NULL, NULL}
};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/pathencode.c Tue Sep 18 11:43:30 2012 +0200
@@ -0,0 +1,125 @@
+/*
+ pathencode.c - efficient path name encoding
+
+ Copyright 2012 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#include <Python.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+/*
+ * state machine for dir-encoding: records how much of a ".hg", ".d" or
+ * ".i" sequence has been matched so far while scanning a path
+ */
+enum dir_state {
+ DDOT, /* the previous character was a '.' */
+ DH, /* matched ".h" */
+ DHGDI, /* matched ".hg", ".d" or ".i"; a following '/' gets ".hg" inserted */
+ DDEFAULT, /* initial state: no partial match in progress */
+};
+
+/*
+ * Append the single character c to dest at offset *destlen and advance
+ * *destlen by one.  When dest is NULL nothing is stored and only the
+ * length is counted.
+ */
+static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+			    char c)
+{
+	const Py_ssize_t pos = *destlen;
+
+	if (dest != NULL) {
+		assert(pos < destsize);
+		dest[pos] = c;
+	}
+	*destlen = pos + 1;
+}
+
+/*
+ * Append len bytes from src to dest at offset *destlen and advance
+ * *destlen by len.  When dest is NULL nothing is stored and only the
+ * length is counted.
+ */
+static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
+			   const void *src, Py_ssize_t len)
+{
+	if (dest) {
+		/*
+		 * "<=" rather than "<": the last byte written lands at
+		 * index *destlen + len - 1, so a copy that exactly fills
+		 * the buffer is still in bounds.
+		 */
+		assert((size_t)(*destlen + len) <= destsize);
+		memcpy(&dest[*destlen], src, len);
+	}
+	*destlen += len;
+}
+
+/*
+ * Copy len bytes of src to dest, inserting ".hg" in front of every '/'
+ * that directly follows ".hg", ".d" or ".i".  Returns the number of
+ * bytes the encoded result occupies.  dest may be NULL, in which case
+ * nothing is stored and only the length is computed.
+ */
+static Py_ssize_t _encodedir(char *dest, size_t destsize,
+			     const char *src, Py_ssize_t len)
+{
+	enum dir_state state = DDEFAULT;
+	Py_ssize_t pos = 0, outlen = 0;
+
+	while (pos < len) {
+		const char c = src[pos];
+
+		if (state == DDEFAULT) {
+			/* every character is emitted here; '.' starts a match */
+			if (c == '.')
+				state = DDOT;
+			charcopy(dest, &outlen, destsize, c);
+			pos++;
+		} else if (state == DDOT) {
+			if (c == 'd' || c == 'i') {
+				state = DHGDI;
+				charcopy(dest, &outlen, destsize, c);
+				pos++;
+			} else if (c == 'h') {
+				state = DH;
+				charcopy(dest, &outlen, destsize, c);
+				pos++;
+			} else {
+				/* no match: re-examine c from the default state */
+				state = DDEFAULT;
+			}
+		} else if (state == DH) {
+			if (c == 'g') {
+				state = DHGDI;
+				charcopy(dest, &outlen, destsize, c);
+				pos++;
+			} else {
+				/* no match: re-examine c from the default state */
+				state = DDEFAULT;
+			}
+		} else if (state == DHGDI) {
+			/* only a directory separator triggers the insertion */
+			if (c == '/') {
+				memcopy(dest, &outlen, destsize, ".hg", 3);
+				charcopy(dest, &outlen, destsize, c);
+				pos++;
+			}
+			state = DDEFAULT;
+		}
+	}
+
+	return outlen;
+}
+
+/*
+ * encodedir(path) -> string
+ *
+ * Python entry point: returns path with ".hg" inserted before each '/'
+ * that follows ".hg", ".d" or ".i".  When nothing needs to be inserted
+ * the argument itself is returned (with a new reference).  Returns NULL
+ * with an exception set on failure.
+ */
+PyObject *encodedir(PyObject *self, PyObject *args)
+{
+	PyObject *arg, *result;
+	Py_ssize_t srclen, enclen;
+	char *src;
+
+	if (!PyArg_ParseTuple(args, "O:encodedir", &arg))
+		return NULL;
+
+	if (PyString_AsStringAndSize(arg, &src, &srclen) == -1) {
+		PyErr_SetString(PyExc_TypeError, "expected a string");
+		return NULL;
+	}
+
+	/* both lengths count the trailing NUL, which is run through too */
+	if (srclen == 0)
+		enclen = 1;
+	else
+		enclen = _encodedir(NULL, 0, src, srclen + 1);
+
+	if (enclen == srclen + 1) {
+		/* nothing was inserted: hand back the input object */
+		Py_INCREF(arg);
+		return arg;
+	}
+
+	result = PyString_FromStringAndSize(NULL, enclen);
+	if (result == NULL)
+		return NULL;
+
+	/* enclen counted the NUL; exclude it from the string's size */
+	PyString_GET_SIZE(result)--;
+	_encodedir(PyString_AS_STRING(result), enclen, src, srclen + 1);
+
+	return result;
+}
--- a/setup.py Tue Sep 18 07:58:50 2012 +0200
+++ b/setup.py Tue Sep 18 11:43:30 2012 +0200
@@ -421,7 +421,8 @@
Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
- Extension('mercurial.parsers', ['mercurial/parsers.c']),
+ Extension('mercurial.parsers', ['mercurial/parsers.c',
+ 'mercurial/pathencode.c']),
]
osutil_ldflags = []