Mercurial > hg

--- a/mercurial/encoding.py	Tue Sep 30 15:58:08 2014 -0700
+++ b/mercurial/encoding.py	Fri Oct 03 18:42:39 2014 -0700
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

-import error
+import error, parsers
 import unicodedata, locale, os

 def _getpreferredencoding():
@@ -258,6 +258,15 @@
             return concat(usub.encode(encoding))
     return ellipsis # no enough room for multi-column characters

+def asciilower(s):
+    '''lowercase a string that is required to be pure ASCII
+
+    The return value is s.lower().  UnicodeDecodeError is raised, and
+    nothing is returned, when s contains a non-ASCII character.'''
+    # decoding is done only for its validation side effect: it raises on
+    # the first non-ASCII character, and the decoded value is thrown away
+    s.decode('ascii')
+    lowered = s.lower()
+    return lowered
+
+# use the implementation exported by the parsers module when it has one;
+# otherwise the pure Python definition above stays bound to the name
+try:
+    asciilower = parsers.asciilower
+except AttributeError:
+    pass
+
 def lower(s):
     "best-effort encoding-aware case-folding of local string s"
     try:
--- a/mercurial/parsers.c	Tue Sep 30 15:58:08 2014 -0700
+++ b/mercurial/parsers.c	Fri Oct 03 18:42:39 2014 -0700
@@ -35,6 +35,27 @@
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 };

+/*
+ * ASCII lowercasing table: entry i holds the lowercase form of byte i.
+ * 'A'-'Z' (0x41-0x5a) map to 'a'-'z' (0x61-0x7a); every other value maps
+ * to itself.  Only the 128 7-bit values are covered, so a byte with the
+ * high bit set must be rejected before it is used as an index.
+ *
+ * The table is only ever read, hence const.
+ */
+static const char lowertable[128] = {
+	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+	'\x40',
+	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
+	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
+	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
+	'\x78', '\x79', '\x7a',                                         /* X-Z */
+	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
+	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
+	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
+	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
+};
+
 static inline int hexdigit(const char *p, Py_ssize_t off)
 {
 	int8_t val = hextable[(unsigned char)p[off]];
@@ -72,6 +93,39 @@
 	return ret;
 }

+/*
+ * asciilower(s): return a new string of the same length as s in which
+ * each byte has been mapped through lowertable (A-Z become a-z, all
+ * other 7-bit values are copied unchanged).
+ *
+ * Raises UnicodeDecodeError, reporting the offending position, at the
+ * first byte that has its high bit set.  Returns NULL with an exception
+ * set on any failure.
+ */
+static PyObject *asciilower(PyObject *self, PyObject *args)
+{
+	char *str, *newstr;
+	/* NOTE(review): "s#" stores the length as an int only while
+	 * PY_SSIZE_T_CLEAN is not defined for this file -- confirm before
+	 * changing the type of len */
+	int i, len;
+	PyObject *newobj = NULL;
+
+	if (!PyArg_ParseTuple(args, "s#", &str, &len))
+		goto quit;
+
+	/* allocate the result up front; it is filled in place below */
+	newobj = PyBytes_FromStringAndSize(NULL, len);
+	if (!newobj)
+		goto quit;
+
+	newstr = PyBytes_AS_STRING(newobj);
+
+	for (i = 0; i < len; i++) {
+		char c = str[i];
+		if (c & 0x80) {
+			PyObject *err = PyUnicodeDecodeError_Create(
+				"ascii", str, len, i, (i + 1),
+				"unexpected code byte");
+			/* on NULL, keep the exception the failed call set */
+			if (err) {
+				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
+				/* PyErr_SetObject holds its own reference;
+				 * drop ours so the object is not leaked */
+				Py_DECREF(err);
+			}
+			goto quit;
+		}
+		/* c < 0x80 here, so the index stays inside lowertable */
+		newstr[i] = lowertable[(unsigned char)c];
+	}
+
+	return newobj;
+quit:
+	/* newobj is NULL if parsing or allocation failed */
+	Py_XDECREF(newobj);
+	return NULL;
+}
+
 /*
  * This code assumes that a manifest is stitched together with newline
  * ('\n') characters.
@@ -2165,6 +2219,7 @@
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+	{"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
 	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
 	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
 	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},