comparison mercurial/cext/revlog.c @ 37968:0304f22497fa

revlog: use node tree (native code) for shortest() calculation I want to rewrite revlog.shortest() to disambiguate only among hex nodeids and then disambiguate the result with revnums at a higher level (in scmutil). However, that would slow down `hg log -T '{shortest(node,1)}\n'` from 5.0s to 6.8s, which I wasn't sure would be acceptable. So this patch makes revlog.shortest() use the node tree for finding the length of the shortest prefix that's unambiguous among nodeids. Once that has been found, it makes it longer until it is also not ambiguous with a revnum. This speeds up `hg log -T '{shortest(node,1)}\n'` from 5.0s to 4.0s. Differential Revision: https://phab.mercurial-scm.org/D3499
author Martin von Zweigbergk <martinvonz@google.com>
date Wed, 02 May 2018 23:17:58 -0700
parents 892592475094
children 312d7d14d44e
comparison
equal deleted inserted replaced
37967:7932be8b0559 37968:0304f22497fa
1257 return -3; 1257 return -3;
1258 1258
1259 return nt_find(self, node, nodelen, 1); 1259 return nt_find(self, node, nodelen, 1);
1260 } 1260 }
1261 1261
1262 /*
1263 * Find the length of the shortest unique prefix of node.
1264 *
1265 * Return values:
1266 *
1267 * -3: error (exception set)
1268 * -2: not found (no exception set)
1269 * rest: length of shortest prefix
1270 */
1271 static int nt_shortest(indexObject *self, const char *node)
1272 {
1273 int level, off;
1274
1275 if (nt_init(self) == -1)
1276 return -3;
1277 if (nt_populate(self) == -1)
1278 return -3;
1279
1280 for (level = off = 0; level < 40; level++) {
1281 int k, v;
1282 nodetree *n = &self->nt[off];
1283 k = nt_level(node, level);
1284 v = n->children[k];
1285 if (v < 0) {
1286 const char *n;
1287 v = -(v + 1);
1288 n = index_node(self, v);
1289 if (memcmp(node, n, 20) != 0)
1290 /*
1291 * Found a unique prefix, but it wasn't for the
1292 * requested node (i.e the requested node does
1293 * not exist).
1294 */
1295 return -2;
1296 return level + 1;
1297 }
1298 if (v == 0)
1299 return -2;
1300 off = v;
1301 }
1302 /*
1303 * The node was still not unique after 40 hex digits, so this won't
1304 * happen. Also, if we get here, then there's a programming error in
1305 * this file that made us insert a node longer than 40 hex digits.
1306 */
1307 PyErr_SetString(PyExc_Exception, "broken node tree");
1308 return -3;
1309 }
1310
1262 static PyObject *index_partialmatch(indexObject *self, PyObject *args) 1311 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
1263 { 1312 {
1264 const char *fullnode; 1313 const char *fullnode;
1265 int nodelen; 1314 int nodelen;
1266 char *node; 1315 char *node;
1303 fullnode = index_node_existing(self, rev); 1352 fullnode = index_node_existing(self, rev);
1304 if (fullnode == NULL) { 1353 if (fullnode == NULL) {
1305 return NULL; 1354 return NULL;
1306 } 1355 }
1307 return PyBytes_FromStringAndSize(fullnode, 20); 1356 return PyBytes_FromStringAndSize(fullnode, 20);
1357 }
1358
1359 static PyObject *index_shortest(indexObject *self, PyObject *args)
1360 {
1361 Py_ssize_t nodelen;
1362 PyObject *val;
1363 char *node;
1364 int length;
1365
1366 if (!PyArg_ParseTuple(args, "O", &val))
1367 return NULL;
1368 if (node_check(val, &node, &nodelen) == -1)
1369 return NULL;
1370
1371 self->ntlookups++;
1372 length = nt_shortest(self, node);
1373 if (length == -3)
1374 return NULL;
1375 if (length == -2) {
1376 raise_revlog_error();
1377 return NULL;
1378 }
1379 return PyInt_FromLong(length);
1308 } 1380 }
1309 1381
1310 static PyObject *index_m_get(indexObject *self, PyObject *args) 1382 static PyObject *index_m_get(indexObject *self, PyObject *args)
1311 { 1383 {
1312 Py_ssize_t nodelen; 1384 Py_ssize_t nodelen;
1993 "determine revisions with deltas to reconstruct fulltext"}, 2065 "determine revisions with deltas to reconstruct fulltext"},
1994 {"insert", (PyCFunction)index_insert, METH_VARARGS, 2066 {"insert", (PyCFunction)index_insert, METH_VARARGS,
1995 "insert an index entry"}, 2067 "insert an index entry"},
1996 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, 2068 {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
1997 "match a potentially ambiguous node ID"}, 2069 "match a potentially ambiguous node ID"},
2070 {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
2071 "find length of shortest hex nodeid of a binary ID"},
1998 {"stats", (PyCFunction)index_stats, METH_NOARGS, 2072 {"stats", (PyCFunction)index_stats, METH_NOARGS,
1999 "stats for the index"}, 2073 "stats for the index"},
2000 {NULL} /* Sentinel */ 2074 {NULL} /* Sentinel */
2001 }; 2075 };
2002 2076