comparison rust/hg-cpython/src/revlog.rs @ 47799:3fffb48539ee stable

rust-nodemap: falling back to C impl as mitigation This is a mitigation for https://bz.mercurial-scm.org/show_bug.cgi?id=6554 We see sometimes almost all data except the most recent revisions removed from the persistent nodemap, but we don't know how to reproduce yet. This has sadly repercussions beyond just needing to reconstruct the persistent nodemap: for instance, this automatically filters out all bookmarks pointing to revisions that the nodemap cannot resolve. If such filtering happens in a transaction, the update of the bookmarks file that happens at the end of transaction loses all bookmarks that have been affected. There may be similar consequences for other data. So this is a data loss, something that we have to prevent as soon as possible. As a mitigation measure, we will now fallback to the C implementation in case nodemap lookups failed. This will add some latency, e.g., in discovery, yet less than disabling the persistent nodemap entirely. We considered implementing the fallback directly on the Python side, but `revlog.get_rev()` is not systematically used, there are also several direct calls to the index method (`self.index.rev()` for a `revlog` instance). It is therefore more direct to implement the mitigation in the rust-cpython wrapper. Differential Revision: https://phab.mercurial-scm.org/D11238
author Georges Racinet <georges.racinet@octobus.net>
date Sun, 01 Aug 2021 14:39:38 +0200
parents 33e7508b0ae9
children aa88fb60ecb4
comparison
equal deleted inserted replaced
47798:2cd00052ae4d 47799:3fffb48539ee
57 57
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
59 59
60 /// Return Revision if found, raises a bare `error.RevlogError` 60 /// Return Revision if found, raises a bare `error.RevlogError`
61 /// in case of ambiguity, same as C version does 61 /// in case of ambiguity, same as C version does
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { 62 def get_rev(&self, pynode: PyBytes) -> PyResult<Option<Revision>> {
63 let opt = self.get_nodetree(py)?.borrow(); 63 let opt = self.get_nodetree(py)?.borrow();
64 let nt = opt.as_ref().unwrap(); 64 let nt = opt.as_ref().unwrap();
65 let idx = &*self.cindex(py).borrow(); 65 let idx = &*self.cindex(py).borrow();
66 let node = node_from_py_bytes(py, &node)?; 66 let node = node_from_py_bytes(py, &pynode)?;
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) 67 match nt.find_bin(idx, node.into())
68 {
69 Ok(None) =>
70 // fallback to C implementation, remove once
71 // https://bz.mercurial-scm.org/show_bug.cgi?id=6554
72 // is fixed (a simple backout should do)
73 self.call_cindex(py, "get_rev", &PyTuple::new(py, &[pynode.into_object()]), None)?
74 .extract(py),
75 Ok(Some(rev)) => Ok(Some(rev)),
76 Err(e) => Err(nodemap_error(py, e)),
77 }
68 } 78 }
69 79
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node 80 /// same as `get_rev()` but raises a bare `error.RevlogError` if node
71 /// is not found. 81 /// is not found.
72 /// 82 ///
92 Ok(None) => Err(revlog_error(py)), 102 Ok(None) => Err(revlog_error(py)),
93 Err(e) => Err(nodemap_error(py, e)), 103 Err(e) => Err(nodemap_error(py, e)),
94 } 104 }
95 } 105 }
96 106
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { 107 def partialmatch(&self, pynode: PyObject) -> PyResult<Option<PyBytes>> {
98 let opt = self.get_nodetree(py)?.borrow(); 108 let opt = self.get_nodetree(py)?.borrow();
99 let nt = opt.as_ref().unwrap(); 109 let nt = opt.as_ref().unwrap();
100 let idx = &*self.cindex(py).borrow(); 110 let idx = &*self.cindex(py).borrow();
101 111
102 let node_as_string = if cfg!(feature = "python3-sys") { 112 let node_as_string = if cfg!(feature = "python3-sys") {
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string() 113 pynode.cast_as::<PyString>(py)?.to_string(py)?.to_string()
104 } 114 }
105 else { 115 else {
106 let node = node.extract::<PyBytes>(py)?; 116 let node = pynode.extract::<PyBytes>(py)?;
107 String::from_utf8_lossy(node.data(py)).to_string() 117 String::from_utf8_lossy(node.data(py)).to_string()
108 }; 118 };
109 119
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?; 120 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?;
111 121
112 nt.find_bin(idx, prefix) 122 match nt.find_bin(idx, prefix) {
113 // TODO make an inner API returning the node directly 123 Ok(None) =>
114 .map(|opt| opt.map( 124 // fallback to C implementation, remove once
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) 125 // https://bz.mercurial-scm.org/show_bug.cgi?id=6554
116 .map_err(|e| nodemap_error(py, e)) 126 // is fixed (a simple backout should do)
117 127 self.call_cindex(
128 py, "partialmatch",
129 &PyTuple::new(py, &[pynode]), None
130 )?.extract(py),
131 Ok(Some(rev)) =>
132 Ok(Some(PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))),
133 Err(e) => Err(nodemap_error(py, e)),
134 }
118 } 135 }
119 136
120 /// append an index entry 137 /// append an index entry
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> { 138 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
122 if tup.len(py) < 8 { 139 if tup.len(py) < 8 {