Mercurial > hg
comparison rust/hg-cpython/src/revlog.rs @ 47799:3fffb48539ee stable
rust-nodemap: falling back to C impl as mitigation
This is a mitigation for https://bz.mercurial-scm.org/show_bug.cgi?id=6554
We see sometimes almost all data except the most recent revisions
removed from the persistent nodemap, but we don't know how to
reproduce yet.
This has sadly repercussions beyond just needing to reconstruct the
persistent nodemap: for instance, this automatically filters out
all bookmarks pointing to revisions that the nodemap cannot resolve.
If such filtering happens in a transaction, the update of the
bookmarks file that happens at the end of transaction loses all
bookmarks that have been affected. There may be similar consequences
for other data.
So this is a data loss, something that we have to prevent as soon as
possible.
As a mitigation measure, we will now fallback to the C implementation
in case nodemap lookups failed. This will add some latency, e.g., in
discovery, yet less than disabling the persistent nodemap entirely.
We considered implementing the fallback directly on the Python
side, but `revlog.get_rev()` is not systematically used, there are
also several direct calls to the index method (`self.index.rev()` for
a `revlog` instance). It is therefore more direct to implement the
mitigation in the rust-cpython wrapper.
Differential Revision: https://phab.mercurial-scm.org/D11238
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Sun, 01 Aug 2021 14:39:38 +0200 |
parents | 33e7508b0ae9 |
children | aa88fb60ecb4 |
comparison
equal
deleted
inserted
replaced
47798:2cd00052ae4d | 47799:3fffb48539ee |
---|---|
57 | 57 |
58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py | 58 // Index API involving nodemap, as defined in mercurial/pure/parsers.py |
59 | 59 |
60 /// Return Revision if found, raises a bare `error.RevlogError` | 60 /// Return Revision if found, raises a bare `error.RevlogError` |
61 /// in case of ambiguity, same as C version does | 61 /// in case of ambiguity, same as C version does |
62 def get_rev(&self, node: PyBytes) -> PyResult<Option<Revision>> { | 62 def get_rev(&self, pynode: PyBytes) -> PyResult<Option<Revision>> { |
63 let opt = self.get_nodetree(py)?.borrow(); | 63 let opt = self.get_nodetree(py)?.borrow(); |
64 let nt = opt.as_ref().unwrap(); | 64 let nt = opt.as_ref().unwrap(); |
65 let idx = &*self.cindex(py).borrow(); | 65 let idx = &*self.cindex(py).borrow(); |
66 let node = node_from_py_bytes(py, &node)?; | 66 let node = node_from_py_bytes(py, &pynode)?; |
67 nt.find_bin(idx, node.into()).map_err(|e| nodemap_error(py, e)) | 67 match nt.find_bin(idx, node.into()) |
68 { | |
69 Ok(None) => | |
70 // fallback to C implementation, remove once | |
71 // https://bz.mercurial-scm.org/show_bug.cgi?id=6554 | |
72 // is fixed (a simple backout should do) | |
73 self.call_cindex(py, "get_rev", &PyTuple::new(py, &[pynode.into_object()]), None)? | |
74 .extract(py), | |
75 Ok(Some(rev)) => Ok(Some(rev)), | |
76 Err(e) => Err(nodemap_error(py, e)), | |
77 } | |
68 } | 78 } |
69 | 79 |
70 /// same as `get_rev()` but raises a bare `error.RevlogError` if node | 80 /// same as `get_rev()` but raises a bare `error.RevlogError` if node |
71 /// is not found. | 81 /// is not found. |
72 /// | 82 /// |
92 Ok(None) => Err(revlog_error(py)), | 102 Ok(None) => Err(revlog_error(py)), |
93 Err(e) => Err(nodemap_error(py, e)), | 103 Err(e) => Err(nodemap_error(py, e)), |
94 } | 104 } |
95 } | 105 } |
96 | 106 |
97 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { | 107 def partialmatch(&self, pynode: PyObject) -> PyResult<Option<PyBytes>> { |
98 let opt = self.get_nodetree(py)?.borrow(); | 108 let opt = self.get_nodetree(py)?.borrow(); |
99 let nt = opt.as_ref().unwrap(); | 109 let nt = opt.as_ref().unwrap(); |
100 let idx = &*self.cindex(py).borrow(); | 110 let idx = &*self.cindex(py).borrow(); |
101 | 111 |
102 let node_as_string = if cfg!(feature = "python3-sys") { | 112 let node_as_string = if cfg!(feature = "python3-sys") { |
103 node.cast_as::<PyString>(py)?.to_string(py)?.to_string() | 113 pynode.cast_as::<PyString>(py)?.to_string(py)?.to_string() |
104 } | 114 } |
105 else { | 115 else { |
106 let node = node.extract::<PyBytes>(py)?; | 116 let node = pynode.extract::<PyBytes>(py)?; |
107 String::from_utf8_lossy(node.data(py)).to_string() | 117 String::from_utf8_lossy(node.data(py)).to_string() |
108 }; | 118 }; |
109 | 119 |
110 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?; | 120 let prefix = NodePrefix::from_hex(&node_as_string).map_err(|_| PyErr::new::<ValueError, _>(py, "Invalid node or prefix"))?; |
111 | 121 |
112 nt.find_bin(idx, prefix) | 122 match nt.find_bin(idx, prefix) { |
113 // TODO make an inner API returning the node directly | 123 Ok(None) => |
114 .map(|opt| opt.map( | 124 // fallback to C implementation, remove once |
115 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))) | 125 // https://bz.mercurial-scm.org/show_bug.cgi?id=6554 |
116 .map_err(|e| nodemap_error(py, e)) | 126 // is fixed (a simple backout should do) |
117 | 127 self.call_cindex( |
128 py, "partialmatch", | |
129 &PyTuple::new(py, &[pynode]), None | |
130 )?.extract(py), | |
131 Ok(Some(rev)) => | |
132 Ok(Some(PyBytes::new(py, idx.node(rev).unwrap().as_bytes()))), | |
133 Err(e) => Err(nodemap_error(py, e)), | |
134 } | |
118 } | 135 } |
119 | 136 |
120 /// append an index entry | 137 /// append an index entry |
121 def append(&self, tup: PyTuple) -> PyResult<PyObject> { | 138 def append(&self, tup: PyTuple) -> PyResult<PyObject> { |
122 if tup.len(py) < 8 { | 139 if tup.len(py) < 8 { |