dirstate: store mtimes with nanosecond precision in memory
Keep integer seconds since the Unix epoch,
together with integer nanoseconds in the `0 <= n < 1e9` range.
For now, nanoseconds are still always zero.
This commit is about data structure changes.
Differential Revision: https://phab.mercurial-scm.org/D11684
--- a/mercurial/cext/parsers.c Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/cext/parsers.c Mon Oct 18 11:23:07 2021 +0200
@@ -57,7 +57,8 @@
int has_meaningful_mtime;
int mode;
int size;
- int mtime;
+ int mtime_s;
+ int mtime_ns;
PyObject *parentfiledata;
PyObject *fallback_exec;
PyObject *fallback_symlink;
@@ -111,15 +112,10 @@
}
if (parentfiledata != Py_None) {
- if (!PyTuple_CheckExact(parentfiledata)) {
- PyErr_SetString(
- PyExc_TypeError,
- "parentfiledata should be a Tuple or None");
+ if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
+ &mtime_s, &mtime_ns)) {
return NULL;
}
- mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
- size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
- mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
} else {
has_meaningful_data = 0;
has_meaningful_mtime = 0;
@@ -134,9 +130,11 @@
}
if (has_meaningful_mtime) {
t->flags |= dirstate_flag_has_file_mtime;
- t->mtime = mtime;
+ t->mtime_s = mtime_s;
+ t->mtime_ns = mtime_ns;
} else {
- t->mtime = 0;
+ t->mtime_s = 0;
+ t->mtime_ns = 0;
}
return (PyObject *)t;
}
@@ -254,7 +252,7 @@
(self->flags & dirstate_flag_p2_info)) {
return ambiguous_time;
} else {
- return self->mtime;
+ return self->mtime_s;
}
}
@@ -272,7 +270,8 @@
} else {
flags &= ~dirstate_flag_mode_is_symlink;
}
- return Py_BuildValue("iii", flags, self->size, self->mtime);
+ return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
+ self->mtime_ns);
};
static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
@@ -297,14 +296,30 @@
};
static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
- PyObject *value)
+ PyObject *now)
{
- long now;
- if (!pylong_to_long(value, &now)) {
+ int now_s;
+ int now_ns;
+ if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
return NULL;
}
- if (dirstate_item_c_v1_state(self) == 'n' &&
- dirstate_item_c_v1_mtime(self) == now) {
+ if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
+ Py_RETURN_TRUE;
+ } else {
+ Py_RETURN_FALSE;
+ }
+};
+
+static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
+ PyObject *other)
+{
+ int other_s;
+ int other_ns;
+ if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
+ return NULL;
+ }
+ if ((self->flags & dirstate_flag_has_file_mtime) &&
+ self->mtime_s == other_s && self->mtime_ns == other_ns) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
@@ -324,7 +339,8 @@
t->flags = 0;
t->mode = 0;
t->size = 0;
- t->mtime = 0;
+ t->mtime_s = 0;
+ t->mtime_ns = 0;
if (state == 'm') {
t->flags = (dirstate_flag_wc_tracked |
@@ -360,7 +376,7 @@
dirstate_flag_has_file_mtime);
t->mode = mode;
t->size = size;
- t->mtime = mtime;
+ t->mtime_s = mtime;
}
} else {
PyErr_Format(PyExc_RuntimeError,
@@ -395,7 +411,8 @@
if (!t) {
return NULL;
}
- if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
+ if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
+ &t->mtime_ns)) {
return NULL;
}
if (t->flags & dirstate_flag_expected_state_is_modified) {
@@ -431,8 +448,9 @@
static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
PyObject *args)
{
- int size, mode, mtime;
- if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
+ int size, mode, mtime_s, mtime_ns;
+ if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
+ &mtime_ns)) {
return NULL;
}
self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
@@ -440,7 +458,8 @@
dirstate_flag_has_file_mtime;
self->mode = mode;
self->size = size;
- self->mtime = mtime;
+ self->mtime_s = mtime_s;
+ self->mtime_ns = mtime_ns;
Py_RETURN_NONE;
}
@@ -455,8 +474,9 @@
{
self->flags &= ~dirstate_flag_wc_tracked;
self->mode = 0;
- self->mtime = 0;
self->size = 0;
+ self->mtime_s = 0;
+ self->mtime_ns = 0;
Py_RETURN_NONE;
}
@@ -467,8 +487,9 @@
dirstate_flag_has_meaningful_data |
dirstate_flag_has_file_mtime);
self->mode = 0;
- self->mtime = 0;
self->size = 0;
+ self->mtime_s = 0;
+ self->mtime_ns = 0;
}
Py_RETURN_NONE;
}
@@ -485,6 +506,8 @@
"return a \"mtime\" suitable for v1 serialization"},
{"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
"True if the stored mtime would be ambiguous with the current time"},
+ {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
+ METH_O, "True if the stored mtime is likely equal to the given mtime"},
{"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
{"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
@@ -855,11 +878,12 @@
Py_ssize_t nbytes, pos, l;
PyObject *k, *v = NULL, *pn;
char *p, *s;
- int now;
+ int now_s;
+ int now_ns;
- if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
- &PyDict_Type, &copymap, &PyTuple_Type, &pl,
- &now)) {
+ if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
+ &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
+ &now_s, &now_ns)) {
return NULL;
}
@@ -928,7 +952,7 @@
mode = dirstate_item_c_v1_mode(tuple);
size = dirstate_item_c_v1_size(tuple);
mtime = dirstate_item_c_v1_mtime(tuple);
- if (state == 'n' && mtime == now) {
+ if (state == 'n' && tuple->mtime_s == now_s) {
/* See pure/parsers.py:pack_dirstate for why we do
* this. */
mtime = -1;
--- a/mercurial/cext/util.h Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/cext/util.h Mon Oct 18 11:23:07 2021 +0200
@@ -27,7 +27,8 @@
int flags;
int mode;
int size;
- int mtime;
+ int mtime_s;
+ int mtime_ns;
} dirstateItemObject;
/* clang-format on */
--- a/mercurial/dirstate.py Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstate.py Mon Oct 18 11:23:07 2021 +0200
@@ -31,6 +31,10 @@
util,
)
+from .dirstateutils import (
+ timestamp,
+)
+
from .interfaces import (
dirstate as intdirstate,
util as interfaceutil,
@@ -66,7 +70,7 @@
'''Get "now" timestamp on filesystem'''
tmpfd, tmpname = vfs.mkstemp()
try:
- return os.fstat(tmpfd)[stat.ST_MTIME]
+ return timestamp.mtime_of(os.fstat(tmpfd))
finally:
os.close(tmpfd)
vfs.unlink(tmpname)
@@ -122,7 +126,7 @@
# UNC path pointing to root share (issue4557)
self._rootdir = pathutil.normasprefix(root)
self._dirty = False
- self._lastnormaltime = 0
+ self._lastnormaltime = timestamp.zero()
self._ui = ui
self._filecache = {}
self._parentwriters = 0
@@ -440,7 +444,7 @@
for a in ("_map", "_branch", "_ignore"):
if a in self.__dict__:
delattr(self, a)
- self._lastnormaltime = 0
+ self._lastnormaltime = timestamp.zero()
self._dirty = False
self._parentwriters = 0
self._origpl = None
@@ -639,7 +643,7 @@
s = os.lstat(self._join(filename))
mode = s.st_mode
size = s.st_size
- mtime = s[stat.ST_MTIME]
+ mtime = timestamp.mtime_of(s)
return (mode, size, mtime)
def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
@@ -720,7 +724,7 @@
def clear(self):
self._map.clear()
- self._lastnormaltime = 0
+ self._lastnormaltime = timestamp.zero()
self._dirty = True
def rebuild(self, parent, allfiles, changedfiles=None):
@@ -823,7 +827,7 @@
if now is None:
# use the modification time of the newly created temporary file as the
# filesystem's notion of 'now'
- now = util.fstat(st)[stat.ST_MTIME] & _rangemask
+ now = timestamp.mtime_of(util.fstat(st))
# enough 'delaywrite' prevents 'pack_dirstate' from dropping
# timestamp of each entries in dirstate, because of 'now > mtime'
@@ -840,11 +844,12 @@
start = int(clock) - (int(clock) % delaywrite)
end = start + delaywrite
time.sleep(end - clock)
- now = end # trust our estimate that the end is near now
+ # trust our estimate that the end is near now
+ now = timestamp.timestamp((end, 0))
break
self._map.write(tr, st, now)
- self._lastnormaltime = 0
+ self._lastnormaltime = timestamp.zero()
self._dirty = False
def _dirignore(self, f):
@@ -1377,17 +1382,9 @@
uadd(fn)
continue
- # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
- # written like that for performance reasons. dmap[fn] is not a
- # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
- # opcode has fast paths when the value to be unpacked is a tuple or
- # a list, but falls back to creating a full-fledged iterator in
- # general. That is much slower than simply accessing and storing the
- # tuple members one by one.
t = dget(fn)
mode = t.mode
size = t.size
- time = t.mtime
if not st and t.tracked:
dadd(fn)
@@ -1412,12 +1409,9 @@
ladd(fn)
else:
madd(fn)
- elif (
- time != st[stat.ST_MTIME]
- and time != st[stat.ST_MTIME] & _rangemask
- ):
+ elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
ladd(fn)
- elif st[stat.ST_MTIME] == lastnormaltime:
+ elif timestamp.mtime_of(st) == lastnormaltime:
# fn may have just been marked as normal and it may have
# changed in the same second without changing its size.
# This can happen if we quickly do multiple commits.
--- a/mercurial/dirstatemap.py Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstatemap.py Mon Oct 18 11:23:07 2021 +0200
@@ -127,7 +127,6 @@
def set_clean(self, filename, mode, size, mtime):
"""mark a file as back to a clean state"""
entry = self[filename]
- mtime = mtime & rangemask
size = size & rangemask
entry.set_clean(mode, size, mtime)
self._refresh_entry(filename, entry)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dirstateutils/timestamp.py Mon Oct 18 11:23:07 2021 +0200
@@ -0,0 +1,53 @@
+# Copyright Mercurial Contributors
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import stat
+
+
+rangemask = 0x7FFFFFFF
+
+
+class timestamp(tuple):
+ """
+ A Unix timestamp with nanosecond precision,
+ modulo 2**31 seconds.
+
+ A 2-tuple containing:
+
+ `truncated_seconds`: seconds since the Unix epoch,
+ truncated to its lower 31 bits
+
+ `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
+ """
+
+ def __new__(cls, value):
+ truncated_seconds, subsec_nanos = value
+ value = (truncated_seconds & rangemask, subsec_nanos)
+ return super(timestamp, cls).__new__(cls, value)
+
+
+def zero():
+ """
+ Returns the `timestamp` at the Unix epoch.
+ """
+ return tuple.__new__(timestamp, (0, 0))
+
+
+def mtime_of(stat_result):
+ """
+ Takes an `os.stat_result`-like object and returns a `timestamp` object
+ for its modification time.
+ """
+ # https://docs.python.org/2/library/os.html#os.stat_float_times
+ # "For compatibility with older Python versions,
+ # accessing stat_result as a tuple always returns integers."
+ secs = stat_result[stat.ST_MTIME]
+
+ # For now, sub-second precision is not used: nanoseconds are always zero.
+ subsec_nanos = 0
+
+ return timestamp((secs, subsec_nanos))
--- a/mercurial/dirstateutils/v2.py Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstateutils/v2.py Mon Oct 18 11:23:07 2021 +0200
@@ -107,7 +107,10 @@
# Parse child nodes of this node recursively
parse_nodes(map, copy_map, data, children_start, children_count)
- item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
+ # Don’t yet use sub-second precision if it exists in the file,
+ # since other parts of the code still set it to zero.
+ mtime_ns = 0
+ item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
if not item.any_tracked:
continue
path = slice_with_len(data, path_start, path_len)
@@ -147,8 +150,7 @@
copy_source_start = 0
copy_source_len = 0
if entry is not None:
- flags, size, mtime_s = entry.v2_data()
- mtime_ns = 0
+ flags, size, mtime_s, mtime_ns = entry.v2_data()
else:
# There are no mtime-cached directories in the Python implementation
flags = 0
@@ -249,7 +251,6 @@
written to the docket. Again, see more details on the on-disk format in
`mercurial/helptext/internals/dirstate-v2`.
"""
- now = int(now)
data = bytearray()
root_nodes_start = 0
root_nodes_len = 0
--- a/mercurial/merge.py Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/merge.py Mon Oct 18 11:23:07 2021 +0200
@@ -9,13 +9,13 @@
import collections
import errno
-import stat
import struct
from .i18n import _
from .node import nullrev
from .thirdparty import attr
from .utils import stringutil
+from .dirstateutils import timestamp
from . import (
copies,
encoding,
@@ -1406,8 +1406,9 @@
if wantfiledata:
s = wfctx.lstat()
mode = s.st_mode
- mtime = s[stat.ST_MTIME]
- filedata[f] = (mode, size, mtime) # for dirstate.normal
+ mtime = timestamp.mtime_of(s)
+ # for dirstate.update_file's parentfiledata argument:
+ filedata[f] = (mode, size, mtime)
if i == 100:
yield False, (i, f)
i = 0
--- a/mercurial/pure/parsers.py Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/pure/parsers.py Mon Oct 18 11:23:07 2021 +0200
@@ -99,7 +99,8 @@
_p2_info = attr.ib()
_mode = attr.ib()
_size = attr.ib()
- _mtime = attr.ib()
+ _mtime_s = attr.ib()
+ _mtime_ns = attr.ib()
_fallback_exec = attr.ib()
_fallback_symlink = attr.ib()
@@ -123,7 +124,8 @@
self._mode = None
self._size = None
- self._mtime = None
+ self._mtime_s = None
+ self._mtime_ns = None
if parentfiledata is None:
has_meaningful_mtime = False
has_meaningful_data = False
@@ -131,10 +133,10 @@
self._mode = parentfiledata[0]
self._size = parentfiledata[1]
if has_meaningful_mtime:
- self._mtime = parentfiledata[2]
+ self._mtime_s, self._mtime_ns = parentfiledata[2]
@classmethod
- def from_v2_data(cls, flags, size, mtime):
+ def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
"""Build a new DirstateItem object from V2 data"""
has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
@@ -170,7 +172,7 @@
p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
has_meaningful_data=has_mode_size,
has_meaningful_mtime=has_meaningful_mtime,
- parentfiledata=(mode, size, mtime),
+ parentfiledata=(mode, size, (mtime_s, mtime_ns)),
fallback_exec=fallback_exec,
fallback_symlink=fallback_symlink,
)
@@ -207,13 +209,13 @@
wc_tracked=True,
p1_tracked=True,
has_meaningful_mtime=False,
- parentfiledata=(mode, size, 42),
+ parentfiledata=(mode, size, (42, 0)),
)
else:
return cls(
wc_tracked=True,
p1_tracked=True,
- parentfiledata=(mode, size, mtime),
+ parentfiledata=(mode, size, (mtime, 0)),
)
else:
raise RuntimeError(b'unknown state: %s' % state)
@@ -224,7 +226,8 @@
This means the next status call will have to actually check its content
to make sure it is correct.
"""
- self._mtime = None
+ self._mtime_s = None
+ self._mtime_ns = None
def set_clean(self, mode, size, mtime):
"""mark a file as "clean" cancelling potential "possibly dirty call"
@@ -238,7 +241,7 @@
self._p1_tracked = True
self._mode = mode
self._size = size
- self._mtime = mtime
+ self._mtime_s, self._mtime_ns = mtime
def set_tracked(self):
"""mark a file as tracked in the working copy
@@ -250,7 +253,8 @@
# the files as needing lookup
#
# Consider dropping this in the future in favor of something less broad.
- self._mtime = None
+ self._mtime_s = None
+ self._mtime_ns = None
def set_untracked(self):
"""mark a file as untracked in the working copy
@@ -260,7 +264,8 @@
self._wc_tracked = False
self._mode = None
self._size = None
- self._mtime = None
+ self._mtime_s = None
+ self._mtime_ns = None
def drop_merge_data(self):
"""remove all "merge-only" from a DirstateItem
@@ -271,7 +276,8 @@
self._p2_info = False
self._mode = None
self._size = None
- self._mtime = None
+ self._mtime_s = None
+ self._mtime_ns = None
@property
def mode(self):
@@ -285,6 +291,14 @@
def mtime(self):
return self.v1_mtime()
+ def mtime_likely_equal_to(self, other_mtime):
+ self_sec = self._mtime_s
+ if self_sec is None:
+ return False
+ self_ns = self._mtime_ns
+ other_sec, other_ns = other_mtime
+ return self_sec == other_sec and self_ns == other_ns
+
@property
def state(self):
"""
@@ -440,7 +454,7 @@
flags |= DIRSTATE_V2_MODE_EXEC_PERM
if stat.S_ISLNK(self.mode):
flags |= DIRSTATE_V2_MODE_IS_SYMLINK
- if self._mtime is not None:
+ if self._mtime_s is not None:
flags |= DIRSTATE_V2_HAS_FILE_MTIME
if self._fallback_exec is not None:
@@ -456,7 +470,7 @@
# Note: we do not need to do anything regarding
# DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
# since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
- return (flags, self._size or 0, self._mtime or 0)
+ return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
def v1_state(self):
"""return a "state" suitable for v1 serialization"""
@@ -504,18 +518,18 @@
raise RuntimeError('untracked item')
elif self.removed:
return 0
- elif self._mtime is None:
+ elif self._mtime_s is None:
return AMBIGUOUS_TIME
elif self._p2_info:
return AMBIGUOUS_TIME
elif not self._p1_tracked:
return AMBIGUOUS_TIME
else:
- return self._mtime
+ return self._mtime_s
def need_delay(self, now):
"""True if the stored mtime would be ambiguous with the current time"""
- return self.v1_state() == b'n' and self.v1_mtime() == now
+ return self.v1_state() == b'n' and self._mtime_s == now[0]
def gettype(q):
@@ -883,7 +897,6 @@
def pack_dirstate(dmap, copymap, pl, now):
- now = int(now)
cs = stringio()
write = cs.write
write(b"".join(pl))
--- a/rust/hg-core/src/dirstate/entry.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/entry.rs Mon Oct 18 11:23:07 2021 +0200
@@ -14,14 +14,15 @@
Merged,
}
-/// The C implementation uses all signed types. This will be an issue
-/// either when 4GB+ source files are commonplace or in 2038, whichever
-/// comes first.
-#[derive(Debug, PartialEq, Copy, Clone)]
+/// `size` and `mtime.seconds` are truncated to 31 bits.
+///
+/// TODO: double-check status algorithm correctness for files
+/// larger than 2 GiB or modified after 2038.
+#[derive(Debug, Copy, Clone)]
pub struct DirstateEntry {
pub(crate) flags: Flags,
mode_size: Option<(u32, u32)>,
- mtime: Option<u32>,
+ mtime: Option<TruncatedTimestamp>,
}
bitflags! {
@@ -37,7 +38,7 @@
}
/// A Unix timestamp with nanoseconds precision
-#[derive(Copy, Clone)]
+#[derive(Debug, Copy, Clone)]
pub struct TruncatedTimestamp {
truncated_seconds: u32,
/// Always in the `0 .. 1_000_000_000` range.
@@ -90,6 +91,11 @@
}
}
+ pub fn to_integer_second(mut self) -> Self {
+ self.nanoseconds = 0;
+ self
+ }
+
/// The lower 31 bits of the number of seconds since the epoch.
pub fn truncated_seconds(&self) -> u32 {
self.truncated_seconds
@@ -182,7 +188,7 @@
p1_tracked: bool,
p2_info: bool,
mode_size: Option<(u32, u32)>,
- mtime: Option<u32>,
+ mtime: Option<TruncatedTimestamp>,
fallback_exec: Option<bool>,
fallback_symlink: Option<bool>,
) -> Self {
@@ -191,9 +197,6 @@
assert!(mode & !RANGE_MASK_31BIT == 0);
assert!(size & !RANGE_MASK_31BIT == 0);
}
- if let Some(mtime) = mtime {
- assert!(mtime & !RANGE_MASK_31BIT == 0);
- }
let mut flags = Flags::empty();
flags.set(Flags::WDIR_TRACKED, wdir_tracked);
flags.set(Flags::P1_TRACKED, p1_tracked);
@@ -252,6 +255,9 @@
let mode = u32::try_from(mode).unwrap();
let size = u32::try_from(size).unwrap();
let mtime = u32::try_from(mtime).unwrap();
+ let mtime =
+ TruncatedTimestamp::from_already_truncated(mtime, 0)
+ .unwrap();
Self {
flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
mode_size: Some((mode, size)),
@@ -344,7 +350,7 @@
bool,
bool,
Option<(u32, u32)>,
- Option<u32>,
+ Option<TruncatedTimestamp>,
Option<bool>,
Option<bool>,
) {
@@ -429,7 +435,7 @@
} else if !self.flags.contains(Flags::P1_TRACKED) {
MTIME_UNSET
} else if let Some(mtime) = self.mtime {
- i32::try_from(mtime).unwrap()
+ i32::try_from(mtime.truncated_seconds()).unwrap()
} else {
MTIME_UNSET
}
@@ -501,6 +507,10 @@
}
}
+ pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
+ self.mtime
+ }
+
pub fn drop_merge_data(&mut self) {
if self.flags.contains(Flags::P2_INFO) {
self.flags.remove(Flags::P2_INFO);
@@ -513,9 +523,13 @@
self.mtime = None
}
- pub fn set_clean(&mut self, mode: u32, size: u32, mtime: u32) {
+ pub fn set_clean(
+ &mut self,
+ mode: u32,
+ size: u32,
+ mtime: TruncatedTimestamp,
+ ) {
let size = size & RANGE_MASK_31BIT;
- let mtime = mtime & RANGE_MASK_31BIT;
self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
self.mode_size = Some((mode, size));
self.mtime = Some(mtime);
@@ -577,8 +591,13 @@
}
/// True if the stored mtime would be ambiguous with the current time
- pub fn need_delay(&self, now: i32) -> bool {
- self.state() == EntryState::Normal && self.mtime() == now
+ pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
+ if let Some(mtime) = self.mtime {
+ self.state() == EntryState::Normal
+ && mtime.truncated_seconds() == now.truncated_seconds()
+ } else {
+ false
+ }
}
}
--- a/rust/hg-core/src/dirstate/parsers.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs Mon Oct 18 11:23:07 2021 +0200
@@ -135,6 +135,3 @@
packed.extend(source.as_bytes());
}
}
-
-/// Seconds since the Unix epoch
-pub struct Timestamp(pub i64);
--- a/rust/hg-core/src/dirstate/status.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/status.rs Mon Oct 18 11:23:07 2021 +0200
@@ -12,6 +12,7 @@
use crate::dirstate_tree::on_disk::DirstateV2ParseError;
use crate::{
+ dirstate::TruncatedTimestamp,
utils::hg_path::{HgPath, HgPathError},
PatternError,
};
@@ -64,7 +65,7 @@
/// Remember the most recent modification timeslot for status, to make
/// sure we won't miss future size-preserving file content modifications
/// that happen within the same timeslot.
- pub last_normal_time: i64,
+ pub last_normal_time: TruncatedTimestamp,
/// Whether we are on a filesystem with UNIX-like exec flags
pub check_exec: bool,
pub list_clean: bool,
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Oct 18 11:23:07 2021 +0200
@@ -1,7 +1,6 @@
use bytes_cast::BytesCast;
use micro_timer::timed;
use std::borrow::Cow;
-use std::convert::TryInto;
use std::path::PathBuf;
use super::on_disk;
@@ -11,7 +10,6 @@
use crate::dirstate::parsers::pack_entry;
use crate::dirstate::parsers::packed_entry_size;
use crate::dirstate::parsers::parse_dirstate_entries;
-use crate::dirstate::parsers::Timestamp;
use crate::dirstate::CopyMapIter;
use crate::dirstate::StateMapIter;
use crate::dirstate::TruncatedTimestamp;
@@ -932,10 +930,9 @@
pub fn pack_v1(
&mut self,
parents: DirstateParents,
- now: Timestamp,
+ now: TruncatedTimestamp,
) -> Result<Vec<u8>, DirstateError> {
let map = self.get_map_mut();
- let now: i32 = now.0.try_into().expect("time overflow");
let mut ambiguous_mtimes = Vec::new();
// Optizimation (to be measured?): pre-compute size to avoid `Vec`
// reallocations
@@ -981,12 +978,10 @@
#[timed]
pub fn pack_v2(
&mut self,
- now: Timestamp,
+ now: TruncatedTimestamp,
can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
let map = self.get_map_mut();
- // TODO: how do we want to handle this in 2038?
- let now: i32 = now.0.try_into().expect("time overflow");
let mut paths = Vec::new();
for node in map.iter_nodes() {
let node = node?;
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Oct 18 11:23:07 2021 +0200
@@ -317,7 +317,7 @@
&self,
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
if self.has_entry() {
- Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
+ Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
} else if let Some(mtime) = self.cached_directory_mtime()? {
Ok(dirstate_map::NodeData::CachedDirectory { mtime })
} else {
@@ -357,7 +357,7 @@
file_type | permisions
}
- fn assume_entry(&self) -> DirstateEntry {
+ fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
// TODO: convert through raw bits instead?
let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
@@ -372,11 +372,19 @@
let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
&& !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
{
- Some(self.mtime.truncated_seconds.into())
+ // TODO: replace this by `self.mtime.try_into()?` to use
+ // sub-second precision from the file.
+ // We don’t do this yet because other parts of the code
+ // always set it to zero.
+ let mtime = TruncatedTimestamp::from_already_truncated(
+ self.mtime.truncated_seconds.get(),
+ 0,
+ )?;
+ Some(mtime)
} else {
None
};
- DirstateEntry::from_v2_data(
+ Ok(DirstateEntry::from_v2_data(
wdir_tracked,
p1_tracked,
p2_info,
@@ -384,14 +392,14 @@
mtime,
None,
None,
- )
+ ))
}
pub(super) fn entry(
&self,
) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
if self.has_entry() {
- Ok(Some(self.assume_entry()))
+ Ok(Some(self.assume_entry()?))
} else {
Ok(None)
}
@@ -450,10 +458,7 @@
};
let mtime = if let Some(m) = mtime_opt {
flags.insert(Flags::HAS_FILE_MTIME);
- PackedTruncatedTimestamp {
- truncated_seconds: m.into(),
- nanoseconds: 0.into(),
- }
+ m.into()
} else {
PackedTruncatedTimestamp::null()
};
--- a/rust/hg-core/src/dirstate_tree/status.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs Mon Oct 18 11:23:07 2021 +0200
@@ -501,9 +501,6 @@
fn truncate_u64(value: u64) -> i32 {
(value & 0x7FFF_FFFF) as i32
}
- fn truncate_i64(value: i64) -> i32 {
- (value & 0x7FFF_FFFF) as i32
- }
let entry = dirstate_node
.entry()?
@@ -531,10 +528,19 @@
.modified
.push(hg_path.detach_from_tree())
} else {
- let mtime = mtime_seconds(fs_metadata);
- if truncate_i64(mtime) != entry.mtime()
- || mtime == self.options.last_normal_time
- {
+ let mtime_looks_clean;
+ if let Some(dirstate_mtime) = entry.truncated_mtime() {
+ let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
+ .expect("OS/libc does not support mtime?")
+ // For now don’t use sub-second precision for file mtimes
+ .to_integer_second();
+ mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
+ && !fs_mtime.likely_equal(self.options.last_normal_time)
+ } else {
+ // No mtime in the dirstate entry
+ mtime_looks_clean = false
+ };
+ if !mtime_looks_clean {
self.outcome
.lock()
.unwrap()
@@ -690,15 +696,6 @@
}
}
-#[cfg(unix)] // TODO
-fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
- // Going through `Metadata::modified()` would be portable, but would take
- // care to construct a `SystemTime` value with sub-second precision just
- // for us to throw that away here.
- use std::os::unix::fs::MetadataExt;
- metadata.mtime()
-}
-
struct DirEntry {
base_name: HgPathBuf,
full_path: PathBuf,
--- a/rust/hg-cpython/src/dirstate.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate.rs Mon Oct 18 11:23:07 2021 +0200
@@ -54,7 +54,7 @@
matcher: PyObject,
ignorefiles: PyList,
check_exec: bool,
- last_normal_time: i64,
+ last_normal_time: (u32, u32),
list_clean: bool,
list_ignored: bool,
list_unknown: bool,
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Mon Oct 18 11:23:07 2021 +0200
@@ -18,11 +18,10 @@
use crate::{
dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
- dirstate::item::DirstateItem,
+ dirstate::item::{timestamp, DirstateItem},
pybytes_deref::PyBytesDeref,
};
use hg::{
- dirstate::parsers::Timestamp,
dirstate::StateMapIter,
dirstate_tree::dirstate_map::DirstateMap as TreeDirstateMap,
dirstate_tree::on_disk::DirstateV2ParseError,
@@ -195,9 +194,9 @@
&self,
p1: PyObject,
p2: PyObject,
- now: PyObject
+ now: (u32, u32)
) -> PyResult<PyBytes> {
- let now = Timestamp(now.extract(py)?);
+ let now = timestamp(py, now)?;
let mut inner = self.inner(py).borrow_mut();
let parents = DirstateParents {
@@ -219,10 +218,10 @@
/// instead of written to a new data file (False).
def write_v2(
&self,
- now: PyObject,
+ now: (u32, u32),
can_append: bool,
) -> PyResult<PyObject> {
- let now = Timestamp(now.extract(py)?);
+ let now = timestamp(py, now)?;
let mut inner = self.inner(py).borrow_mut();
let result = inner.pack_v2(now, can_append);
--- a/rust/hg-cpython/src/dirstate/item.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/item.rs Mon Oct 18 11:23:07 2021 +0200
@@ -9,6 +9,7 @@
use cpython::PythonObject;
use hg::dirstate::DirstateEntry;
use hg::dirstate::EntryState;
+use hg::dirstate::TruncatedTimestamp;
use std::cell::Cell;
use std::convert::TryFrom;
@@ -22,7 +23,7 @@
p2_info: bool = false,
has_meaningful_data: bool = true,
has_meaningful_mtime: bool = true,
- parentfiledata: Option<(u32, u32, u32)> = None,
+ parentfiledata: Option<(u32, u32, (u32, u32))> = None,
fallback_exec: Option<bool> = None,
fallback_symlink: Option<bool> = None,
@@ -34,7 +35,7 @@
mode_size_opt = Some((mode, size))
}
if has_meaningful_mtime {
- mtime_opt = Some(mtime)
+ mtime_opt = Some(timestamp(py, mtime)?)
}
}
let entry = DirstateEntry::from_v2_data(
@@ -191,10 +192,19 @@
Ok(mtime)
}
- def need_delay(&self, now: i32) -> PyResult<bool> {
+ def need_delay(&self, now: (u32, u32)) -> PyResult<bool> {
+ let now = timestamp(py, now)?;
Ok(self.entry(py).get().need_delay(now))
}
+ def mtime_likely_equal_to(&self, other: (u32, u32)) -> PyResult<bool> {
+ if let Some(mtime) = self.entry(py).get().truncated_mtime() {
+ Ok(mtime.likely_equal(timestamp(py, other)?))
+ } else {
+ Ok(false)
+ }
+ }
+
@classmethod
def from_v1_data(
_cls,
@@ -220,8 +230,9 @@
&self,
mode: u32,
size: u32,
- mtime: u32,
+ mtime: (u32, u32),
) -> PyResult<PyNone> {
+ let mtime = timestamp(py, mtime)?;
self.update(py, |entry| entry.set_clean(mode, size, mtime));
Ok(PyNone)
}
@@ -261,3 +272,15 @@
self.entry(py).set(entry)
}
}
+
+pub(crate) fn timestamp(
+ py: Python<'_>,
+ (s, ns): (u32, u32),
+) -> PyResult<TruncatedTimestamp> {
+ TruncatedTimestamp::from_already_truncated(s, ns).map_err(|_| {
+ PyErr::new::<exc::ValueError, _>(
+ py,
+ "expected mtime truncated to 31 bits",
+ )
+ })
+}
--- a/rust/hg-cpython/src/dirstate/status.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/status.rs Mon Oct 18 11:23:07 2021 +0200
@@ -9,6 +9,7 @@
//! `hg-core` crate. From Python, this will be seen as
//! `rustext.dirstate.status`.
+use crate::dirstate::item::timestamp;
use crate::{dirstate::DirstateMap, exceptions::FallbackError};
use cpython::exc::OSError;
use cpython::{
@@ -102,12 +103,13 @@
root_dir: PyObject,
ignore_files: PyList,
check_exec: bool,
- last_normal_time: i64,
+ last_normal_time: (u32, u32),
list_clean: bool,
list_ignored: bool,
list_unknown: bool,
collect_traversed_dirs: bool,
) -> PyResult<PyTuple> {
+ let last_normal_time = timestamp(py, last_normal_time)?;
let bytes = root_dir.extract::<PyBytes>(py)?;
let root_dir = get_path_from_bytes(bytes.data(py));
--- a/rust/rhg/src/commands/status.rs Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/rhg/src/commands/status.rs Mon Oct 18 11:23:07 2021 +0200
@@ -11,6 +11,7 @@
use clap::{Arg, SubCommand};
use hg;
use hg::config::Config;
+use hg::dirstate::TruncatedTimestamp;
use hg::errors::HgError;
use hg::manifest::Manifest;
use hg::matchers::AlwaysMatcher;
@@ -180,7 +181,7 @@
// hence be stored on dmap. Using a value that assumes we aren't
// below the time resolution granularity of the FS and the
// dirstate.
- last_normal_time: 0,
+ last_normal_time: TruncatedTimestamp::new_truncate(0, 0),
// we're currently supporting file systems with exec flags only
// anyway
check_exec: true,
--- a/tests/fakedirstatewritetime.py Tue Oct 19 21:03:13 2021 +0200
+++ b/tests/fakedirstatewritetime.py Mon Oct 18 11:23:07 2021 +0200
@@ -15,6 +15,7 @@
policy,
registrar,
)
+from mercurial.dirstateutils import timestamp
from mercurial.utils import dateutil
try:
@@ -40,9 +41,8 @@
def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
# execute what original parsers.pack_dirstate should do actually
# for consistency
- actualnow = int(now)
for f, e in dmap.items():
- if e.need_delay(actualnow):
+ if e.need_delay(now):
e.set_possibly_dirty()
return orig(dmap, copymap, pl, fakenow)
@@ -62,6 +62,7 @@
# parsing 'fakenow' in YYYYmmddHHMM format makes comparison between
# 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy
fakenow = dateutil.parsedate(fakenow, [b'%Y%m%d%H%M'])[0]
+ fakenow = timestamp.timestamp((fakenow, 0))
if has_rust_dirstate:
# The Rust implementation does not use public parse/pack dirstate