mercurial/pure/parsers.py
author Siddharth Agarwal <sid0@fb.com>
Sun, 29 Mar 2015 19:42:49 -0700
changeset 24540 25c1d3ca5ff6
parent 24215 feddc5284724
child 24634 4ece2847cf4c
permissions -rw-r--r--
dirstate: split the foldmap into separate ones for files and directories Computing the set of directories in the dirstate can be pretty expensive. For 'hg status' without arguments, it turns out we actually never need to figure out the right case for directories in the foldmap. (An upcoming patch explains why.) This patch splits up the directory and file maps into separate ones, allowing for the subsequent optimization in status.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     1
# parsers.py - Python implementation of parsers.c
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     2
#
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     3
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 7945
diff changeset
     5
# This software may be used and distributed according to the terms of the
10263
25e572394f5c Update license to GPLv2+
Matt Mackall <mpm@selenic.com>
parents: 8225
diff changeset
     6
# GNU General Public License version 2 or any later version.
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
     7
24215
feddc5284724 manifest: move pure parsing code out of pure
Matt Mackall <mpm@selenic.com>
parents: 21809
diff changeset
     8
from mercurial.node import nullid
7945
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
     9
from mercurial import util
18567
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    10
import struct, zlib, cStringIO
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    11
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    12
_pack = struct.pack
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    13
_unpack = struct.unpack
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    14
_compress = zlib.compress
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    15
_decompress = zlib.decompress
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    16
_sha = util.sha1
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    17
21809
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    18
# Some code below makes tuples directly because it's more convenient. However,
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    19
# code outside this module should always use dirstatetuple.
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    20
def dirstatetuple(*x):
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    21
    # x is a tuple
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    22
    return x
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
    23
13261
20a54bdf2328 pure: update index parsing
Matt Mackall <mpm@selenic.com>
parents: 13253
diff changeset
    24
def parse_index2(data, inline):
7945
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    25
    def gettype(q):
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    26
        return int(q & 0xFFFF)
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    27
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    28
    def offset_type(offset, type):
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    29
        return long(long(offset) << 16 | type)
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    30
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    31
    indexformatng = ">Qiiiiii20s12x"
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    32
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    33
    s = struct.calcsize(indexformatng)
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    34
    index = []
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    35
    cache = None
14995
8d928799dab5 parsers: remove redundant 'n' variable in parsers.parse_index2() (issue2935)
py4fun
parents: 14421
diff changeset
    36
    off = 0
13253
61c9bc3da402 revlog: remove lazy index
Matt Mackall <mpm@selenic.com>
parents: 10263
diff changeset
    37
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    38
    l = len(data) - s
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    39
    append = index.append
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    40
    if inline:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    41
        cache = (0, data)
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    42
        while off <= l:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    43
            e = _unpack(indexformatng, data[off:off + s])
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    44
            append(e)
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    45
            if e[1] < 0:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    46
                break
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    47
            off += e[1] + s
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    48
    else:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    49
        while off <= l:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    50
            e = _unpack(indexformatng, data[off:off + s])
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    51
            append(e)
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    52
            off += s
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    53
14421
639f26cab2f5 pure parsers: properly detect corrupt index files
Augie Fackler <durin42@gmail.com>
parents: 14064
diff changeset
    54
    if off != len(data):
639f26cab2f5 pure parsers: properly detect corrupt index files
Augie Fackler <durin42@gmail.com>
parents: 14064
diff changeset
    55
        raise ValueError('corrupt index file')
639f26cab2f5 pure parsers: properly detect corrupt index files
Augie Fackler <durin42@gmail.com>
parents: 14064
diff changeset
    56
13435
90d7ce986565 pure: fix index parsing on empty repositories
Wagner Bruna <wbruna@softwareexpress.com.br>
parents: 13261
diff changeset
    57
    if index:
90d7ce986565 pure: fix index parsing on empty repositories
Wagner Bruna <wbruna@softwareexpress.com.br>
parents: 13261
diff changeset
    58
        e = list(index[0])
90d7ce986565 pure: fix index parsing on empty repositories
Wagner Bruna <wbruna@softwareexpress.com.br>
parents: 13261
diff changeset
    59
        type = gettype(e[0])
90d7ce986565 pure: fix index parsing on empty repositories
Wagner Bruna <wbruna@softwareexpress.com.br>
parents: 13261
diff changeset
    60
        e[0] = offset_type(0, type)
90d7ce986565 pure: fix index parsing on empty repositories
Wagner Bruna <wbruna@softwareexpress.com.br>
parents: 13261
diff changeset
    61
        index[0] = tuple(e)
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    62
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    63
    # add the magic null revision at -1
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    64
    index.append((0, 0, 0, -1, -1, -1, -1, nullid))
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    65
13261
20a54bdf2328 pure: update index parsing
Matt Mackall <mpm@selenic.com>
parents: 13253
diff changeset
    66
    return index, cache
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    67
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    68
def parse_dirstate(dmap, copymap, st):
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    69
    parents = [st[:20], st[20: 40]]
17425
e95ec38f86b0 fix wording and not-completely-trivial spelling errors and bad docstrings
Mads Kiilerich <mads@kiilerich.com>
parents: 14995
diff changeset
    70
    # dereference fields so they will be local in loop
7945
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    71
    format = ">cllll"
94d7e14cfa42 pure/parsers: fix circular imports, import mercurial modules properly
Matt Mackall <mpm@selenic.com>
parents: 7873
diff changeset
    72
    e_size = struct.calcsize(format)
7700
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    73
    pos1 = 40
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    74
    l = len(st)
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    75
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    76
    # the inner loop
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    77
    while pos1 < l:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    78
        pos2 = pos1 + e_size
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    79
        e = _unpack(">cllll", st[pos1:pos2]) # a literal here is faster
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    80
        pos1 = pos2 + e[4]
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    81
        f = st[pos2:pos1]
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    82
        if '\0' in f:
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    83
            f, c = f.split('\0')
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    84
            copymap[f] = c
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    85
        dmap[f] = e[:4]
f410c552fa15 pure Python implementation of parsers.c
Martin Geisler <mg@daimi.au.dk>
parents:
diff changeset
    86
    return parents
18567
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    87
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    88
def pack_dirstate(dmap, copymap, pl, now):
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    89
    now = int(now)
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    90
    cs = cStringIO.StringIO()
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    91
    write = cs.write
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    92
    write("".join(pl))
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    93
    for f, e in dmap.iteritems():
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    94
        if e[0] == 'n' and e[3] == now:
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    95
            # The file was last modified "simultaneously" with the current
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    96
            # write to dirstate (i.e. within the same second for file-
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    97
            # systems with a granularity of 1 sec). This commonly happens
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    98
            # for at least a couple of files on 'update'.
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
    99
            # The user could change the file without changing its size
19652
187bf2dde7c1 pack_dirstate: only invalidate mtime for files written in the last second
Siddharth Agarwal <sid0@fb.com>
parents: 18567
diff changeset
   100
            # within the same second. Invalidate the file's mtime in
18567
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   101
            # dirstate, forcing future 'status' calls to compare the
19652
187bf2dde7c1 pack_dirstate: only invalidate mtime for files written in the last second
Siddharth Agarwal <sid0@fb.com>
parents: 18567
diff changeset
   102
            # contents of the file if the size is the same. This prevents
187bf2dde7c1 pack_dirstate: only invalidate mtime for files written in the last second
Siddharth Agarwal <sid0@fb.com>
parents: 18567
diff changeset
   103
            # mistakenly treating such files as clean.
21809
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19652
diff changeset
   104
            e = dirstatetuple(e[0], e[1], e[2], -1)
18567
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   105
            dmap[f] = e
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   106
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   107
        if f in copymap:
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   108
            f = "%s\0%s" % (f, copymap[f])
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   109
        e = _pack(">cllll", e[0], e[1], e[2], e[3], len(f))
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   110
        write(e)
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   111
        write(f)
194e63c1ccb9 dirstate: move pure python dirstate packing to pure/parsers.py
Siddharth Agarwal <sid0@fb.com>
parents: 17425
diff changeset
   112
    return cs.getvalue()