view tests/generate-working-copy-states.py @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, and each such concatenation
is linear in the length of the accumulated result, so the whole encoding is
quadratic-time. This patch makes us use a bytearray object instead, which has
an amortized constant-time append operation.

The encoding is still not particularly fast, but at least a 10MB file now
takes tens of seconds rather than many hours to encode.
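For illustration, here is a minimal sketch of the accumulator pattern the
message describes. It is not the actual encoder in mercurial/encoding.py;
'chunks' merely stands in for the escaped pieces the real encoder produces.

    def encode_quadratic(chunks):
        # bytes are immutable: every "+=" copies the whole accumulator,
        # so total work grows quadratically with the output size.
        out = b''
        for chunk in chunks:
            out += chunk
        return out

    def encode_linear(chunks):
        # bytearray grows in place: each "+=" is an amortized
        # constant-time (per byte) extend.
        out = bytearray()
        for chunk in chunks:
            out += chunk
        return bytes(out)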
| field    | value                                         |
|----------|-----------------------------------------------|
| author   | Arseniy Alekseyev <aalekseyev@janestreet.com> |
| date     | Mon, 06 Mar 2023 11:27:57 +0000               |
| parents  | 6000f5b25c9b                                  |
| children | 493034cc3265                                  |
# Helper script used for generating history and working copy files and content.
# The file's name corresponds to its history. The number of changesets can
# be specified on the command line. With 2 changesets, files with names like
# content1_content2_content1-untracked are generated. The first two filename
# segments describe the contents in the two changesets. The third segment
# ("content1-untracked") describes the state in the working copy, i.e.
# the file has content "content1" and is untracked (since it was previously
# tracked, it has been forgotten).
#
# This script generates the filenames and their content, but it's up to the
# caller to tell hg about the state.
#
# There are two subcommands:
#   filelist <numchangesets>
#   state <numchangesets> (<changeset>|wc)
#
# Typical usage:
#
# $ python $TESTDIR/generate-working-copy-states.py state 2 1
# $ hg addremove --similarity 0
# $ hg commit -m 'first'
#
# $ python $TESTDIR/generate-working-copy-states.py state 2 2
# $ hg addremove --similarity 0
# $ hg commit -m 'second'
#
# $ python $TESTDIR/generate-working-copy-states.py state 2 wc
# $ hg addremove --similarity 0
# $ hg forget *_*_*-untracked
# $ rm *_*_missing-*

import os
import sys


# Generates pairs of (filename, contents), where 'contents' is a list
# describing the file's content at each revision (or in the working copy).
# At each revision, it is either None or the file's actual content. When not
# None, it may be either new content or the same content as an earlier
# revision, so all of (modified,clean,added,removed) can be tested.
def generatestates(maxchangesets, parentcontents):
    depth = len(parentcontents)
    if depth == maxchangesets + 1:
        for tracked in (b'untracked', b'tracked'):
            filename = (
                b"_".join(
                    [
                        (content is None and b'missing' or content)
                        for content in parentcontents
                    ]
                )
                + b"-"
                + tracked
            )
            yield (filename, parentcontents)
    else:
        for content in {None, b'content' + (b"%d" % (depth + 1))} | set(
            parentcontents
        ):
            for combination in generatestates(
                maxchangesets, parentcontents + [content]
            ):
                yield combination


# retrieve the command line arguments
target = sys.argv[1]
maxchangesets = int(sys.argv[2])
if target == 'state':
    depth = sys.argv[3]

# sort to make sure we have stable output
combinations = sorted(generatestates(maxchangesets, []))

# compute file content
content = []
for filename, states in combinations:
    if target == 'filelist':
        print(filename.decode('ascii'))
    elif target == 'state':
        if depth == 'wc':
            # Make sure there is content so the file gets written and can be
            # tracked. It will be deleted outside of this script.
            content.append((filename, states[maxchangesets] or b'TOBEDELETED'))
        else:
            content.append((filename, states[int(depth) - 1]))
    else:
        print("unknown target:", target, file=sys.stderr)
        sys.exit(1)

# write actual content
for filename, data in content:
    if data is not None:
        f = open(filename, 'wb')
        f.write(data + b'\n')
        f.close()
    elif os.path.exists(filename):
        os.remove(filename)
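As a minimal sketch (not part of the script), the generator can also be
exercised directly, e.g. by pasting generatestates into an interactive
session; importing the file as a module would also run its command-line
handling. With two changesets this prints names such as the
content1_content2_content1-untracked example from the header comment.

    # Roughly what "filelist 2" prints, plus the per-revision contents
    # (None means the file is missing in that revision / the working copy).
    for filename, states in sorted(generatestates(2, [])):
        print(filename.decode('ascii'), states)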