--- a/contrib/synthrepo.py Sun Sep 28 17:15:28 2014 +0200
+++ b/contrib/synthrepo.py Fri Sep 12 22:04:29 2014 -0400
@@ -35,10 +35,10 @@
- Symlinks and binary files are ignored
'''
-import bisect, collections, json, os, random, time, sys
+import bisect, collections, itertools, json, os, random, time, sys
from mercurial import cmdutil, context, patch, scmutil, util, hg
from mercurial.i18n import _
-from mercurial.node import nullrev, nullid
+from mercurial.node import nullrev, nullid, short
testedwith = 'internal'
@@ -208,14 +208,17 @@
@command('synthesize',
[('c', 'count', 0, _('create given number of commits'), _('COUNT')),
- ('', 'dict', '', _('path to a dictionary of words'), _('FILE'))],
+ ('', 'dict', '', _('path to a dictionary of words'), _('FILE')),
+ ('', 'initfiles', 0, _('initial file count to create'), _('COUNT'))],
_('hg synthesize [OPTION].. DESCFILE'))
def synthesize(ui, repo, descpath, **opts):
'''synthesize commits based on a model of an existing repository
The model must have been generated by :hg:`analyze`. Commits will
be generated randomly according to the probabilities described in
- the model.
+ the model. If --initfiles is set, the repository will be seeded with
+ the given number of files following the modeled repository's directory
+ structure.
When synthesizing new content, commit descriptions, and user
names, words will be chosen randomly from a dictionary that is
@@ -261,9 +264,19 @@
words = fp.read().splitlines()
fp.close()
+ initdirs = {}
+ if desc['initdirs']:
+ for k, v in desc['initdirs']:
+ initdirs[k.encode('utf-8').replace('.hg', '_hg')] = v
+ initdirs = renamedirs(initdirs, words)
+ initdirscdf = cdf(initdirs)
+
def pick(cdf):
return cdf[0][bisect.bisect_left(cdf[1], random.random())]
+ def pickpath():
+ return os.path.join(pick(initdirscdf), random.choice(words))
+
def makeline(minimum=0):
total = max(minimum, pick(linelengths))
c, l = 0, []
@@ -280,8 +293,38 @@
progress = ui.progress
_synthesizing = _('synthesizing')
+ _files = _('initial files')
_changesets = _('changesets')
+ # Synthesize a single initial revision adding files to the repo according
+ # to the modeled directory structure.
+ initcount = int(opts['initfiles'])
+ if initcount and initdirs:
+ pctx = repo[None].parents()[0]
+ files = {}
+ for i in xrange(0, initcount):
+ ui.progress(_synthesizing, i, unit=_files, total=initcount)
+
+ path = pickpath()
+ while path in pctx.dirs():
+ path = pickpath()
+ data = '%s contents\n' % path
+ files[path] = context.memfilectx(repo, path, data)
+
+ def filectxfn(repo, memctx, path):
+ return files[path]
+
+ ui.progress(_synthesizing, None)
+ message = 'synthesized wide repo with %d files' % (len(files),)
+ mc = context.memctx(repo, [pctx.node(), nullid], message,
+ files.iterkeys(), filectxfn, ui.username(),
+ '%d %d' % util.makedate())
+ initnode = mc.commit()
+ hexfn = ui.debugflag and hex or short
+ ui.status(_('added commit %s with %d files\n')
+ % (hexfn(initnode), len(files)))
+
+ # Synthesize incremental revisions to the repository, adding repo depth.
count = int(opts['count'])
heads = set(map(repo.changelog.rev, repo.heads()))
for i in xrange(count):
@@ -374,3 +417,26 @@
lock.release()
wlock.release()
+
+def renamedirs(dirs, words):
+ '''Rename each directory in the per-dir file count dict using words.'''
+ wordgen = itertools.cycle(words)  # wraps around, so names can repeat
+ replacements = {'': ''}  # memo of path -> renamed path; '' stays ''
+ def rename(dirpath):
+ '''Return the renamed form of dirpath, renaming its parents first.
+
+ Each distinct path prefix takes the next word from wordgen once and
+ is memoized in replacements, so directories that share a prefix
+ share the same renamed prefix whatever order the paths arrive in.
+ '''
+ if dirpath in replacements:
+ return replacements[dirpath]
+ head, _ = os.path.split(dirpath)  # only the parent path is needed
+ head = head and rename(head) or ''
+ renamed = os.path.join(head, wordgen.next())
+ replacements[dirpath] = renamed
+ return renamed
+ result = []  # [renamed path, count] pairs, in dict iteration order
+ for dirpath, count in dirs.iteritems():
+ result.append([rename(dirpath.lstrip(os.sep)), count])
+ return result