view hgext/convert/gnuarch.py @ 6389:0231f763ebc8

manifest: improve parsing performance by 8x via a new C extension
author Bryan O'Sullivan <bos@serpentine.com>
date Wed, 26 Mar 2008 10:12:10 -0700
parents e75aab656f46
children f67d1468ac50
line wrap: on
line source

# GNU Arch support for the convert extension

from common import NoRepo, commandline, commit, converter_source
from mercurial.i18n import _
from mercurial import util
import os, shutil, tempfile, stat

class gnuarch_source(converter_source, commandline):

    class gnuarch_rev:
        def __init__(self, rev):
            self.rev = rev
            self.summary = ''
            self.date = None
            self.author = ''
            self.add_files = []
            self.mod_files = []
            self.del_files = []
            self.ren_files = {}
            self.ren_dirs = {}

    def __init__(self, ui, path, rev=None):
        super(gnuarch_source, self).__init__(ui, path, rev=rev)

        if not os.path.exists(os.path.join(path, '{arch}')):
            raise NoRepo(_("%s does not look like a GNU Arch repo" % path))

        # Could use checktool, but we want to check for baz or tla.
        self.execmd = None
        if util.find_exe('baz'):
            self.execmd = 'baz'
        else:
            if util.find_exe('tla'):
                self.execmd = 'tla'
            else:
                raise util.Abort(_('cannot find a GNU Arch tool'))

        commandline.__init__(self, ui, self.execmd)

        self.path = os.path.realpath(path)
        self.tmppath = None

        self.treeversion = None
        self.lastrev = None
        self.changes = {}
        self.parents = {}
        self.tags = {}
        self.modecache = {}

    def before(self):
        if self.execmd == 'tla':
            output = self.run0('tree-version', self.path)
        else:
            output = self.run0('tree-version', '-d', self.path)
        self.treeversion = output.strip()

        self.ui.status(_('analyzing tree version %s...\n' % self.treeversion))

        # Get name of temporary directory
        version = self.treeversion.split('/')
        self.tmppath = os.path.join(tempfile.gettempdir(),
                                    'hg-%s' % version[1])

        # Generate parents dictionary
        child = []
        output, status = self.runlines('revisions', self.treeversion)
        self.checkexit(status, 'archive registered?')
        for l in output:
            rev = l.strip()
            self.changes[rev] = self.gnuarch_rev(rev)

            # Read author, date and summary
            catlog = self.runlines0('cat-log', '-d', self.path, rev)
            self._parsecatlog(catlog, rev)

            self.parents[rev] = child
            child = [rev]
            if rev == self.rev:
                break
        self.parents[None] = child

    def after(self):
        self.ui.debug(_('cleaning up %s\n' % self.tmppath))
        shutil.rmtree(self.tmppath, ignore_errors=True)

    def getheads(self):
        return self.parents[None]

    def getfile(self, name, rev):
        if rev != self.lastrev:
            raise util.Abort(_('internal calling inconsistency'))

        # Raise IOError if necessary (i.e. deleted files).
        if not os.path.exists(os.path.join(self.tmppath, name)):
            raise IOError

        data, mode = self._getfile(name, rev)
        self.modecache[(name, rev)] = mode

        return data

    def getmode(self, name, rev):
        return self.modecache[(name, rev)]

    def getchanges(self, rev):
        self.modecache = {}
        self._update(rev)
        changes = []
        copies = {}

        for f in self.changes[rev].add_files:
            changes.append((f, rev))

        for f in self.changes[rev].mod_files:
            changes.append((f, rev))

        for f in self.changes[rev].del_files:
            changes.append((f, rev))

        for src in self.changes[rev].ren_files:
            to = self.changes[rev].ren_files[src]
            changes.append((src, rev))
            changes.append((to, rev))
            copies[src] = to

        for src in self.changes[rev].ren_dirs:
            to = self.changes[rev].ren_dirs[src]
            chgs, cps = self._rendirchanges(src, to);
            changes += [(f, rev) for f in chgs]
            for c in cps:
                copies[c] = cps[c]

        changes.sort()
        self.lastrev = rev

        return changes, copies

    def getcommit(self, rev):
        changes = self.changes[rev]
        return commit(author = changes.author, date = changes.date,
                      desc = changes.summary, parents = self.parents[rev])

    def gettags(self):
        return self.tags

    def _execute(self, cmd, *args, **kwargs):
        cmdline = [self.execmd, cmd]
        cmdline += args
        cmdline = [util.shellquote(arg) for arg in cmdline]
        cmdline += ['>', util.nulldev, '2>', util.nulldev]
        cmdline = util.quotecommand(' '.join(cmdline))
        self.ui.debug(cmdline, '\n')
        return os.system(cmdline)

    def _update(self, rev):
        if rev == 'base-0':
            # Initialise 'base-0' revision
            self._obtainrevision(rev)
        else:
            self.ui.debug(_('applying revision %s...\n' % rev))
            revision = '%s--%s' % (self.treeversion, rev)
            changeset, status = self.runlines('replay', '-d', self.tmppath,
                                              revision)
            if status:
                # Something went wrong while merging (baz or tla
                # issue?), get latest revision and try from there
                shutil.rmtree(self.tmppath, ignore_errors=True)
                self._obtainrevision(rev)
            else:
                old_rev = self.parents[rev][0]
                self.ui.debug(_('computing changeset between %s and %s...\n' \
                                    % (old_rev, rev)))
                rev_a = '%s--%s' % (self.treeversion, old_rev)
                rev_b = '%s--%s' % (self.treeversion, rev)
                self._parsechangeset(changeset, rev)

    def _getfile(self, name, rev):
        mode = os.lstat(os.path.join(self.tmppath, name)).st_mode
        if stat.S_ISLNK(mode):
            data = os.readlink(os.path.join(self.tmppath, name))
            mode = mode and 'l' or ''
        else:
            data = open(os.path.join(self.tmppath, name), 'rb').read()
            mode = (mode & 0111) and 'x' or ''
        return data, mode

    def _exclude(self, name):
        exclude = [ '{arch}', '.arch-ids', '.arch-inventory' ]
        for exc in exclude:
            if name.find(exc) != -1:
                return True
        return False

    def _readcontents(self, path):
        files = []
        contents = os.listdir(path)
        while len(contents) > 0:
            c = contents.pop()
            p = os.path.join(path, c)
            # os.walk could be used, but here we avoid internal GNU
            # Arch files and directories, thus saving a lot time.
            if not self._exclude(p):
                if os.path.isdir(p):
                    contents += [os.path.join(c, f) for f in os.listdir(p)]
                else:
                    files.append(c)
        return files

    def _rendirchanges(self, src, dest):
        changes = []
        copies = {}
        files = self._readcontents(os.path.join(self.tmppath, dest))
        for f in files:
            s = os.path.join(src, f)
            d = os.path.join(dest, f)
            changes.append(s)
            changes.append(d)
            copies[s] = d
        return changes, copies

    def _obtainrevision(self, rev):
        self.ui.debug(_('obtaining revision %s...\n' % rev))
        revision = '%s--%s' % (self.treeversion, rev)
        output = self._execute('get', revision, self.tmppath)
        self.checkexit(output)
        self.ui.debug(_('analysing revision %s...\n' % rev))
        files = self._readcontents(self.tmppath)
        self.changes[rev].add_files += files

    def _stripbasepath(self, path):
        if path.startswith('./'):
            return path[2:]
        return path

    def _parsecatlog(self, data, rev):
        summary = []
        for l in data:
            l = l.strip()
            if summary:
                summary.append(l)
            elif l.startswith('Summary:'):
                summary.append(l[len('Summary: '):])
            elif l.startswith('Standard-date:'):
                date = l[len('Standard-date: '):]
                strdate = util.strdate(date, '%Y-%m-%d %H:%M:%S')
                self.changes[rev].date = util.datestr(strdate)
            elif l.startswith('Creator:'):
                self.changes[rev].author = l[len('Creator: '):]
        self.changes[rev].summary = '\n'.join(summary)

    def _parsechangeset(self, data, rev):
        for l in data:
            l = l.strip()
            # Added file (ignore added directory)
            if l.startswith('A') and not l.startswith('A/'):
                file = self._stripbasepath(l[1:].strip())
                if not self._exclude(file):
                    self.changes[rev].add_files.append(file)
            # Deleted file (ignore deleted directory)
            elif l.startswith('D') and not l.startswith('D/'):
                file = self._stripbasepath(l[1:].strip())
                if not self._exclude(file):
                    self.changes[rev].del_files.append(file)
            # Modified binary file
            elif l.startswith('Mb'):
                file = self._stripbasepath(l[2:].strip())
                if not self._exclude(file):
                    self.changes[rev].mod_files.append(file)
            # Modified link
            elif l.startswith('M->'):
                file = self._stripbasepath(l[3:].strip())
                if not self._exclude(file):
                    self.changes[rev].mod_files.append(file)
            # Modified file
            elif l.startswith('M'):
                file = self._stripbasepath(l[1:].strip())
                if not self._exclude(file):
                    self.changes[rev].mod_files.append(file)
            # Renamed file (or link)
            elif l.startswith('=>'):
                files = l[2:].strip().split(' ')
                if len(files) == 1:
                    files = l[2:].strip().split('\t')
                src = self._stripbasepath(files[0])
                dst = self._stripbasepath(files[1])
                if not self._exclude(src) and not self._exclude(dst):
                    self.changes[rev].ren_files[src] = dst
            # Conversion from file to link or from link to file (modified)
            elif l.startswith('ch'):
                file = self._stripbasepath(l[2:].strip())
                if not self._exclude(file):
                    self.changes[rev].mod_files.append(file)
            # Renamed directory
            elif l.startswith('/>'):
                dirs = l[2:].strip().split(' ')
                if len(dirs) == 1:
                    dirs = l[2:].strip().split('\t')
                src = self._stripbasepath(dirs[0])
                dst = self._stripbasepath(dirs[1])
                if not self._exclude(src) and not self._exclude(dst):
                    self.changes[rev].ren_dirs[src] = dst