hgext: add largefiles extension
This code has a number of contributors and a complicated history prior to its
introduction into Mercurial. That history can be seen by visiting:
https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles
http://hg.gerg.ca/hg-bfiles
and looking at the included copyright notices and contributors list.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/CONTRIBUTORS Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,4 @@
+Greg Ward, author of the original bfiles extension
+Na'Tosha Bard of Unity Technologies
+Fog Creek Software
+Special thanks to the University of Toronto and the UCOSP program
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/__init__.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,40 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''track large binary files
+
+Large binary files tend to be not very compressible, not very "diffable", and
+not at all mergeable. Such files are not handled well by Mercurial's storage
+format (revlog), which is based on compressed binary deltas. largefiles solves
+this problem by adding a centralized client-server layer on top of Mercurial:
+largefiles live in a *central store* out on the network somewhere, and you only
+fetch the ones that you need when you need them.
+
+largefiles works by maintaining a *standin* in .hglf/ for each largefile. The
+standins are small (41 bytes: an SHA-1 hash plus newline) and are tracked by
+Mercurial. Largefile revisions are identified by the SHA-1 hash of their
+contents, which is written to the standin. largefiles uses that revision ID to
+get/put largefile revisions from/to the central store.
+
+A complete tutorial for using largefiles is included in ``usage.txt`` in the
+largefiles source distribution. See
+https://developers.kilnhg.com/Repo/Kiln/largefiles/largefiles/File/usage.txt
+'''
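As an illustration of the standin format described in the docstring above (a
minimal sketch using only the standard library, not part of the patch itself):
the standin committed for a largefile is just the 40 hex digits of the SHA-1
of its contents plus a newline, tracked under .hglf/ at the file's
repo-relative path.

    import hashlib

    def standincontents(largefile):
        # hash the largefile's contents; the 40 hex digits plus a newline
        # (41 bytes in total) are exactly what gets committed as the standin
        sha = hashlib.sha1()
        f = open(largefile, 'rb')
        for chunk in iter(lambda: f.read(128 * 1024), ''):
            sha.update(chunk)
        f.close()
        return sha.hexdigest() + '\n'

    # e.g. the standin for 'data/big.bin' is tracked as '.hglf/data/big.bin'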
+
+from mercurial import commands
+
+import lfcommands
+import reposetup
+import uisetup
+
+reposetup = reposetup.reposetup
+uisetup = uisetup.uisetup
+
+commands.norepo += " lfconvert"
+
+cmdtable = lfcommands.cmdtable
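The hooks above (reposetup, uisetup, cmdtable) are the standard extension
entry points. Enabling the extension follows the usual pattern for bundled
extensions; a minimal hgrc entry would be:

    [extensions]
    largefiles =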
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/basestore.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,201 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Base class for store implementations and store-related utility code.'''
+
+import os
+import tempfile
+import binascii
+import re
+
+from mercurial import util, node, hg
+from mercurial.i18n import _
+
+import lfutil
+
+class StoreError(Exception):
+ '''Raised when there is a problem getting files from or putting
+ files to a central store.'''
+ def __init__(self, filename, hash, url, detail):
+ self.filename = filename
+ self.hash = hash
+ self.url = url
+ self.detail = detail
+
+ def longmessage(self):
+ if self.url:
+ return ('%s: %s\n'
+ '(failed URL: %s)\n'
+ % (self.filename, self.detail, self.url))
+ else:
+ return ('%s: %s\n'
+ '(no default or default-push path set in hgrc)\n'
+ % (self.filename, self.detail))
+
+ def __str__(self):
+ return "%s: %s" % (self.url, self.detail)
+
+class basestore(object):
+ def __init__(self, ui, repo, url):
+ self.ui = ui
+ self.repo = repo
+ self.url = url
+
+ def put(self, source, hash):
+        '''Put source file into the store so it can be retrieved by hash.'''
+ raise NotImplementedError('abstract method')
+
+ def exists(self, hash):
+ '''Check to see if the store contains the given hash.'''
+ raise NotImplementedError('abstract method')
+
+ def get(self, files):
+ '''Get the specified largefiles from the store and write to local
+ files under repo.root. files is a list of (filename, hash)
+        tuples. Return (success, missing), lists of files successfully
+ downloaded and those not found in the store. success is a list
+ of (filename, hash) tuples; missing is a list of filenames that
+ we could not get. (The detailed error message will already have
+ been presented to the user, so missing is just supplied as a
+ summary.)'''
+ success = []
+ missing = []
+ ui = self.ui
+
+ at = 0
+ for filename, hash in files:
+ ui.progress(_('getting largefiles'), at, unit='lfile',
+ total=len(files))
+ at += 1
+ ui.note(_('getting %s:%s\n') % (filename, hash))
+
+            cachefilename = lfutil.cachepath(self.repo, hash)
+            cachedir = os.path.dirname(cachefilename)
+            # mkstemp() below needs the target directory to exist
+            lfutil.createdir(cachedir)
+
+ # No need to pass mode='wb' to fdopen(), since mkstemp() already
+ # opened the file in binary mode.
+ (tmpfd, tmpfilename) = tempfile.mkstemp(
+ dir=cachedir, prefix=os.path.basename(filename))
+ tmpfile = os.fdopen(tmpfd, 'w')
+
+ try:
+ hhash = binascii.hexlify(self._getfile(tmpfile, filename, hash))
+ except StoreError, err:
+ ui.warn(err.longmessage())
+ hhash = ""
+
+ if hhash != hash:
+ if hhash != "":
+ ui.warn(_('%s: data corruption (expected %s, got %s)\n')
+ % (filename, hash, hhash))
+ tmpfile.close() # no-op if it's already closed
+ os.remove(tmpfilename)
+ missing.append(filename)
+ continue
+
+ if os.path.exists(cachefilename): # Windows
+ os.remove(cachefilename)
+ os.rename(tmpfilename, cachefilename)
+ lfutil.linktosystemcache(self.repo, hash)
+ success.append((filename, hhash))
+
+ ui.progress(_('getting largefiles'), None)
+ return (success, missing)
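A sketch of how a caller consumes get()'s contract (fetchall is a hypothetical
helper; detailed per-file errors have already been printed by the store, so
only the summary lists need handling):

    def fetchall(store, wanted):
        # wanted is a list of (filename, hash) tuples, as get() expects
        success, missing = store.get(wanted)
        for filename, hash in success:
            store.ui.note('fetched %s (%s)\n' % (filename, hash))
        if missing:
            store.ui.warn('%d largefiles not found in store\n' % len(missing))
        return not missing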
+
+ def verify(self, revs, contents=False):
+ '''Verify the existence (and, optionally, contents) of every big
+ file revision referenced by every changeset in revs.
+ Return 0 if all is well, non-zero on any errors.'''
+ write = self.ui.write
+ failed = False
+
+ write(_('searching %d changesets for largefiles\n') % len(revs))
+ verified = set() # set of (filename, filenode) tuples
+
+ for rev in revs:
+ cctx = self.repo[rev]
+ cset = "%d:%s" % (cctx.rev(), node.short(cctx.node()))
+
+            failed = lfutil.any_(self._verifyfile(
+                cctx, cset, contents, standin, verified)
+                for standin in cctx) or failed
+
+ num_revs = len(verified)
+ num_lfiles = len(set([fname for (fname, fnode) in verified]))
+ if contents:
+ write(_('verified contents of %d revisions of %d largefiles\n')
+ % (num_revs, num_lfiles))
+ else:
+ write(_('verified existence of %d revisions of %d largefiles\n')
+ % (num_revs, num_lfiles))
+
+ return int(failed)
+
+ def _getfile(self, tmpfile, filename, hash):
+ '''Fetch one revision of one file from the store and write it
+ to tmpfile. Compute the hash of the file on-the-fly as it
+ downloads and return the binary hash. Close tmpfile. Raise
+ StoreError if unable to download the file (e.g. it does not
+ exist in the store).'''
+ raise NotImplementedError('abstract method')
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ '''Perform the actual verification of a file in the store.
+ '''
+ raise NotImplementedError('abstract method')
+
+import localstore, wirestore
+
+_storeprovider = {
+ 'file': [localstore.localstore],
+ 'http': [wirestore.wirestore],
+ 'https': [wirestore.wirestore],
+ 'ssh': [wirestore.wirestore],
+ }
+
+_scheme_re = re.compile(r'^([a-zA-Z0-9+.\-]+)://')
+
+# During clone this function is passed the src's ui object
+# but it needs the dest's ui object so it can read out of
+# the config file. Use repo.ui instead.
+def _openstore(repo, remote=None, put=False):
+ ui = repo.ui
+
+ if not remote:
+ path = getattr(repo, 'lfpullsource', None) or \
+ ui.expandpath('default-push', 'default')
+        # If 'default-push' and 'default' can't be expanded they are just
+        # returned. In that case use the empty string, which falls through
+        # to the file scheme below.
+ if path == 'default-push' or path == 'default':
+ path = ''
+ remote = repo
+ else:
+ remote = hg.peer(repo, {}, path)
+
+ # The path could be a scheme so use Mercurial's normal functionality
+ # to resolve the scheme to a repository and use its path
+ path = hasattr(remote, 'url') and remote.url() or remote.path
+
+ match = _scheme_re.match(path)
+ if not match: # regular filesystem path
+ scheme = 'file'
+ else:
+ scheme = match.group(1)
+
+ try:
+ storeproviders = _storeprovider[scheme]
+ except KeyError:
+ raise util.Abort(_('unsupported URL scheme %r') % scheme)
+
+ for class_obj in storeproviders:
+ try:
+ return class_obj(ui, repo, remote)
+ except lfutil.storeprotonotcapable:
+ pass
+
+    raise util.Abort(_('%s does not appear to be a largefile store') % path)
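To illustrate the dispatch above: the URL's scheme prefix selects the provider
list, plain filesystem paths fall back to 'file', and the first provider class
that accepts the protocol wins. A sketch:

    # 'ssh://example.com//repo' -> 'ssh' -> wirestore.wirestore
    # '/srv/shared/repo'        -> no scheme -> 'file' -> localstore.localstore
    m = _scheme_re.match('ssh://example.com//repo')
    scheme = m and m.group(1) or 'file'
    assert scheme == 'ssh'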
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/design.txt Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,49 @@
+= largefiles - manage large binary files =
+This extension is based on Greg Ward's bfiles extension, which can be found
+at http://mercurial.selenic.com/wiki/BfilesExtension.
+
+== The largefile store ==
+
+largefile stores are, in the typical use case, centralized servers that have
+every past revision of a given binary file. Each largefile is identified by
+its SHA-1 hash, and all interactions with the store take one of the following
+forms (a code sketch of this interface follows the lists below):
+
+-Download a largefile with this hash
+-Upload a largefile with this hash
+-Check if the store has a largefile with this hash
+
+largefile stores can take one of two forms:
+
+-Directories on a network file share
+-Mercurial wireproto servers, either via ssh or http (hgweb)
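In code terms, every store implements the same small interface; this is the
shape defined by basestore.py in this patch (sketch only):

    class store(object):
        def put(self, source, hash):  # upload a largefile with this hash
            raise NotImplementedError('abstract method')
        def exists(self, hash):       # check if the store has this hash
            raise NotImplementedError('abstract method')
        def get(self, files):         # download; files is [(filename, hash)]
            raise NotImplementedError('abstract method')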
+
+== The Local Repository ==
+
+The local repository has a largefile cache in .hg/largefiles which holds a
+subset of the largefiles needed. On a clone only the largefiles at tip are
+downloaded. When largefiles are downloaded from the central store, a copy is
+saved in this cache.
+
+== The Global Cache ==
+
+largefiles in a local repository cache are hardlinked to files in the global
+cache. Before a file is downloaded we check if it is in the global cache.
+
+== Implementation Details ==
+
+Each largefile has a standin which is in .hglf. The standin is tracked by
+Mercurial. The standin contains the SHA-1 hash of the largefile. When a
+largefile is added/removed/copied/renamed/etc the same operation is applied to
+the standin. Thus the history of the standin is the history of the largefile.
+
+For performance reasons, the contents of a standin are only updated before a
+commit. Standins are added/removed/copied/renamed by the corresponding
+Mercurial commands, but their contents will not be updated. The contents of
+a standin will always be the hash of the largefile as of the last commit. To
+support some commands (such as revert), some standins are temporarily updated
+but will be changed back after the command is finished.
+
+A Mercurial dirstate object tracks the state of the largefiles. The dirstate
+uses the last modified time and current size to detect if a file has changed
+(without reading the entire contents of the file).
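A sketch of the kind of check this enables (illustrative only; Mercurial's
real dirstate logic is more involved):

    import os

    def probablyunchanged(path, recordedsize, recordedmtime):
        # size and mtime together are a cheap proxy for 'unchanged', so
        # clean largefiles never have to be read and rehashed
        st = os.stat(path)
        return (st.st_size == recordedsize and
                int(st.st_mtime) == recordedmtime)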
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/lfcommands.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,483 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''High-level command functions: lfconvert() et al., plus the cmdtable.'''
+
+import os
+import shutil
+
+from mercurial import util, match as match_, hg, node, context, error
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+# -- Commands ----------------------------------------------------------
+
+def lfconvert(ui, src, dest, *pats, **opts):
+    '''Convert a normal repository to a largefiles repository
+
+    Convert the source repository, creating an identical repository except
+    that all files that match the given patterns or are over the given size
+    will be added as largefiles. The size used to determine whether or not to
+    track a file as a largefile is the size of the first version of the file.
+    After running this command you will need to make sure that largefiles is
+    enabled anywhere you intend to push the new repository.'''
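An illustrative invocation, matching the synopsis in the cmdtable at the end
of this file (repository names and the pattern are made up): treat every file
over 10 megabytes, plus anything matching the glob, as a largefile:

    hg lfconvert --size 10 srcrepo dstrepo 'glob:**.bin'

The reverse conversion is 'hg lfconvert --tonormal srcrepo dstrepo'.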
+
+ if opts['tonormal']:
+ tolfile = False
+ else:
+ tolfile = True
+ size = opts['size']
+ if not size:
+ size = ui.config(lfutil.longname, 'size', default=None)
+        try:
+            size = int(size)
+        except ValueError:
+            raise util.Abort(_('largefiles.size must be an integer, was %s')
+                             % size)
+        except TypeError:
+            raise util.Abort(_('size must be specified'))
+
+ try:
+ rsrc = hg.repository(ui, src)
+ if not rsrc.local():
+ raise util.Abort(_('%s is not a local Mercurial repo') % src)
+ except error.RepoError, err:
+ ui.traceback()
+ raise util.Abort(err.args[0])
+ if os.path.exists(dest):
+ if not os.path.isdir(dest):
+ raise util.Abort(_('destination %s already exists') % dest)
+ elif os.listdir(dest):
+ raise util.Abort(_('destination %s is not empty') % dest)
+ try:
+ ui.status(_('initializing destination %s\n') % dest)
+ rdst = hg.repository(ui, dest, create=True)
+ if not rdst.local():
+ raise util.Abort(_('%s is not a local Mercurial repo') % dest)
+ except error.RepoError:
+ ui.traceback()
+ raise util.Abort(_('%s is not a repo') % dest)
+
+ try:
+ # Lock destination to prevent modification while it is converted to.
+ # Don't need to lock src because we are just reading from its history
+ # which can't change.
+ dst_lock = rdst.lock()
+
+        # Get a list of all changesets in the source. The easy way to do this
+        # is to simply walk the changelog, using changelog.nodesbetween().
+        # Take a look at mercurial/revlog.py:639 for more details.
+        # Use a generator instead of a list to decrease memory usage.
+ ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
+ rsrc.heads())[0])
+ revmap = {node.nullid: node.nullid}
+ if tolfile:
+ lfiles = set()
+ normalfiles = set()
+ if not pats:
+ pats = ui.config(lfutil.longname, 'patterns', default=())
+ if pats:
+ pats = pats.split(' ')
+ if pats:
+ matcher = match_.match(rsrc.root, '', list(pats))
+ else:
+ matcher = None
+
+ lfiletohash = {}
+ for ctx in ctxs:
+ ui.progress(_('converting revisions'), ctx.rev(),
+ unit=_('revision'), total=rsrc['tip'].rev())
+ _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
+ lfiles, normalfiles, matcher, size, lfiletohash)
+ ui.progress(_('converting revisions'), None)
+
+ if os.path.exists(rdst.wjoin(lfutil.shortname)):
+ shutil.rmtree(rdst.wjoin(lfutil.shortname))
+
+ for f in lfiletohash.keys():
+ if os.path.isfile(rdst.wjoin(f)):
+ os.unlink(rdst.wjoin(f))
+                try:
+                    # removedirs() prunes now-empty parent directories and
+                    # stops (raising OSError) at the first non-empty one
+                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
+                except OSError:
+                    pass
+
+ else:
+ for ctx in ctxs:
+ ui.progress(_('converting revisions'), ctx.rev(),
+ unit=_('revision'), total=rsrc['tip'].rev())
+ _addchangeset(ui, rsrc, rdst, ctx, revmap)
+
+ ui.progress(_('converting revisions'), None)
+ except:
+ # we failed, remove the new directory
+ shutil.rmtree(rdst.root)
+ raise
+ finally:
+ dst_lock.release()
+
+def _addchangeset(ui, rsrc, rdst, ctx, revmap):
+ # Convert src parents to dst parents
+ parents = []
+ for p in ctx.parents():
+ parents.append(revmap[p.node()])
+ while len(parents) < 2:
+ parents.append(node.nullid)
+
+ # Generate list of changed files
+ files = set(ctx.files())
+ if node.nullid not in parents:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ files |= (set(mp1) | set(mp2)) - set(mc)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+
+ def getfilectx(repo, memctx, f):
+ if lfutil.standin(f) in files:
+ # if the file isn't in the manifest then it was removed
+ # or renamed, raise IOError to indicate this
+ try:
+ fctx = ctx.filectx(lfutil.standin(f))
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = lfutil.splitstandin(renamed[0])
+
+ hash = fctx.data().strip()
+ path = lfutil.findfile(rsrc, hash)
+ ### TODO: What if the file is not cached?
+                data = ''
+                fd = None
+                try:
+                    fd = open(path, 'rb')
+                    data = fd.read()
+                finally:
+                    if fd:
+                        fd.close()
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+ else:
+ try:
+ fctx = ctx.filectx(f)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = renamed[0]
+ data = fctx.data()
+ if f == '.hgtags':
+ newdata = []
+ for line in data.splitlines():
+ id, name = line.split(' ', 1)
+ newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+ name))
+ data = ''.join(newdata)
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+
+ dstfiles = []
+ for file in files:
+ if lfutil.isstandin(file):
+ dstfiles.append(lfutil.splitstandin(file))
+ else:
+ dstfiles.append(file)
+ # Commit
+ mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+ getfilectx, ctx.user(), ctx.date(), ctx.extra())
+ ret = rdst.commitctx(mctx)
+ rdst.dirstate.setparents(ret)
+ revmap[ctx.node()] = rdst.changelog.tip()
+
+def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
+ matcher, size, lfiletohash):
+ # Convert src parents to dst parents
+ parents = []
+ for p in ctx.parents():
+ parents.append(revmap[p.node()])
+ while len(parents) < 2:
+ parents.append(node.nullid)
+
+ # Generate list of changed files
+ files = set(ctx.files())
+ if node.nullid not in parents:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ files |= (set(mp1) | set(mp2)) - set(mc)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+
+ dstfiles = []
+ for f in files:
+ if f not in lfiles and f not in normalfiles:
+ islfile = _islfile(f, ctx, matcher, size)
+ # If this file was renamed or copied then copy
+ # the lfileness of its predecessor
+ if f in ctx.manifest():
+ fctx = ctx.filectx(f)
+ renamed = fctx.renamed()
+ renamedlfile = renamed and renamed[0] in lfiles
+ islfile |= renamedlfile
+ if 'l' in fctx.flags():
+ if renamedlfile:
+ raise util.Abort(
+ _('Renamed/copied largefile %s becomes symlink') % f)
+ islfile = False
+ if islfile:
+ lfiles.add(f)
+ else:
+ normalfiles.add(f)
+
+        if f in lfiles:
+            dstfiles.append(lfutil.standin(f))
+            # lfile in manifest if it has not been removed/renamed
+            if f in ctx.manifest():
+                fctx = ctx.filectx(f)
+                if 'l' in fctx.flags():
+                    renamed = fctx.renamed()
+                    if renamed and renamed[0] in lfiles:
+                        raise util.Abort(_('largefile %s becomes symlink')
+                                         % f)
+
+ # lfile was modified, update standins
+ fullpath = rdst.wjoin(f)
+ lfutil.createdir(os.path.dirname(fullpath))
+ m = util.sha1('')
+ m.update(ctx[f].data())
+ hash = m.hexdigest()
+            if f not in lfiletohash or lfiletohash[f] != hash:
+                fd = None
+                try:
+                    fd = open(fullpath, 'wb')
+                    fd.write(ctx[f].data())
+                finally:
+                    if fd:
+                        fd.close()
+ executable = 'x' in ctx[f].flags()
+ os.chmod(fullpath, lfutil.getmode(executable))
+ lfutil.writestandin(rdst, lfutil.standin(f), hash,
+ executable)
+ lfiletohash[f] = hash
+ else:
+ # normal file
+ dstfiles.append(f)
+
+ def getfilectx(repo, memctx, f):
+ if lfutil.isstandin(f):
+ # if the file isn't in the manifest then it was removed
+ # or renamed, raise IOError to indicate this
+ srcfname = lfutil.splitstandin(f)
+ try:
+ fctx = ctx.filectx(srcfname)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ # standin is always a lfile because lfileness
+ # doesn't change after rename or copy
+ renamed = lfutil.standin(renamed[0])
+
+            return context.memfilectx(f, lfiletohash[srcfname] + '\n', 'l' in
+                fctx.flags(), 'x' in fctx.flags(), renamed)
+ else:
+ try:
+ fctx = ctx.filectx(f)
+ except error.LookupError:
+ raise IOError()
+ renamed = fctx.renamed()
+ if renamed:
+ renamed = renamed[0]
+
+ data = fctx.data()
+ if f == '.hgtags':
+ newdata = []
+ for line in data.splitlines():
+ id, name = line.split(' ', 1)
+ newdata.append('%s %s\n' % (node.hex(revmap[node.bin(id)]),
+ name))
+ data = ''.join(newdata)
+ return context.memfilectx(f, data, 'l' in fctx.flags(),
+ 'x' in fctx.flags(), renamed)
+
+ # Commit
+ mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
+ getfilectx, ctx.user(), ctx.date(), ctx.extra())
+ ret = rdst.commitctx(mctx)
+ rdst.dirstate.setparents(ret)
+ revmap[ctx.node()] = rdst.changelog.tip()
+
+def _islfile(file, ctx, matcher, size):
+ '''
+ A file is a lfile if it matches a pattern or is over
+ the given size.
+ '''
+    # Never store .hgtags, .hgignore, or .hgsigs as lfiles
+ if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
+ return False
+ if matcher and matcher(file):
+ return True
+ try:
+ return ctx.filectx(file).size() >= size * 1024 * 1024
+ except error.LookupError:
+ return False
+
+def uploadlfiles(ui, rsrc, rdst, files):
+ '''upload largefiles to the central store'''
+
+ # Don't upload locally. All largefiles are in the system wide cache
+ # so the other repo can just get them from there.
+ if not files or rdst.local():
+ return
+
+ store = basestore._openstore(rsrc, rdst, put=True)
+
+ at = 0
+ files = filter(lambda h: not store.exists(h), files)
+    for hash in files:
+        ui.progress(_('uploading largefiles'), at, unit='largefile',
+                    total=len(files))
+        source = lfutil.findfile(rsrc, hash)
+        if not source:
+            raise util.Abort(_('Missing largefile %s needs to be uploaded')
+                             % hash)
+        # XXX check for errors here
+        store.put(source, hash)
+        at += 1
+    ui.progress(_('uploading largefiles'), None)
+
+def verifylfiles(ui, repo, all=False, contents=False):
+ '''Verify that every big file revision in the current changeset
+ exists in the central store. With --contents, also verify that
+ the contents of each big file revision are correct (SHA-1 hash
+ matches the revision ID). With --all, check every changeset in
+ this repository.'''
+    if all:
+        # pass a list rather than an iterator: store.verify() calls
+        # len(revs) when reporting what it is about to do
+        revs = range(len(repo))
+ else:
+ revs = ['.']
+
+ store = basestore._openstore(repo)
+ return store.verify(revs, contents=contents)
+
+def cachelfiles(ui, repo, node):
+ '''cachelfiles ensures that all largefiles needed by the specified revision
+ are present in the repository's largefile cache.
+
+ returns a tuple (cached, missing). cached is the list of files downloaded
+ by this operation; missing is the list of files that were needed but could
+ not be found.'''
+ lfiles = lfutil.listlfiles(repo, node)
+ toget = []
+
+ for lfile in lfiles:
+ expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
+ # if it exists and its hash matches, it might have been locally
+ # modified before updating and the user chose 'local'. in this case,
+ # it will not be in any store, so don't look for it.
+        if ((not os.path.exists(repo.wjoin(lfile)) or
+             expectedhash != lfutil.hashfile(repo.wjoin(lfile))) and
+                not lfutil.findfile(repo, expectedhash)):
+ toget.append((lfile, expectedhash))
+
+ if toget:
+ store = basestore._openstore(repo)
+ ret = store.get(toget)
+ return ret
+
+ return ([], [])
+
+def updatelfiles(ui, repo, filelist=None, printmessage=True):
+ wlock = repo.wlock()
+ try:
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)
+
+ if filelist is not None:
+ lfiles = [f for f in lfiles if f in filelist]
+
+ printed = False
+ if printmessage and lfiles:
+ ui.status(_('getting changed largefiles\n'))
+ printed = True
+ cachelfiles(ui, repo, '.')
+
+ updated, removed = 0, 0
+        for lfile in lfiles:
+            i = _updatelfile(repo, lfdirstate, lfile)
+            # increment the appropriate counter according to _updatelfile's
+            # return value
+            updated += i > 0 and i or 0
+            removed -= i < 0 and i or 0
+ if printmessage and (removed or updated) and not printed:
+ ui.status(_('getting changed largefiles\n'))
+ printed = True
+
+ lfdirstate.write()
+ if printed and printmessage:
+ ui.status(_('%d largefiles updated, %d removed\n') % (updated,
+ removed))
+ finally:
+ wlock.release()
+
+def _updatelfile(repo, lfdirstate, lfile):
+ '''updates a single largefile and copies the state of its standin from
+ the repository's dirstate to its state in the lfdirstate.
+
+ returns 1 if the file was modified, -1 if the file was removed, 0 if the
+ file was unchanged, and None if the needed largefile was missing from the
+ cache.'''
+ ret = 0
+ abslfile = repo.wjoin(lfile)
+ absstandin = repo.wjoin(lfutil.standin(lfile))
+ if os.path.exists(absstandin):
+ if os.path.exists(absstandin+'.orig'):
+ shutil.copyfile(abslfile, abslfile+'.orig')
+ expecthash = lfutil.readstandin(repo, lfile)
+ if expecthash != '' and \
+ (not os.path.exists(abslfile) or \
+ expecthash != lfutil.hashfile(abslfile)):
+ if not lfutil.copyfromcache(repo, expecthash, lfile):
+ return None # don't try to set the mode or update the dirstate
+ ret = 1
+ mode = os.stat(absstandin).st_mode
+ if mode != os.stat(abslfile).st_mode:
+ os.chmod(abslfile, mode)
+ ret = 1
+ else:
+ if os.path.exists(abslfile):
+ os.unlink(abslfile)
+ ret = -1
+ state = repo.dirstate[lfutil.standin(lfile)]
+ if state == 'n':
+ lfdirstate.normal(lfile)
+ elif state == 'r':
+ lfdirstate.remove(lfile)
+ elif state == 'a':
+ lfdirstate.add(lfile)
+ elif state == '?':
+ try:
+ # Mercurial >= 1.9
+ lfdirstate.drop(lfile)
+ except AttributeError:
+ # Mercurial <= 1.8
+ lfdirstate.forget(lfile)
+ return ret
+
+# -- hg commands declarations ------------------------------------------------
+
+
+cmdtable = {
+ 'lfconvert': (lfconvert,
+ [('s', 'size', 0, 'All files over this size (in megabytes) '
+ 'will be considered largefiles. This can also be specified in '
+ 'your hgrc as [largefiles].size.'),
+ ('','tonormal',False,
+ 'Convert from a largefiles repo to a normal repo')],
+ _('hg lfconvert SOURCE DEST [FILE ...]')),
+ }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/lfutil.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,502 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''largefiles utility code: must not import other modules in this package.'''
+
+import os
+import errno
+import inspect
+import shutil
+import stat
+import hashlib
+
+from mercurial import cmdutil, dirstate, match as match_, util
+from mercurial.i18n import _
+
+try:
+    # Mercurial >= 1.9
+    from mercurial import scmutil, httpconnection
+except ImportError:
+    # Mercurial <= 1.8 has no httpconnection; httpsendfile() below falls
+    # back to mercurial.url in that case
+    from mercurial import url as url_
+
+shortname = '.hglf'
+longname = 'largefiles'
+
+
+# -- Portability wrappers ----------------------------------------------
+
+if 'subrepos' in inspect.getargspec(dirstate.dirstate.status)[0]:
+ # for Mercurial >= 1.5
+ def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
+ return dirstate.walk(matcher, [], unknown, ignored)
+else:
+ # for Mercurial <= 1.4
+ def dirstate_walk(dirstate, matcher, unknown=False, ignored=False):
+ return dirstate.walk(matcher, unknown, ignored)
+
+def repo_add(repo, list):
+ try:
+ # Mercurial <= 1.5
+ add = repo.add
+ except AttributeError:
+ # Mercurial >= 1.6
+ add = repo[None].add
+ return add(list)
+
+def repo_remove(repo, list, unlink=False):
+ try:
+ # Mercurial <= 1.5
+ remove = repo.remove
+ except AttributeError:
+ # Mercurial >= 1.6
+ try:
+ # Mercurial <= 1.8
+ remove = repo[None].remove
+ except AttributeError:
+ # Mercurial >= 1.9
+ def remove(list, unlink):
+ wlock = repo.wlock()
+ try:
+ if unlink:
+ for f in list:
+ try:
+ util.unlinkpath(repo.wjoin(f))
+ except OSError, inst:
+ if inst.errno != errno.ENOENT:
+ raise
+ repo[None].forget(list)
+ finally:
+ wlock.release()
+
+ return remove(list, unlink=unlink)
+
+def repo_forget(repo, list):
+ try:
+ # Mercurial <= 1.5
+ forget = repo.forget
+ except AttributeError:
+ # Mercurial >= 1.6
+ forget = repo[None].forget
+ return forget(list)
+
+def findoutgoing(repo, remote, force):
+    # The first attempt is for Mercurial <= 1.5, the second for >= 1.6
+ try:
+ return repo.findoutgoing(remote)
+ except AttributeError:
+ from mercurial import discovery
+ try:
+ # Mercurial <= 1.8
+ return discovery.findoutgoing(repo, remote, force=force)
+ except AttributeError:
+ # Mercurial >= 1.9
+ common, _anyinc, _heads = discovery.findcommonincoming(repo,
+ remote, force=force)
+ return repo.changelog.findmissing(common)
+
+# -- Private worker functions ------------------------------------------
+
+if os.name == 'nt':
+ from mercurial import win32
+    try:
+        # newer Mercurial renamed win32.os_link to win32.oslink
+        linkfn = win32.oslink
+    except AttributeError:
+        linkfn = win32.os_link
+else:
+ linkfn = os.link
+
+def link(src, dest):
+ try:
+ linkfn(src, dest)
+ except OSError:
+ # If hardlinks fail fall back on copy
+ shutil.copyfile(src, dest)
+ os.chmod(dest, os.stat(src).st_mode)
+
+def systemcachepath(ui, hash):
+ path = ui.config(longname, 'systemcache', None)
+ if path:
+ path = os.path.join(path, hash)
+ else:
+ if os.name == 'nt':
+ path = os.path.join(os.getenv('LOCALAPPDATA') or \
+ os.getenv('APPDATA'), longname, hash)
+ elif os.name == 'posix':
+ path = os.path.join(os.getenv('HOME'), '.' + longname, hash)
+ else:
+            raise util.Abort(_('Unknown operating system: %s') % os.name)
+ return path
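The cache location can also be pinned explicitly in the user's configuration,
which is what the ui.config() call above reads first (the path shown is
illustrative):

    [largefiles]
    systemcache = /path/to/shared/cache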
+
+def insystemcache(ui, hash):
+ return os.path.exists(systemcachepath(ui, hash))
+
+def findfile(repo, hash):
+ if incache(repo, hash):
+ repo.ui.note(_('Found %s in cache\n') % hash)
+ return cachepath(repo, hash)
+ if insystemcache(repo.ui, hash):
+ repo.ui.note(_('Found %s in system cache\n') % hash)
+ return systemcachepath(repo.ui, hash)
+ return None
+
+class largefiles_dirstate(dirstate.dirstate):
+ def __getitem__(self, key):
+ return super(largefiles_dirstate, self).__getitem__(unixpath(key))
+ def normal(self, f):
+ return super(largefiles_dirstate, self).normal(unixpath(f))
+ def remove(self, f):
+ return super(largefiles_dirstate, self).remove(unixpath(f))
+ def add(self, f):
+ return super(largefiles_dirstate, self).add(unixpath(f))
+ def drop(self, f):
+ return super(largefiles_dirstate, self).drop(unixpath(f))
+ def forget(self, f):
+ return super(largefiles_dirstate, self).forget(unixpath(f))
+
+def openlfdirstate(ui, repo):
+ '''
+ Return a dirstate object that tracks big files: i.e. its root is the
+ repo root, but it is saved in .hg/largefiles/dirstate.
+ '''
+ admin = repo.join(longname)
+ try:
+ # Mercurial >= 1.9
+ opener = scmutil.opener(admin)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ opener = util.opener(admin)
+ if hasattr(repo.dirstate, '_validate'):
+ lfdirstate = largefiles_dirstate(opener, ui, repo.root,
+ repo.dirstate._validate)
+ else:
+ lfdirstate = largefiles_dirstate(opener, ui, repo.root)
+
+ # If the largefiles dirstate does not exist, populate and create it. This
+ # ensures that we create it on the first meaningful largefiles operation in
+ # a new clone. It also gives us an easy way to forcibly rebuild largefiles
+ # state:
+ # rm .hg/largefiles/dirstate && hg status
+ # Or even, if things are really messed up:
+ # rm -rf .hg/largefiles && hg status
+ if not os.path.exists(os.path.join(admin, 'dirstate')):
+ util.makedirs(admin)
+ matcher = getstandinmatcher(repo)
+ for standin in dirstate_walk(repo.dirstate, matcher):
+ lfile = splitstandin(standin)
+ hash = readstandin(repo, lfile)
+ lfdirstate.normallookup(lfile)
+ try:
+                if hash == hashfile(repo.wjoin(lfile)):
+ lfdirstate.normal(lfile)
+ except IOError, err:
+ if err.errno != errno.ENOENT:
+ raise
+
+ lfdirstate.write()
+
+ return lfdirstate
+
+def lfdirstate_status(lfdirstate, repo, rev):
+ wlock = repo.wlock()
+ try:
+ match = match_.always(repo.root, repo.getcwd())
+ s = lfdirstate.status(match, [], False, False, False)
+ unsure, modified, added, removed, missing, unknown, ignored, clean = s
+ for lfile in unsure:
+ if repo[rev][standin(lfile)].data().strip() != \
+ hashfile(repo.wjoin(lfile)):
+ modified.append(lfile)
+ else:
+ clean.append(lfile)
+ lfdirstate.normal(lfile)
+ lfdirstate.write()
+ finally:
+ wlock.release()
+ return (modified, added, removed, missing, unknown, ignored, clean)
+
+def listlfiles(repo, rev=None, matcher=None):
+ '''list largefiles in the working copy or specified changeset'''
+
+ if matcher is None:
+ matcher = getstandinmatcher(repo)
+
+ # ignore unknown files in working directory
+ return [splitstandin(f) for f in repo[rev].walk(matcher) \
+ if rev is not None or repo.dirstate[f] != '?']
+
+def incache(repo, hash):
+ return os.path.exists(cachepath(repo, hash))
+
+def createdir(dir):
+ if not os.path.exists(dir):
+ os.makedirs(dir)
+
+def cachepath(repo, hash):
+ return repo.join(os.path.join(longname, hash))
+
+def copyfromcache(repo, hash, filename):
+ '''copyfromcache copies the specified largefile from the repo or system
+ cache to the specified location in the repository. It will not throw an
+ exception on failure, as it is meant to be called only after ensuring that
+ the needed largefile exists in the cache.'''
+ path = findfile(repo, hash)
+ if path is None:
+ return False
+ util.makedirs(os.path.dirname(repo.wjoin(filename)))
+ shutil.copy(path, repo.wjoin(filename))
+ return True
+
+def copytocache(repo, rev, file, uploaded=False):
+ hash = readstandin(repo, file)
+ if incache(repo, hash):
+ return
+ copytocacheabsolute(repo, repo.wjoin(file), hash)
+
+def copytocacheabsolute(repo, file, hash):
+ createdir(os.path.dirname(cachepath(repo, hash)))
+ if insystemcache(repo.ui, hash):
+ link(systemcachepath(repo.ui, hash), cachepath(repo, hash))
+ else:
+ shutil.copyfile(file, cachepath(repo, hash))
+ os.chmod(cachepath(repo, hash), os.stat(file).st_mode)
+ linktosystemcache(repo, hash)
+
+def linktosystemcache(repo, hash):
+ createdir(os.path.dirname(systemcachepath(repo.ui, hash)))
+ link(cachepath(repo, hash), systemcachepath(repo.ui, hash))
+
+def getstandinmatcher(repo, pats=[], opts={}):
+ '''Return a match object that applies pats to the standin directory'''
+ standindir = repo.pathto(shortname)
+ if pats:
+ # patterns supplied: search standin directory relative to current dir
+ cwd = repo.getcwd()
+ if os.path.isabs(cwd):
+ # cwd is an absolute path for hg -R <reponame>
+ # work relative to the repository root in this case
+ cwd = ''
+ pats = [os.path.join(standindir, cwd, pat) for pat in pats]
+ elif os.path.isdir(standindir):
+ # no patterns: relative to repo root
+ pats = [standindir]
+ else:
+ # no patterns and no standin dir: return matcher that matches nothing
+ match = match_.match(repo.root, None, [], exact=True)
+ match.matchfn = lambda f: False
+ return match
+ return getmatcher(repo, pats, opts, showbad=False)
+
+def getmatcher(repo, pats=[], opts={}, showbad=True):
+ '''Wrapper around scmutil.match() that adds showbad: if false, neuter
+ the match object\'s bad() method so it does not print any warnings
+ about missing files or directories.'''
+ try:
+ # Mercurial >= 1.9
+ match = scmutil.match(repo[None], pats, opts)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ match = cmdutil.match(repo, pats, opts)
+
+ if not showbad:
+ match.bad = lambda f, msg: None
+ return match
+
+def composestandinmatcher(repo, rmatcher):
+ '''Return a matcher that accepts standins corresponding to the files
+ accepted by rmatcher. Pass the list of files in the matcher as the
+ paths specified by the user.'''
+ smatcher = getstandinmatcher(repo, rmatcher.files())
+ isstandin = smatcher.matchfn
+ def composed_matchfn(f):
+ return isstandin(f) and rmatcher.matchfn(splitstandin(f))
+ smatcher.matchfn = composed_matchfn
+
+ return smatcher
+
+def standin(filename):
+ '''Return the repo-relative path to the standin for the specified big
+ file.'''
+ # Notes:
+    # 1) Most callers want an absolute path, but the standin name is kept
+    #    repo-relative here so it can be passed directly to repo_add().  So
+    #    leave it up to the caller to use repo.wjoin() to get an absolute
+    #    path.
+ # 2) Join with '/' because that's what dirstate always uses, even on
+ # Windows. Change existing separator to '/' first in case we are
+ # passed filenames from an external source (like the command line).
+ return shortname + '/' + filename.replace(os.sep, '/')
+
+def isstandin(filename):
+ '''Return true if filename is a big file standin. filename must
+ be in Mercurial\'s internal form (slash-separated).'''
+ return filename.startswith(shortname + '/')
+
+def splitstandin(filename):
+ # Split on / because that's what dirstate always uses, even on Windows.
+ # Change local separator to / first just in case we are passed filenames
+ # from an external source (like the command line).
+ bits = filename.replace(os.sep, '/').split('/', 1)
+ if len(bits) == 2 and bits[0] == shortname:
+ return bits[1]
+ else:
+ return None
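A quick illustration of the round trip between these two helpers (assuming
'/' separators; os.sep is translated first on Windows):

    # standin('foo/big.bin')            == '.hglf/foo/big.bin'
    # splitstandin('.hglf/foo/big.bin') == 'foo/big.bin'
    # splitstandin('foo/big.bin')       is None  (not under .hglf/)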
+
+def updatestandin(repo, standin):
+ file = repo.wjoin(splitstandin(standin))
+ if os.path.exists(file):
+ hash = hashfile(file)
+ executable = getexecutable(file)
+ writestandin(repo, standin, hash, executable)
+
+def readstandin(repo, filename, node=None):
+ '''read hex hash from standin for filename at given node, or working
+ directory if no node is given'''
+ return repo[node][standin(filename)].data().strip()
+
+def writestandin(repo, standin, hash, executable):
+    '''write hash to <repo.root>/<standin>'''
+ writehash(hash, repo.wjoin(standin), executable)
+
+def copyandhash(instream, outfile):
+ '''Read bytes from instream (iterable) and write them to outfile,
+ computing the SHA-1 hash of the data along the way. Close outfile
+ when done and return the binary hash.'''
+ hasher = util.sha1('')
+ for data in instream:
+ hasher.update(data)
+ outfile.write(data)
+
+ # Blecch: closing a file that somebody else opened is rude and
+ # wrong. But it's so darn convenient and practical! After all,
+ # outfile was opened just to copy and hash.
+ outfile.close()
+
+ return hasher.digest()
+
+def hashrepofile(repo, file):
+ return hashfile(repo.wjoin(file))
+
+def hashfile(file):
+ if not os.path.exists(file):
+ return ''
+ hasher = util.sha1('')
+ fd = open(file, 'rb')
+ for data in blockstream(fd):
+ hasher.update(data)
+ fd.close()
+ return hasher.hexdigest()
+
+class limitreader(object):
+ def __init__(self, f, limit):
+ self.f = f
+ self.limit = limit
+
+ def read(self, length):
+ if self.limit == 0:
+ return ''
+ length = length > self.limit and self.limit or length
+ self.limit -= length
+ return self.f.read(length)
+
+ def close(self):
+ pass
+
+def blockstream(infile, blocksize=128 * 1024):
+ """Generator that yields blocks of data from infile and closes infile."""
+ while True:
+ data = infile.read(blocksize)
+ if not data:
+ break
+ yield data
+ # Same blecch as above.
+ infile.close()
+
+def readhash(filename):
+ rfile = open(filename, 'rb')
+ hash = rfile.read(40)
+ rfile.close()
+ if len(hash) < 40:
+ raise util.Abort(_('bad hash in \'%s\' (only %d bytes long)')
+ % (filename, len(hash)))
+ return hash
+
+def writehash(hash, filename, executable):
+ util.makedirs(os.path.dirname(filename))
+ if os.path.exists(filename):
+ os.unlink(filename)
+ wfile = open(filename, 'wb')
+
+ try:
+ wfile.write(hash)
+ wfile.write('\n')
+ finally:
+ wfile.close()
+ if os.path.exists(filename):
+ os.chmod(filename, getmode(executable))
+
+def getexecutable(filename):
+ mode = os.stat(filename).st_mode
+ return (mode & stat.S_IXUSR) and (mode & stat.S_IXGRP) and (mode & \
+ stat.S_IXOTH)
+
+def getmode(executable):
+ if executable:
+ return 0755
+ else:
+ return 0644
+
+def urljoin(first, second, *arg):
+ def join(left, right):
+ if not left.endswith('/'):
+ left += '/'
+ if right.startswith('/'):
+ right = right[1:]
+ return left + right
+
+ url = join(first, second)
+ for a in arg:
+ url = join(url, a)
+ return url
+
+def hexsha1(data):
+ """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
+ object data"""
+ h = hashlib.sha1()
+ for chunk in util.filechunkiter(data):
+ h.update(chunk)
+ return h.hexdigest()
+
+def httpsendfile(ui, filename):
+ try:
+ # Mercurial >= 1.9
+ return httpconnection.httpsendfile(ui, filename, 'rb')
+    except (ImportError, NameError):
+ if 'ui' in inspect.getargspec(url_.httpsendfile.__init__)[0]:
+ # Mercurial == 1.8
+ return url_.httpsendfile(ui, filename, 'rb')
+ else:
+ # Mercurial <= 1.7
+ return url_.httpsendfile(filename, 'rb')
+
+# Convert a path to a unix style path. This is used to give a
+# canonical path to the lfdirstate.
+def unixpath(path):
+ return os.path.normpath(path).replace(os.sep, '/')
+
+def islfilesrepo(repo):
+    return ('largefiles' in repo.requirements and
+            any_(shortname + '/' in f[0] for f in repo.store.datafiles()))
+
+def any_(gen):
+    # equivalent of the builtin any(), kept for compatibility with the
+    # older Pythons Mercurial still supports
+    for x in gen:
+ if x:
+ return True
+ return False
+
+class storeprotonotcapable(BaseException):
+ def __init__(self, storetypes):
+ self.storetypes = storetypes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/localstore.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,71 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Store class for local filesystem.'''
+
+import os
+
+from mercurial import util
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+class localstore(basestore.basestore):
+ '''Because there is a system wide cache, the local store always uses that
+ cache. Since the cache is updated elsewhere, we can just read from it here
+ as if it were the store.'''
+
+ def __init__(self, ui, repo, remote):
+ url = os.path.join(remote.path, '.hg', lfutil.longname)
+ super(localstore, self).__init__(ui, repo, util.expandpath(url))
+
+    def put(self, source, hash):
+ '''Any file that is put must already be in the system wide cache so do
+ nothing.'''
+ return
+
+ def exists(self, hash):
+ return lfutil.insystemcache(self.repo.ui, hash)
+
+    def _getfile(self, tmpfile, filename, hash):
+        if lfutil.insystemcache(self.ui, hash):
+            # stream the cached file into tmpfile, hashing as we go, so
+            # that basestore.get() can verify its integrity; _getfile()
+            # must return the binary hash, not a path
+            path = lfutil.systemcachepath(self.ui, hash)
+            return lfutil.copyandhash(lfutil.blockstream(open(path, 'rb')),
+                                      tmpfile)
+        raise basestore.StoreError(filename, hash, '',
+            _("Can't get file locally"))
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ filename = lfutil.splitstandin(standin)
+ if not filename:
+ return False
+ fctx = cctx[standin]
+ key = (filename, fctx.filenode())
+ if key in verified:
+ return False
+
+ expecthash = fctx.data()[0:40]
+ verified.add(key)
+ if not lfutil.insystemcache(self.ui, expecthash):
+ self.ui.warn(
+ _('changeset %s: %s missing\n'
+ ' (looked for hash %s)\n')
+ % (cset, filename, expecthash))
+ return True # failed
+
+ if contents:
+ storepath = lfutil.systemcachepath(self.ui, expecthash)
+ actualhash = lfutil.hashfile(storepath)
+ if actualhash != expecthash:
+ self.ui.warn(
+ _('changeset %s: %s: contents differ\n'
+ ' (%s:\n'
+ ' expected hash %s,\n'
+ ' but got %s)\n')
+ % (cset, filename, storepath, expecthash, actualhash))
+ return True # failed
+ return False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/overrides.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,902 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Overridden Mercurial commands and functions for the largefiles extension'''
+
+import os
+import copy
+
+from mercurial import hg, commands, util, cmdutil, match as match_, node, \
+ archival, error, merge
+from mercurial.i18n import _
+from mercurial.node import hex
+from hgext import rebase
+
+try:
+ from mercurial import scmutil
+except ImportError:
+ pass
+
+import lfutil
+import lfcommands
+
+def installnormalfilesmatchfn(manifest):
+ '''overrides scmutil.match so that the matcher it returns will ignore all
+ largefiles'''
+ oldmatch = None # for the closure
+ def override_match(repo, pats=[], opts={}, globbed=False,
+ default='relpath'):
+ match = oldmatch(repo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ notlfile = lambda f: not (lfutil.isstandin(f) or lfutil.standin(f) in
+ manifest)
+ m._files = filter(notlfile, m._files)
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: notlfile(f) and orig_matchfn(f) or None
+ return m
+ oldmatch = installmatchfn(override_match)
+
+def installmatchfn(f):
+ try:
+ # Mercurial >= 1.9
+ oldmatch = scmutil.match
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ oldmatch = cmdutil.match
+ setattr(f, 'oldmatch', oldmatch)
+ try:
+ # Mercurial >= 1.9
+ scmutil.match = f
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ cmdutil.match = f
+ return oldmatch
+
+def restorematchfn():
+ '''restores scmutil.match to what it was before installnormalfilesmatchfn
+ was called. no-op if scmutil.match is its original function.
+
+ Note that n calls to installnormalfilesmatchfn will require n calls to
+ restore matchfn to reverse'''
+ try:
+ # Mercurial >= 1.9
+ scmutil.match = getattr(scmutil.match, 'oldmatch', scmutil.match)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ cmdutil.match = getattr(cmdutil.match, 'oldmatch', cmdutil.match)
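These helpers are meant to be used as a bracketing pair around a call to the
original command; a sketch of the pattern (the overrides below inline it,
calling restorematchfn() unconditionally rather than via try/finally):

    installnormalfilesmatchfn(repo[None].manifest())
    try:
        result = orig(ui, repo, *pats, **opts)
    finally:
        restorematchfn()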
+
+# -- Wrappers: modify existing commands --------------------------------
+
+# Add works by going through the files that the user wanted to add and
+# checking if they should be added as lfiles.  Then it makes a new
+# matcher which matches only the normal files and runs the original
+# version of add.
+def override_add(orig, ui, repo, *pats, **opts):
+ large = opts.pop('large', None)
+
+ lfsize = opts.pop('lfsize', None)
+ if not lfsize and lfutil.islfilesrepo(repo):
+ lfsize = ui.config(lfutil.longname, 'size', default='10')
+ if lfsize:
+ try:
+ lfsize = int(lfsize)
+ except ValueError:
+            raise util.Abort(_('largefiles: size must be an integer, was %s')
+                             % lfsize)
+
+ lfmatcher = None
+ if os.path.exists(repo.wjoin(lfutil.shortname)):
+ lfpats = ui.config(lfutil.longname, 'patterns', default=())
+ if lfpats:
+ lfpats = lfpats.split(' ')
+ lfmatcher = match_.match(repo.root, '', list(lfpats))
+
+ lfnames = []
+ try:
+ # Mercurial >= 1.9
+ m = scmutil.match(repo[None], pats, opts)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ m = cmdutil.match(repo, pats, opts)
+ m.bad = lambda x, y: None
+ wctx = repo[None]
+ for f in repo.walk(m):
+ exact = m.exact(f)
+ lfile = lfutil.standin(f) in wctx
+ nfile = f in wctx
+ exists = lfile or nfile
+
+ # Don't warn the user when they attempt to add a normal tracked file.
+ # The normal add code will do that for us.
+ if exact and exists:
+ if lfile:
+ ui.warn(_('%s already a largefile\n') % f)
+ continue
+
+ if exact or not exists:
+ if large or (lfsize and os.path.getsize(repo.wjoin(f)) >= \
+ lfsize * 1024 * 1024) or (lfmatcher and lfmatcher(f)):
+ lfnames.append(f)
+ if ui.verbose or not exact:
+ ui.status(_('adding %s as a largefile\n') % m.rel(f))
+
+ bad = []
+ standins = []
+
+    # Need to lock, otherwise there could be a race condition between when
+    # standins are created and when they are added to the repo
+ wlock = repo.wlock()
+ try:
+ if not opts.get('dry_run'):
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ for f in lfnames:
+ standinname = lfutil.standin(f)
+ lfutil.writestandin(repo, standinname, hash='',
+ executable=lfutil.getexecutable(repo.wjoin(f)))
+ standins.append(standinname)
+ if lfdirstate[f] == 'r':
+ lfdirstate.normallookup(f)
+ else:
+ lfdirstate.add(f)
+ lfdirstate.write()
+ bad += [lfutil.splitstandin(f) for f in lfutil.repo_add(repo,
+ standins) if f in m.files()]
+ finally:
+ wlock.release()
+
+ installnormalfilesmatchfn(repo[None].manifest())
+ result = orig(ui, repo, *pats, **opts)
+ restorematchfn()
+
+ return (result == 1 or bad) and 1 or 0
+
+def override_remove(orig, ui, repo, *pats, **opts):
+ manifest = repo[None].manifest()
+ installnormalfilesmatchfn(manifest)
+ orig(ui, repo, *pats, **opts)
+ restorematchfn()
+
+ after, force = opts.get('after'), opts.get('force')
+ if not pats and not after:
+ raise util.Abort(_('no files specified'))
+ try:
+ # Mercurial >= 1.9
+ m = scmutil.match(repo[None], pats, opts)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ m = cmdutil.match(repo, pats, opts)
+ try:
+ repo.lfstatus = True
+ s = repo.status(match=m, clean=True)
+ finally:
+ repo.lfstatus = False
+    modified, added, deleted, clean = [[f for f in files
+                                        if lfutil.standin(f) in manifest]
+                                       for files in (s[0], s[1], s[3], s[6])]
+
+ def warn(files, reason):
+ for f in files:
+ ui.warn(_('not removing %s: file %s (use -f to force removal)\n')
+ % (m.rel(f), reason))
+
+ if force:
+ remove, forget = modified + deleted + clean, added
+ elif after:
+ remove, forget = deleted, []
+ warn(modified + added + clean, _('still exists'))
+ else:
+ remove, forget = deleted + clean, []
+ warn(modified, _('is modified'))
+ warn(added, _('has been marked for add'))
+
+ for f in sorted(remove + forget):
+ if ui.verbose or not m.exact(f):
+ ui.status(_('removing %s\n') % m.rel(f))
+
+    # Need to lock because standin files are deleted then removed from the
+    # repository and we could race in between.
+ wlock = repo.wlock()
+ try:
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ for f in remove:
+ if not after:
+ os.unlink(repo.wjoin(f))
+ currentdir = os.path.split(f)[0]
+ while currentdir and not os.listdir(repo.wjoin(currentdir)):
+ os.rmdir(repo.wjoin(currentdir))
+ currentdir = os.path.split(currentdir)[0]
+ lfdirstate.remove(f)
+ lfdirstate.write()
+
+ forget = [lfutil.standin(f) for f in forget]
+ remove = [lfutil.standin(f) for f in remove]
+ lfutil.repo_forget(repo, forget)
+ lfutil.repo_remove(repo, remove, unlink=True)
+ finally:
+ wlock.release()
+
+def override_status(orig, ui, repo, *pats, **opts):
+ try:
+ repo.lfstatus = True
+ return orig(ui, repo, *pats, **opts)
+ finally:
+ repo.lfstatus = False
+
+def override_log(orig, ui, repo, *pats, **opts):
+ try:
+ repo.lfstatus = True
+ orig(ui, repo, *pats, **opts)
+ finally:
+ repo.lfstatus = False
+
+def override_verify(orig, ui, repo, *pats, **opts):
+ large = opts.pop('large', False)
+ all = opts.pop('lfa', False)
+ contents = opts.pop('lfc', False)
+
+ result = orig(ui, repo, *pats, **opts)
+ if large:
+ result = result or lfcommands.verifylfiles(ui, repo, all, contents)
+ return result
+
+# Override needs to refresh standins so that update's normal merge
+# will go through properly. Then the other update hook (overriding repo.update)
+# will get the new files. Filemerge is also overridden so that the merge
+# will merge standins correctly.
+def override_update(orig, ui, repo, *pats, **opts):
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ s = lfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False,
+ False, False)
+ (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+
+ # Need to lock between the standins getting updated and their lfiles
+ # getting updated
+ wlock = repo.wlock()
+ try:
+ if opts['check']:
+ mod = len(modified) > 0
+ for lfile in unsure:
+ standin = lfutil.standin(lfile)
+ if repo['.'][standin].data().strip() != \
+ lfutil.hashfile(repo.wjoin(lfile)):
+ mod = True
+ else:
+ lfdirstate.normal(lfile)
+ lfdirstate.write()
+ if mod:
+ raise util.Abort(_('uncommitted local changes'))
+ # XXX handle removed differently
+ if not opts['clean']:
+ for lfile in unsure + modified + added:
+ lfutil.updatestandin(repo, lfutil.standin(lfile))
+ finally:
+ wlock.release()
+ return orig(ui, repo, *pats, **opts)
+
+# Override filemerge to prompt the user about how they wish to merge lfiles.
+# This will handle identical edits, and copy/rename + edit without prompting
+# the user.
+def override_filemerge(origfn, repo, mynode, orig, fcd, fco, fca):
+ # Use better variable names here. Because this is a wrapper we cannot
+ # change the variable names in the function declaration.
+ fcdest, fcother, fcancestor = fcd, fco, fca
+ if not lfutil.isstandin(orig):
+ return origfn(repo, mynode, orig, fcdest, fcother, fcancestor)
+ else:
+ if not fcother.cmp(fcdest): # files identical?
+ return None
+
+ # backwards, use working dir parent as ancestor
+ if fcancestor == fcother:
+ fcancestor = fcdest.parents()[0]
+
+ if orig != fcother.path():
+ repo.ui.status(_('merging %s and %s to %s\n')
+ % (lfutil.splitstandin(orig),
+ lfutil.splitstandin(fcother.path()),
+ lfutil.splitstandin(fcdest.path())))
+ else:
+ repo.ui.status(_('merging %s\n')
+ % lfutil.splitstandin(fcdest.path()))
+
+ if fcancestor.path() != fcother.path() and fcother.data() == \
+ fcancestor.data():
+ return 0
+ if fcancestor.path() != fcdest.path() and fcdest.data() == \
+ fcancestor.data():
+ repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
+ return 0
+
+ if repo.ui.promptchoice(_('largefile %s has a merge conflict\n'
+ 'keep (l)ocal or take (o)ther?') %
+ lfutil.splitstandin(orig),
+ (_('&Local'), _('&Other')), 0) == 0:
+ return 0
+ else:
+ repo.wwrite(fcdest.path(), fcother.data(), fcother.flags())
+ return 0
+
+# Copy first changes the matchers to match standins instead of lfiles.
+# Then it overrides util.copyfile; the overridden version checks if the
+# destination lfile already exists. It also keeps a list of copied files so
+# that the lfiles can be copied and the dirstate updated.
+def override_copy(orig, ui, repo, pats, opts, rename=False):
+ # doesn't remove lfile on rename
+ if len(pats) < 2:
+ # this isn't legal, let the original function deal with it
+ return orig(ui, repo, pats, opts, rename)
+
+ def makestandin(relpath):
+ try:
+ # Mercurial >= 1.9
+ path = scmutil.canonpath(repo.root, repo.getcwd(), relpath)
+        except (ImportError, NameError):
+ # Mercurial <= 1.8
+ path = util.canonpath(repo.root, repo.getcwd(), relpath)
+ return os.path.join(os.path.relpath('.', repo.getcwd()),
+ lfutil.standin(path))
+
+ try:
+ # Mercurial >= 1.9
+ fullpats = scmutil.expandpats(pats)
+    except (ImportError, NameError):
+ # Mercurial <= 1.8
+ fullpats = cmdutil.expandpats(pats)
+ dest = fullpats[-1]
+
+ if os.path.isdir(dest):
+ if not os.path.isdir(makestandin(dest)):
+ os.makedirs(makestandin(dest))
+    # This could copy both lfiles and normal files in one command, but we
+    # don't want to do that. First, replace the matcher to only match normal
+    # files and run the original command; then replace it to just match
+    # lfiles and run it again.
+ nonormalfiles = False
+ nolfiles = False
+ try:
+ installnormalfilesmatchfn(repo[None].manifest())
+ result = orig(ui, repo, pats, opts, rename)
+ except util.Abort, e:
+ if str(e) != 'no files to copy':
+ raise e
+ else:
+ nonormalfiles = True
+ result = 0
+ finally:
+ restorematchfn()
+
+ # The first rename can cause our current working directory to be removed.
+ # In that case there is nothing left to copy/rename so just quit.
+ try:
+ repo.getcwd()
+ except OSError:
+ return result
+
+ try:
+ # When we call orig below it creates the standins but we don't add them
+ # to the dir state until later so lock during that time.
+ wlock = repo.wlock()
+
+ manifest = repo[None].manifest()
+ oldmatch = None # for the closure
+ def override_match(repo, pats=[], opts={}, globbed=False,
+ default='relpath'):
+ newpats = []
+ # The patterns were previously mangled to add the standin
+ # directory; we need to remove that now
+ for pat in pats:
+ if match_.patkind(pat) is None and lfutil.shortname in pat:
+ newpats.append(pat.replace(lfutil.shortname, ''))
+ else:
+ newpats.append(pat)
+ match = oldmatch(repo, newpats, opts, globbed, default)
+ m = copy.copy(match)
+ lfile = lambda f: lfutil.standin(f) in manifest
+ m._files = [lfutil.standin(f) for f in m._files if lfile(f)]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ m.matchfn = lambda f: lfutil.isstandin(f) and \
+ lfile(lfutil.splitstandin(f)) and \
+ orig_matchfn(lfutil.splitstandin(f)) or None
+ return m
+ oldmatch = installmatchfn(override_match)
+ listpats = []
+ for pat in pats:
+ if match_.patkind(pat) is not None:
+ listpats.append(pat)
+ else:
+ listpats.append(makestandin(pat))
+
+ try:
+ origcopyfile = util.copyfile
+ copiedfiles = []
+ def override_copyfile(src, dest):
+ if lfutil.shortname in src and lfutil.shortname in dest:
+ destlfile = dest.replace(lfutil.shortname, '')
+ if not opts['force'] and os.path.exists(destlfile):
+ raise IOError('',
+ _('destination largefile already exists'))
+ copiedfiles.append((src, dest))
+ origcopyfile(src, dest)
+
+ util.copyfile = override_copyfile
+ result += orig(ui, repo, listpats, opts, rename)
+ finally:
+ util.copyfile = origcopyfile
+
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ for (src, dest) in copiedfiles:
+ if lfutil.shortname in src and lfutil.shortname in dest:
+ srclfile = src.replace(lfutil.shortname, '')
+ destlfile = dest.replace(lfutil.shortname, '')
+ destlfiledir = os.path.dirname(destlfile) or '.'
+ if not os.path.isdir(destlfiledir):
+ os.makedirs(destlfiledir)
+ if rename:
+ os.rename(srclfile, destlfile)
+ lfdirstate.remove(os.path.relpath(srclfile,
+ repo.root))
+ else:
+ util.copyfile(srclfile, destlfile)
+ lfdirstate.add(os.path.relpath(destlfile,
+ repo.root))
+ lfdirstate.write()
+ except util.Abort, e:
+ if str(e) != 'no files to copy':
+ raise e
+ else:
+ nolfiles = True
+ finally:
+ restorematchfn()
+ wlock.release()
+
+ if nolfiles and nonormalfiles:
+ raise util.Abort(_('no files to copy'))
+
+ return result
+
+# When the user calls revert, we have to be careful not to revert any changes
+# to other lfiles accidentally. This means we have to keep track of the lfiles
+# that are being reverted so we only pull down the necessary lfiles.
+#
+# Standins are only updated (to match the hash of lfiles) before commits.
+# Update the standins, then run the original revert (changing the matcher to
+# hit standins instead of lfiles). Based on the resulting standins, update the
+# lfiles. Then return the standins to their proper state.
+def override_revert(orig, ui, repo, *pats, **opts):
+ # Because we put the standins in a bad state (by updating them) and then
+ # return them to a correct state we need to lock to prevent others from
+ # changing them in their incorrect state.
+ wlock = repo.wlock()
+ try:
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ (modified, added, removed, missing, unknown, ignored, clean) = \
+ lfutil.lfdirstate_status(lfdirstate, repo, repo['.'].rev())
+ for lfile in modified:
+ lfutil.updatestandin(repo, lfutil.standin(lfile))
+
+ try:
+ ctx = repo[opts.get('rev')]
+ oldmatch = None # for the closure
+ def override_match(ctxorrepo, pats=[], opts={}, globbed=False,
+ default='relpath'):
+ if hasattr(ctxorrepo, 'match'):
+ ctx0 = ctxorrepo
+ else:
+ ctx0 = ctxorrepo[None]
+ match = oldmatch(ctxorrepo, pats, opts, globbed, default)
+ m = copy.copy(match)
+ def tostandin(f):
+ if lfutil.standin(f) in ctx0 or lfutil.standin(f) in ctx:
+ return lfutil.standin(f)
+ elif lfutil.standin(f) in repo[None]:
+ return None
+ return f
+ m._files = [tostandin(f) for f in m._files]
+ m._files = [f for f in m._files if f is not None]
+ m._fmap = set(m._files)
+ orig_matchfn = m.matchfn
+ def matchfn(f):
+ if lfutil.isstandin(f):
+ # We need to keep track of what lfiles are being
+ # matched so we know which ones to update later
+ # (otherwise we revert changes to other lfiles
+ # accidentally). This is repo specific, so duckpunch
+ # the repo object to keep the list of lfiles for us
+ # later.
+ if orig_matchfn(lfutil.splitstandin(f)) and \
+ (f in repo[None] or f in ctx):
+ lfileslist = getattr(repo, '_lfilestoupdate', [])
+ lfileslist.append(lfutil.splitstandin(f))
+ repo._lfilestoupdate = lfileslist
+ return True
+ else:
+ return False
+ return orig_matchfn(f)
+ m.matchfn = matchfn
+ return m
+ oldmatch = installmatchfn(override_match)
+ try:
+ # Mercurial >= 1.9
+ scmutil.match
+ matches = override_match(repo[None], pats, opts)
+ except ImportError:
+ # Mercurial <= 1.8
+ matches = override_match(repo, pats, opts)
+ orig(ui, repo, *pats, **opts)
+ finally:
+ restorematchfn()
+ lfileslist = getattr(repo, '_lfilestoupdate', [])
+ lfcommands.updatelfiles(ui, repo, filelist=lfileslist, printmessage=False)
+ # Empty out the lfiles list so we start fresh next time
+ repo._lfilestoupdate = []
+ for lfile in modified:
+ if lfile in lfileslist:
+ if os.path.exists(repo.wjoin(lfutil.standin(lfile))) and lfile\
+ in repo['.']:
+ lfutil.writestandin(repo, lfutil.standin(lfile),
+ repo['.'][lfile].data().strip(),
+ 'x' in repo['.'][lfile].flags())
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ for lfile in added:
+ standin = lfutil.standin(lfile)
+ if standin not in ctx and (standin in matches or opts.get('all')):
+ if lfile in lfdirstate:
+ try:
+ # Mercurial >= 1.9
+ lfdirstate.drop(lfile)
+ except AttributeError:
+ # Mercurial <= 1.8
+ lfdirstate.forget(lfile)
+ util.unlinkpath(repo.wjoin(standin))
+ lfdirstate.write()
+ finally:
+ wlock.release()
+
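+# hg_update, hg_clean and hg_merge wrap the corresponding functions in
+# mercurial.hg; after the working copy has changed, updatelfiles brings the
+# lfile contents back in sync with the new standins.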
+def hg_update(orig, repo, node):
+ result = orig(repo, node)
+ # XXX check if it worked first
+ lfcommands.updatelfiles(repo.ui, repo)
+ return result
+
+def hg_clean(orig, repo, node, show_stats=True):
+ result = orig(repo, node, show_stats)
+ lfcommands.updatelfiles(repo.ui, repo)
+ return result
+
+def hg_merge(orig, repo, node, force=None, remind=True):
+ result = orig(repo, node, force, remind)
+ lfcommands.updatelfiles(repo.ui, repo)
+ return result
+
+# When we rebase a repository with remotely changed lfiles, we need
+# to take some extra care so that the lfiles are correctly updated
+# in the working copy
+def override_pull(orig, ui, repo, source=None, **opts):
+ if opts.get('rebase', False):
+ repo._isrebasing = True
+ try:
+ if opts.get('update'):
+ del opts['update']
+ ui.debug('--update and --rebase are not compatible, ignoring '
+ 'the update flag\n')
+ del opts['rebase']
+ try:
+ # Mercurial >= 1.9
+ cmdutil.bailifchanged(repo)
+ except AttributeError:
+ # Mercurial <= 1.8
+ cmdutil.bail_if_changed(repo)
+ revsprepull = len(repo)
+ origpostincoming = commands.postincoming
+ def _dummy(*args, **kwargs):
+ pass
+ commands.postincoming = _dummy
+ repo.lfpullsource = source
+ if not source:
+ source = 'default'
+ try:
+ result = commands.pull(ui, repo, source, **opts)
+ finally:
+ commands.postincoming = origpostincoming
+ revspostpull = len(repo)
+ if revspostpull > revsprepull:
+ result = result or rebase.rebase(ui, repo)
+ finally:
+ repo._isrebasing = False
+ else:
+ repo.lfpullsource = source
+ if not source:
+ source = 'default'
+ result = orig(ui, repo, source, **opts)
+ return result
+
+def override_rebase(orig, ui, repo, **opts):
+ repo._isrebasing = True
+ try:
+ orig(ui, repo, **opts)
+ finally:
+ repo._isrebasing = False
+
+def override_archive(orig, repo, dest, node, kind, decode=True, matchfn=None,
+ prefix=None, mtime=None, subrepos=None):
+ # No need to lock because we are only reading history and lfile caches
+ # neither of which are modified
+
+ lfcommands.cachelfiles(repo.ui, repo, node)
+
+ if kind not in archival.archivers:
+ raise util.Abort(_("unknown archive type '%s'") % kind)
+
+ ctx = repo[node]
+
+    # In Mercurial <= 1.5 the prefix is passed to the archiver, so try that
+    # first; if it doesn't work, we are probably in Mercurial >= 1.6, where
+    # the prefix is not handled by the archiver.
+ try:
+ archiver = archival.archivers[kind](dest, prefix, mtime or \
+ ctx.date()[0])
+
+ def write(name, mode, islink, getdata):
+ if matchfn and not matchfn(name):
+ return
+ data = getdata()
+ if decode:
+ data = repo.wwritedata(name, data)
+ archiver.addfile(name, mode, islink, data)
+ except TypeError:
+ if kind == 'files':
+ if prefix:
+ raise util.Abort(
+ _('cannot give prefix when archiving to files'))
+ else:
+ prefix = archival.tidyprefix(dest, kind, prefix)
+
+ def write(name, mode, islink, getdata):
+ if matchfn and not matchfn(name):
+ return
+ data = getdata()
+ if decode:
+ data = repo.wwritedata(name, data)
+ archiver.addfile(prefix + name, mode, islink, data)
+
+ archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
+
+ if repo.ui.configbool("ui", "archivemeta", True):
+ def metadata():
+ base = 'repo: %s\nnode: %s\nbranch: %s\n' % (
+ hex(repo.changelog.node(0)), hex(node), ctx.branch())
+
+ tags = ''.join('tag: %s\n' % t for t in ctx.tags()
+ if repo.tagtype(t) == 'global')
+ if not tags:
+ repo.ui.pushbuffer()
+ opts = {'template': '{latesttag}\n{latesttagdistance}',
+ 'style': '', 'patch': None, 'git': None}
+ cmdutil.show_changeset(repo.ui, repo, opts).show(ctx)
+ ltags, dist = repo.ui.popbuffer().split('\n')
+ tags = ''.join('latesttag: %s\n' % t for t in ltags.split(':'))
+ tags += 'latesttagdistance: %s\n' % dist
+
+ return base + tags
+
+ write('.hg_archival.txt', 0644, False, metadata)
+
+ for f in ctx:
+ ff = ctx.flags(f)
+ getdata = ctx[f].data
+ if lfutil.isstandin(f):
+ path = lfutil.findfile(repo, getdata().strip())
+ f = lfutil.splitstandin(f)
+
+            def getdatafn():
+                fd = None
+                try:
+                    fd = open(path, 'rb')
+                    return fd.read()
+                finally:
+                    if fd:
+                        fd.close()
+
+ getdata = getdatafn
+ write(f, 'x' in ff and 0755 or 0644, 'l' in ff, getdata)
+
+ if subrepos:
+ for subpath in ctx.substate:
+ sub = ctx.sub(subpath)
+ try:
+ sub.archive(repo.ui, archiver, prefix)
+ except TypeError:
+ sub.archive(archiver, prefix)
+
+ archiver.done()
+
+# If a lfile is modified the change is not reflected in its standin until a
+# commit. cmdutil.bailifchanged raises an exception if the repo has
+# uncommitted changes. Wrap it to also check if lfiles were changed. This is
+# used by bisect and backout.
+def override_bailifchanged(orig, repo):
+ orig(repo)
+ repo.lfstatus = True
+ modified, added, removed, deleted = repo.status()[:4]
+ repo.lfstatus = False
+ if modified or added or removed or deleted:
+ raise util.Abort(_('outstanding uncommitted changes'))
+
+# Fetch doesn't use cmdutil.bail_if_changed so override it to add the check
+def override_fetch(orig, ui, repo, *pats, **opts):
+ repo.lfstatus = True
+ modified, added, removed, deleted = repo.status()[:4]
+ repo.lfstatus = False
+ if modified or added or removed or deleted:
+ raise util.Abort(_('outstanding uncommitted changes'))
+ return orig(ui, repo, *pats, **opts)
+
+def override_forget(orig, ui, repo, *pats, **opts):
+ installnormalfilesmatchfn(repo[None].manifest())
+ orig(ui, repo, *pats, **opts)
+ restorematchfn()
+ try:
+ # Mercurial >= 1.9
+ m = scmutil.match(repo[None], pats, opts)
+ except ImportError:
+ # Mercurial <= 1.8
+ m = cmdutil.match(repo, pats, opts)
+
+ try:
+ repo.lfstatus = True
+ s = repo.status(match=m, clean=True)
+ finally:
+ repo.lfstatus = False
+ forget = sorted(s[0] + s[1] + s[3] + s[6])
+ forget = [f for f in forget if lfutil.standin(f) in repo[None].manifest()]
+
+ for f in forget:
+ if lfutil.standin(f) not in repo.dirstate and not \
+ os.path.isdir(m.rel(lfutil.standin(f))):
+ ui.warn(_('not removing %s: file is already untracked\n')
+ % m.rel(f))
+
+ for f in forget:
+ if ui.verbose or not m.exact(f):
+ ui.status(_('removing %s\n') % m.rel(f))
+
+    # Need to lock because standin files are deleted and then removed from
+    # the repository, and we could race in between.
+ wlock = repo.wlock()
+ try:
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ for f in forget:
+ if lfdirstate[f] == 'a':
+ lfdirstate.drop(f)
+ else:
+ lfdirstate.remove(f)
+ lfdirstate.write()
+ lfutil.repo_remove(repo, [lfutil.standin(f) for f in forget],
+ unlink=True)
+ finally:
+ wlock.release()
+
+def getoutgoinglfiles(ui, repo, dest=None, **opts):
+ dest = ui.expandpath(dest or 'default-push', dest or 'default')
+ dest, branches = hg.parseurl(dest, opts.get('branch'))
+ revs, checkout = hg.addbranchrevs(repo, repo, branches, opts.get('rev'))
+ if revs:
+ revs = [repo.lookup(rev) for rev in revs]
+
+ # Mercurial <= 1.5 had remoteui in cmdutil, then it moved to hg
+ try:
+ remoteui = cmdutil.remoteui
+ except AttributeError:
+ remoteui = hg.remoteui
+
+ try:
+ remote = hg.repository(remoteui(repo, opts), dest)
+ except error.RepoError:
+ return None
+ o = lfutil.findoutgoing(repo, remote, False)
+ if not o:
+ return None
+ o = repo.changelog.nodesbetween(o, revs)[0]
+ if opts.get('newest_first'):
+ o.reverse()
+
+ toupload = set()
+ for n in o:
+ parents = [p for p in repo.changelog.parents(n) if p != node.nullid]
+ ctx = repo[n]
+ files = set(ctx.files())
+ if len(parents) == 2:
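+            # ctx.files() can be incomplete for merge changesets, so compare
+            # the manifest against both parents to collect the changed files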
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
+ files.add(f)
+ toupload = toupload.union(set([f for f in files if lfutil.isstandin(f)\
+ and f in ctx]))
+ return toupload
+
+def override_outgoing(orig, ui, repo, dest=None, **opts):
+ orig(ui, repo, dest, **opts)
+
+ if opts.pop('large', None):
+ toupload = getoutgoinglfiles(ui, repo, dest, **opts)
+ if toupload is None:
+            ui.status(_('largefiles: no remote repo\n'))
+ else:
+ ui.status(_('largefiles to upload:\n'))
+ for file in toupload:
+ ui.status(lfutil.splitstandin(file) + '\n')
+ ui.status('\n')
+
+def override_summary(orig, ui, repo, *pats, **opts):
+ orig(ui, repo, *pats, **opts)
+
+ if opts.pop('large', None):
+ toupload = getoutgoinglfiles(ui, repo, None, **opts)
+ if toupload is None:
+            ui.status(_('largefiles: no remote repo\n'))
+ else:
+ ui.status(_('largefiles: %d to upload\n') % len(toupload))
+
+def override_addremove(orig, ui, repo, *pats, **opts):
+    # Check if the parent or child has lfiles; if they do, don't allow it. If
+    # there is a symlink in the manifest then getting the manifest throws an
+    # exception; catch it and let addremove deal with it. This happens in
+    # Mercurial's test test-addremove-symlink.
+ try:
+ manifesttip = set(repo['tip'].manifest())
+ except util.Abort:
+ manifesttip = set()
+ try:
+ manifestworking = set(repo[None].manifest())
+ except util.Abort:
+ manifestworking = set()
+
+    # Manifests are only iterable, so turn them into sets and then union them
+ for file in manifesttip.union(manifestworking):
+ if file.startswith(lfutil.shortname):
+ raise util.Abort(
+ _('addremove cannot be run on a repo with largefiles'))
+
+ return orig(ui, repo, *pats, **opts)
+
+# Calling purge with --all will cause the lfiles to be deleted.
+# Override repo.status to prevent this from happening.
+def override_purge(orig, ui, repo, *dirs, **opts):
+ oldstatus = repo.status
+ def override_status(node1='.', node2=None, match=None, ignored=False,
+ clean=False, unknown=False, listsubrepos=False):
+ r = oldstatus(node1, node2, match, ignored, clean, unknown,
+ listsubrepos)
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ modified, added, removed, deleted, unknown, ignored, clean = r
+ unknown = [f for f in unknown if lfdirstate[f] == '?']
+ ignored = [f for f in ignored if lfdirstate[f] == '?']
+ return modified, added, removed, deleted, unknown, ignored, clean
+ repo.status = override_status
+ orig(ui, repo, *dirs, **opts)
+ repo.status = oldstatus
+
+def override_rollback(orig, ui, repo, **opts):
+ result = orig(ui, repo, **opts)
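+    # after rolling back, check out the standins as they are in the new
+    # parent (a partial update touching only standins), then resync the
+    # lfile dirstate against them below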
+ merge.update(repo, node=None, branchmerge=False, force=True,
+ partial=lfutil.isstandin)
+ lfdirstate = lfutil.openlfdirstate(ui, repo)
+ lfiles = lfutil.listlfiles(repo)
+ oldlfiles = lfutil.listlfiles(repo, repo[None].parents()[0].rev())
+ for file in lfiles:
+ if file in oldlfiles:
+ lfdirstate.normallookup(file)
+ else:
+ lfdirstate.add(file)
+ lfdirstate.write()
+ return result
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/proto.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,161 @@
+# Copyright 2011 Fog Creek Software
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+import os
+import tempfile
+import urllib2
+
+from mercurial import error, httprepo, util, wireproto
+from mercurial.i18n import _
+
+import lfutil
+
+LARGEFILES_REQUIRED_MSG = '\nThis repository uses the largefiles extension.' \
+ '\n\nPlease enable it in your Mercurial config ' \
+ 'file.\n'
+
+def putlfile(repo, proto, sha):
+ """putlfile puts a largefile into a repository's local cache and into the
+ system cache."""
+ f = None
+ proto.redirect()
+ try:
+ try:
+ f = tempfile.NamedTemporaryFile(mode='wb+', prefix='hg-putlfile-')
+ proto.getfile(f)
+ f.seek(0)
+ if sha != lfutil.hexsha1(f):
+ return wireproto.pushres(1)
+ lfutil.copytocacheabsolute(repo, f.name, sha)
+ except IOError:
+ repo.ui.warn(
+                _('error: could not put received data into largefile store\n'))
+ return wireproto.pushres(1)
+ finally:
+ if f:
+ f.close()
+
+ return wireproto.pushres(0)
+
+def getlfile(repo, proto, sha):
+ """getlfile retrieves a largefile from the repository-local cache or system
+ cache."""
+ filename = lfutil.findfile(repo, sha)
+ if not filename:
+ raise util.Abort(_('requested largefile %s not present in cache') % sha)
+ f = open(filename, 'rb')
+ length = os.fstat(f.fileno())[6]
+ # since we can't set an HTTP content-length header here, and mercurial core
+ # provides no way to give the length of a streamres (and reading the entire
+ # file into RAM would be ill-advised), we just send the length on the first
+ # line of the response, like the ssh proto does for string responses.
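+    # e.g. the response for a 12-byte largefile is '12\n' followed by the
+    # twelve bytes of data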
+ def generator():
+ yield '%d\n' % length
+ for chunk in f:
+ yield chunk
+ return wireproto.streamres(generator())
+
+def statlfile(repo, proto, sha):
+ """statlfile sends '2\n' if the largefile is missing, '1\n' if it has a
+ mismatched checksum, or '0\n' if it is in good condition"""
+ filename = lfutil.findfile(repo, sha)
+ if not filename:
+ return '2\n'
+ fd = None
+ try:
+ fd = open(filename, 'rb')
+ return lfutil.hexsha1(fd) == sha and '0\n' or '1\n'
+ finally:
+ if fd:
+ fd.close()
+
+def wirereposetup(ui, repo):
+ class lfileswirerepository(repo.__class__):
+ def putlfile(self, sha, fd):
+ # unfortunately, httprepository._callpush tries to convert its
+ # input file-like into a bundle before sending it, so we can't use
+ # it ...
+ if issubclass(self.__class__, httprepo.httprepository):
+ try:
+ return int(self._call('putlfile', data=fd, sha=sha,
+ headers={'content-type':'application/mercurial-0.1'}))
+ except (ValueError, urllib2.HTTPError):
+ return 1
+ # ... but we can't use sshrepository._call because the data=
+ # argument won't get sent, and _callpush does exactly what we want
+ # in this case: send the data straight through
+ else:
+ try:
+ ret, output = self._callpush("putlfile", fd, sha=sha)
+ if ret == "":
+ raise error.ResponseError(_('putlfile failed:'),
+ output)
+ return int(ret)
+ except IOError:
+ return 1
+ except ValueError:
+ raise error.ResponseError(
+ _('putlfile failed (unexpected response):'), ret)
+
+ def getlfile(self, sha):
+ stream = self._callstream("getlfile", sha=sha)
+ length = stream.readline()
+ try:
+ length = int(length)
+ except ValueError:
+ self._abort(error.ResponseError(_("unexpected response:"), length))
+ return (length, stream)
+
+ def statlfile(self, sha):
+ try:
+ return int(self._call("statlfile", sha=sha))
+ except (ValueError, urllib2.HTTPError):
+ # if the server returns anything but an integer followed by a
+                # newline, it's not speaking our language; if we get an HTTP
+                # error, we can't be sure the largefile is present; either
+                # way, consider it missing
+ return 2
+
+ repo.__class__ = lfileswirerepository
+
+# advertise the largefiles=serve capability
+def capabilities(repo, proto):
+ return capabilities_orig(repo, proto) + ' largefiles=serve'
+
+# duplicate what Mercurial's new out-of-band errors mechanism does, because
+# clients old and new alike handle it well
+def webproto_refuseclient(self, message):
+ self.req.header([('Content-Type', 'application/hg-error')])
+ return message
+
+def sshproto_refuseclient(self, message):
+ self.ui.write_err('%s\n-\n' % message)
+ self.fout.write('\n')
+ self.fout.flush()
+
+ return ''
+
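+# Largefiles clients never ask the server for 'heads' directly; they rewrite
+# it to 'lheads' (see sshrepo_callstream and httprepo_callstream below),
+# which uisetup maps back to the real heads implementation. A plain 'heads'
+# request therefore comes from a client without the extension and is refused
+# whenever the repo actually contains largefiles.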
+def heads(repo, proto):
+ if lfutil.islfilesrepo(repo):
+ try:
+ # Mercurial >= f4522df38c65
+ return wireproto.ooberror(LARGEFILES_REQUIRED_MSG)
+ except AttributeError:
+ return proto.refuseclient(LARGEFILES_REQUIRED_MSG)
+ return wireproto.heads(repo, proto)
+
+def sshrepo_callstream(self, cmd, **args):
+ if cmd == 'heads' and self.capable('largefiles'):
+ cmd = 'lheads'
+ if cmd == 'batch' and self.capable('largefiles'):
+ args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+ return ssh_oldcallstream(self, cmd, **args)
+
+def httprepo_callstream(self, cmd, **args):
+ if cmd == 'heads' and self.capable('largefiles'):
+ cmd = 'lheads'
+ if cmd == 'batch' and self.capable('largefiles'):
+ args['cmds'] = args['cmds'].replace('heads ', 'lheads ')
+ return http_oldcallstream(self, cmd, **args)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/remotestore.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,106 @@
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''Remote largefile store; the base class for servestore'''
+
+import urllib2
+
+from mercurial import util
+from mercurial.i18n import _
+
+import lfutil
+import basestore
+
+class remotestore(basestore.basestore):
+ """A largefile store accessed over a network"""
+ def __init__(self, ui, repo, url):
+ super(remotestore, self).__init__(ui, repo, url)
+
+ def put(self, source, hash):
+ if self._verify(hash):
+ return
+ if self.sendfile(source, hash):
+ raise util.Abort(
+ _('remotestore: could not put %s to remote store %s')
+ % (source, self.url))
+ self.ui.debug(
+            _('remotestore: put %s to remote store %s\n') % (source, self.url))
+
+ def exists(self, hash):
+ return self._verify(hash)
+
+ def sendfile(self, filename, hash):
+ self.ui.debug('remotestore: sendfile(%s, %s)\n' % (filename, hash))
+ fd = None
+ try:
+ try:
+ fd = lfutil.httpsendfile(self.ui, filename)
+ except IOError, e:
+ raise util.Abort(
+ _('remotestore: could not open file %s: %s')
+ % (filename, str(e)))
+ return self._put(hash, fd)
+ finally:
+ if fd:
+ fd.close()
+
+ def _getfile(self, tmpfile, filename, hash):
+ # quit if the largefile isn't there
+ stat = self._stat(hash)
+ if stat:
+ raise util.Abort(_('remotestore: largefile %s is %s') %
+ (hash, stat == 1 and 'invalid' or 'missing'))
+
+ try:
+ length, infile = self._get(hash)
+        except urllib2.HTTPError, e:
+ # 401s get converted to util.Aborts; everything else is fine being
+ # turned into a StoreError
+ raise basestore.StoreError(filename, hash, self.url, str(e))
+ except urllib2.URLError, e:
+ # This usually indicates a connection problem, so don't
+ # keep trying with the other files... they will probably
+ # all fail too.
+ raise util.Abort('%s: %s' % (self.url, str(e.reason)))
+ except IOError, e:
+ raise basestore.StoreError(filename, hash, self.url, str(e))
+
+ # Mercurial does not close its SSH connections after writing a stream
+ if length is not None:
+ infile = lfutil.limitreader(infile, length)
+ return lfutil.copyandhash(lfutil.blockstream(infile), tmpfile)
+
+ def _verify(self, hash):
+ return not self._stat(hash)
+
+ def _verifyfile(self, cctx, cset, contents, standin, verified):
+ filename = lfutil.splitstandin(standin)
+ if not filename:
+ return False
+ fctx = cctx[standin]
+ key = (filename, fctx.filenode())
+ if key in verified:
+ return False
+
+ verified.add(key)
+
+        # the standin's contents give the SHA-1 hash of the largefile to check
+        hash = fctx.data().strip()
+        stat = self._stat(hash)
+ if not stat:
+ return False
+ elif stat == 1:
+ self.ui.warn(
+ _('changeset %s: %s: contents differ\n')
+ % (cset, filename))
+ return True # failed
+ elif stat == 2:
+ self.ui.warn(
+ _('changeset %s: %s missing\n')
+ % (cset, filename))
+ return True # failed
+ else:
+            raise util.Abort(_('check failed, unexpected response '
+                               'statlfile: %d') % stat)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/reposetup.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,411 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles repositories: reposetup'''
+import copy
+import types
+import os
+import re
+
+from mercurial import context, error, manifest, match as match_, \
+ node, util
+from mercurial.i18n import _
+
+import lfcommands
+import proto
+import lfutil
+
+def reposetup(ui, repo):
+ # wire repositories should be given new wireproto functions but not the
+ # other largefiles modifications
+ if not repo.local():
+ return proto.wirereposetup(ui, repo)
+
+ for name in ('status', 'commitctx', 'commit', 'push'):
+ method = getattr(repo, name)
+        if (isinstance(method, types.FunctionType) and
+                method.func_name == 'wrap'):
+ ui.warn(_('largefiles: repo method %r appears to have already been'
+ ' wrapped by another extension: '
+ 'largefiles may behave incorrectly\n')
+ % name)
+
+ class lfiles_repo(repo.__class__):
+ lfstatus = False
+ def status_nolfiles(self, *args, **kwargs):
+ return super(lfiles_repo, self).status(*args, **kwargs)
+
+ # When lfstatus is set, return a context that gives the names of lfiles
+ # instead of their corresponding standins and identifies the lfiles as
+ # always binary, regardless of their actual contents.
+ def __getitem__(self, changeid):
+ ctx = super(lfiles_repo, self).__getitem__(changeid)
+ if self.lfstatus:
+ class lfiles_manifestdict(manifest.manifestdict):
+ def __contains__(self, filename):
+ if super(lfiles_manifestdict,
+ self).__contains__(filename):
+ return True
+ return super(lfiles_manifestdict,
+ self).__contains__(lfutil.shortname+'/' + filename)
+ class lfiles_ctx(ctx.__class__):
+ def files(self):
+ filenames = super(lfiles_ctx, self).files()
+                        return [re.sub('^\\' + lfutil.shortname + '/', '',
+                                       filename) for filename in filenames]
+ def manifest(self):
+ man1 = super(lfiles_ctx, self).manifest()
+ man1.__class__ = lfiles_manifestdict
+ return man1
+ def filectx(self, path, fileid=None, filelog=None):
+ try:
+ result = super(lfiles_ctx, self).filectx(path,
+ fileid, filelog)
+ except error.LookupError:
+ # Adding a null character will cause Mercurial to
+ # identify this as a binary file.
+ result = super(lfiles_ctx, self).filectx(
+ lfutil.shortname + '/' + path, fileid,
+ filelog)
+ olddata = result.data
+ result.data = lambda: olddata() + '\0'
+ return result
+ ctx.__class__ = lfiles_ctx
+ return ctx
+
+ # Figure out the status of big files and insert them into the
+ # appropriate list in the result. Also removes standin files from
+ # the listing. This function reverts to the original status if
+ # self.lfstatus is False
+ def status(self, node1='.', node2=None, match=None, ignored=False,
+ clean=False, unknown=False, listsubrepos=False):
+ listignored, listclean, listunknown = ignored, clean, unknown
+ if not self.lfstatus:
+ try:
+ return super(lfiles_repo, self).status(node1, node2, match,
+ listignored, listclean, listunknown, listsubrepos)
+ except TypeError:
+ return super(lfiles_repo, self).status(node1, node2, match,
+ listignored, listclean, listunknown)
+ else:
+ # some calls in this function rely on the old version of status
+ self.lfstatus = False
+ if isinstance(node1, context.changectx):
+ ctx1 = node1
+ else:
+ ctx1 = repo[node1]
+ if isinstance(node2, context.changectx):
+ ctx2 = node2
+ else:
+ ctx2 = repo[node2]
+ working = ctx2.rev() is None
+ parentworking = working and ctx1 == self['.']
+
+ def inctx(file, ctx):
+ try:
+ if ctx.rev() is None:
+ return file in ctx.manifest()
+ ctx[file]
+ return True
+                except error.LookupError:
+ return False
+
+            # Create a copy of match that matches standins instead of lfiles.
+            # If the matcher is not set, it is the always matcher, so
+            # overwrite that.
+ if match is None:
+ match = match_.always(self.root, self.getcwd())
+
+ def tostandin(file):
+ if inctx(lfutil.standin(file), ctx2):
+ return lfutil.standin(file)
+ return file
+
+ m = copy.copy(match)
+ m._files = [tostandin(f) for f in m._files]
+
+ # get ignored clean and unknown but remove them later if they
+ # were not asked for
+ try:
+ result = super(lfiles_repo, self).status(node1, node2, m,
+ True, True, True, listsubrepos)
+ except TypeError:
+ result = super(lfiles_repo, self).status(node1, node2, m,
+ True, True, True)
+ if working:
+ # Hold the wlock while we read lfiles and update the
+ # lfdirstate
+ wlock = repo.wlock()
+ try:
+ # Any non lfiles that were explicitly listed must be
+ # taken out or lfdirstate.status will report an error.
+ # The status of these files was already computed using
+ # super's status.
+ lfdirstate = lfutil.openlfdirstate(ui, self)
+ match._files = [f for f in match._files if f in
+ lfdirstate]
+ s = lfdirstate.status(match, [], listignored,
+ listclean, listunknown)
+ (unsure, modified, added, removed, missing, unknown,
+ ignored, clean) = s
+ if parentworking:
+ for lfile in unsure:
+ if ctx1[lfutil.standin(lfile)].data().strip() \
+ != lfutil.hashfile(self.wjoin(lfile)):
+ modified.append(lfile)
+ else:
+ clean.append(lfile)
+ lfdirstate.normal(lfile)
+ lfdirstate.write()
+ else:
+ tocheck = unsure + modified + added + clean
+ modified, added, clean = [], [], []
+
+ for lfile in tocheck:
+ standin = lfutil.standin(lfile)
+ if inctx(standin, ctx1):
+ if ctx1[standin].data().strip() != \
+ lfutil.hashfile(self.wjoin(lfile)):
+ modified.append(lfile)
+ else:
+ clean.append(lfile)
+ else:
+ added.append(lfile)
+ finally:
+ wlock.release()
+
+ for standin in ctx1.manifest():
+ if not lfutil.isstandin(standin):
+ continue
+ lfile = lfutil.splitstandin(standin)
+ if not match(lfile):
+ continue
+ if lfile not in lfdirstate:
+ removed.append(lfile)
+ # Handle unknown and ignored differently
+ lfiles = (modified, added, removed, missing, [], [], clean)
+ result = list(result)
+ # Unknown files
+ result[4] = [f for f in unknown if repo.dirstate[f] == '?'\
+ and not lfutil.isstandin(f)]
+ # Ignored files must be ignored by both the dirstate and
+ # lfdirstate
+ result[5] = set(ignored).intersection(set(result[5]))
+ # combine normal files and lfiles
+ normals = [[fn for fn in filelist if not \
+ lfutil.isstandin(fn)] for filelist in result]
+ result = [sorted(list1 + list2) for (list1, list2) in \
+ zip(normals, lfiles)]
+ else:
+ def toname(f):
+ if lfutil.isstandin(f):
+ return lfutil.splitstandin(f)
+ return f
+ result = [[toname(f) for f in items] for items in result]
+
+ if not listunknown:
+ result[4] = []
+ if not listignored:
+ result[5] = []
+ if not listclean:
+ result[6] = []
+ self.lfstatus = True
+ return result
+
+ # This call happens after a commit has occurred. Copy all of the lfiles
+ # into the cache
+ def commitctx(self, *args, **kwargs):
+ node = super(lfiles_repo, self).commitctx(*args, **kwargs)
+ ctx = self[node]
+ for filename in ctx.files():
+ if lfutil.isstandin(filename) and filename in ctx.manifest():
+ realfile = lfutil.splitstandin(filename)
+ lfutil.copytocache(self, ctx.node(), realfile)
+
+ return node
+
+ # This call happens before a commit has occurred. The lfile standins
+ # have not had their contents updated (to reflect the hash of their
+ # lfile). Do that here.
+ def commit(self, text="", user=None, date=None, match=None,
+ force=False, editor=False, extra={}):
+ orig = super(lfiles_repo, self).commit
+
+ wlock = repo.wlock()
+ try:
+ if getattr(repo, "_isrebasing", False):
+ # We have to take the time to pull down the new lfiles now.
+ # Otherwise if we are rebasing, any lfiles that were
+ # modified in the changesets we are rebasing on top of get
+ # overwritten either by the rebase or in the first commit
+ # after the rebase.
+ lfcommands.updatelfiles(repo.ui, repo)
+ # Case 1: user calls commit with no specific files or
+ # include/exclude patterns: refresh and commit everything.
+ if (match is None) or (not match.anypats() and not \
+ match.files()):
+ lfiles = lfutil.listlfiles(self)
+ lfdirstate = lfutil.openlfdirstate(ui, self)
+ # this only loops through lfiles that exist (not
+ # removed/renamed)
+ for lfile in lfiles:
+ if os.path.exists(self.wjoin(lfutil.standin(lfile))):
+ # this handles the case where a rebase is being
+ # performed and the working copy is not updated
+ # yet.
+ if os.path.exists(self.wjoin(lfile)):
+ lfutil.updatestandin(self,
+ lfutil.standin(lfile))
+ lfdirstate.normal(lfile)
+ for lfile in lfdirstate:
+ if not os.path.exists(
+ repo.wjoin(lfutil.standin(lfile))):
+ try:
+ # Mercurial >= 1.9
+ lfdirstate.drop(lfile)
+ except AttributeError:
+ # Mercurial <= 1.8
+ lfdirstate.forget(lfile)
+ lfdirstate.write()
+
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+
+ for file in match.files():
+ if lfutil.isstandin(file):
+                        raise util.Abort(_("Don't commit largefile standin. "
+                                           "Commit largefile."))
+
+ # Case 2: user calls commit with specified patterns: refresh
+ # any matching big files.
+ smatcher = lfutil.composestandinmatcher(self, match)
+ standins = lfutil.dirstate_walk(self.dirstate, smatcher)
+
+ # No matching big files: get out of the way and pass control to
+ # the usual commit() method.
+ if not standins:
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+
+ # Refresh all matching big files. It's possible that the
+ # commit will end up failing, in which case the big files will
+ # stay refreshed. No harm done: the user modified them and
+ # asked to commit them, so sooner or later we're going to
+ # refresh the standins. Might as well leave them refreshed.
+ lfdirstate = lfutil.openlfdirstate(ui, self)
+ for standin in standins:
+ lfile = lfutil.splitstandin(standin)
+                    if lfdirstate[lfile] != 'r':
+ lfutil.updatestandin(self, standin)
+ lfdirstate.normal(lfile)
+ else:
+ try:
+ # Mercurial >= 1.9
+ lfdirstate.drop(lfile)
+ except AttributeError:
+ # Mercurial <= 1.8
+ lfdirstate.forget(lfile)
+ lfdirstate.write()
+
+ # Cook up a new matcher that only matches regular files or
+ # standins corresponding to the big files requested by the
+ # user. Have to modify _files to prevent commit() from
+ # complaining "not tracked" for big files.
+ lfiles = lfutil.listlfiles(repo)
+ match = copy.copy(match)
+ orig_matchfn = match.matchfn
+
+ # Check both the list of lfiles and the list of standins
+ # because if a lfile was removed, it won't be in the list of
+ # lfiles at this point
+ match._files += sorted(standins)
+
+ actualfiles = []
+ for f in match._files:
+ fstandin = lfutil.standin(f)
+
+ # Ignore known lfiles and standins
+ if f in lfiles or fstandin in standins:
+ continue
+
+ # Append directory separator to avoid collisions
+ if not fstandin.endswith(os.sep):
+ fstandin += os.sep
+
+ # Prevalidate matching standin directories
+ if lfutil.any_(st for st in match._files if \
+ st.startswith(fstandin)):
+ continue
+ actualfiles.append(f)
+ match._files = actualfiles
+
+ def matchfn(f):
+ if orig_matchfn(f):
+ return f not in lfiles
+ else:
+ return f in standins
+
+ match.matchfn = matchfn
+ return orig(text=text, user=user, date=date, match=match,
+ force=force, editor=editor, extra=extra)
+ finally:
+ wlock.release()
+
+ def push(self, remote, force=False, revs=None, newbranch=False):
+ o = lfutil.findoutgoing(repo, remote, force)
+ if o:
+ toupload = set()
+ o = repo.changelog.nodesbetween(o, revs)[0]
+ for n in o:
+ parents = [p for p in repo.changelog.parents(n) if p != \
+ node.nullid]
+ ctx = repo[n]
+ files = set(ctx.files())
+ if len(parents) == 2:
+ mc = ctx.manifest()
+ mp1 = ctx.parents()[0].manifest()
+ mp2 = ctx.parents()[1].manifest()
+ for f in mp1:
+ if f not in mc:
+ files.add(f)
+ for f in mp2:
+ if f not in mc:
+ files.add(f)
+ for f in mc:
+ if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f,
+ None):
+ files.add(f)
+
+ toupload = toupload.union(set([ctx[f].data().strip() for f\
+ in files if lfutil.isstandin(f) and f in ctx]))
+ lfcommands.uploadlfiles(ui, self, remote, toupload)
+ # Mercurial >= 1.6 takes the newbranch argument, try that first.
+ try:
+ return super(lfiles_repo, self).push(remote, force, revs,
+ newbranch)
+ except TypeError:
+ return super(lfiles_repo, self).push(remote, force, revs)
+
+ repo.__class__ = lfiles_repo
+
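+    # Once any largefile makes it into the store, record 'largefiles' in the
+    # repo requirements, so that Mercurial clients without the extension
+    # refuse to operate on the repo rather than mishandle its standins.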
+ def checkrequireslfiles(ui, repo, **kwargs):
+ if 'largefiles' not in repo.requirements and lfutil.any_(
+ lfutil.shortname+'/' in f[0] for f in repo.store.datafiles()):
+ # work around bug in mercurial 1.9 whereby requirements is a list
+ # on newly-cloned repos
+ repo.requirements = set(repo.requirements)
+
+ repo.requirements |= set(['largefiles'])
+ repo._writerequirements()
+
+ checkrequireslfiles(ui, repo)
+
+ ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles)
+ ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/uisetup.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,125 @@
+# Copyright 2009-2010 Gregory P. Ward
+# Copyright 2009-2010 Intelerad Medical Systems Incorporated
+# Copyright 2010-2011 Fog Creek Software
+# Copyright 2010-2011 Unity Technologies
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''setup for largefiles extension: uisetup'''
+
+from mercurial import archival, cmdutil, commands, extensions, filemerge, hg, \
+ httprepo, localrepo, sshrepo, sshserver, wireproto
+from mercurial.i18n import _
+from mercurial.hgweb import hgweb_mod, protocol
+
+import overrides
+import proto
+
+def uisetup(ui):
+ # Disable auto-status for some commands which assume that all
+ # files in the result are under Mercurial's control
+
+ entry = extensions.wrapcommand(commands.table, 'add', overrides.override_add)
+ addopt = [('', 'large', None, _('add as largefile')),
+              ('', 'lfsize', '', _('add all files above this size '
+                                   '(in megabytes) as largefiles '
+                                   '(default: 10)'))]
+ entry[1].extend(addopt)
+
+ entry = extensions.wrapcommand(commands.table, 'addremove',
+ overrides.override_addremove)
+ entry = extensions.wrapcommand(commands.table, 'remove', overrides.override_remove)
+ entry = extensions.wrapcommand(commands.table, 'forget', overrides.override_forget)
+ entry = extensions.wrapcommand(commands.table, 'status', overrides.override_status)
+ entry = extensions.wrapcommand(commands.table, 'log', overrides.override_log)
+ entry = extensions.wrapcommand(commands.table, 'rollback',
+ overrides.override_rollback)
+
+ entry = extensions.wrapcommand(commands.table, 'verify', overrides.override_verify)
+ verifyopt = [('', 'large', None, _('verify largefiles')),
+ ('', 'lfa', None,
+ _('verify all revisions of largefiles not just current')),
+ ('', 'lfc', None,
+ _('verify largefile contents not just existence'))]
+ entry[1].extend(verifyopt)
+
+ entry = extensions.wrapcommand(commands.table, 'outgoing',
+ overrides.override_outgoing)
+ outgoingopt = [('', 'large', None, _('display outgoing largefiles'))]
+ entry[1].extend(outgoingopt)
+ entry = extensions.wrapcommand(commands.table, 'summary', overrides.override_summary)
+ summaryopt = [('', 'large', None, _('display outgoing largefiles'))]
+ entry[1].extend(summaryopt)
+
+ entry = extensions.wrapcommand(commands.table, 'update', overrides.override_update)
+ entry = extensions.wrapcommand(commands.table, 'pull', overrides.override_pull)
+ entry = extensions.wrapfunction(filemerge, 'filemerge', overrides.override_filemerge)
+ entry = extensions.wrapfunction(cmdutil, 'copy', overrides.override_copy)
+
+ # Backout calls revert so we need to override both the command and the
+ # function
+ entry = extensions.wrapcommand(commands.table, 'revert', overrides.override_revert)
+ entry = extensions.wrapfunction(commands, 'revert', overrides.override_revert)
+
+    # clone uses hg._update instead of hg.update even though they are the
+    # same function... so wrap both of them
+ extensions.wrapfunction(hg, 'update', overrides.hg_update)
+ extensions.wrapfunction(hg, '_update', overrides.hg_update)
+ extensions.wrapfunction(hg, 'clean', overrides.hg_clean)
+ extensions.wrapfunction(hg, 'merge', overrides.hg_merge)
+
+ extensions.wrapfunction(archival, 'archive', overrides.override_archive)
+ if hasattr(cmdutil, 'bailifchanged'):
+ extensions.wrapfunction(cmdutil, 'bailifchanged',
+ overrides.override_bailifchanged)
+ else:
+ extensions.wrapfunction(cmdutil, 'bail_if_changed',
+ overrides.override_bailifchanged)
+
+ # create the new wireproto commands ...
+ wireproto.commands['putlfile'] = (proto.putlfile, 'sha')
+ wireproto.commands['getlfile'] = (proto.getlfile, 'sha')
+ wireproto.commands['statlfile'] = (proto.statlfile, 'sha')
+
+ # ... and wrap some existing ones
+ wireproto.commands['capabilities'] = (proto.capabilities, '')
+ wireproto.commands['heads'] = (proto.heads, '')
+ wireproto.commands['lheads'] = (wireproto.heads, '')
+
+ # make putlfile behave the same as push and {get,stat}lfile behave the same
+ # as pull w.r.t. permissions checks
+ hgweb_mod.perms['putlfile'] = 'push'
+ hgweb_mod.perms['getlfile'] = 'pull'
+ hgweb_mod.perms['statlfile'] = 'pull'
+
+ # the hello wireproto command uses wireproto.capabilities, so it won't see
+ # our largefiles capability unless we replace the actual function as well.
+ proto.capabilities_orig = wireproto.capabilities
+ wireproto.capabilities = proto.capabilities
+
+ # these let us reject non-lfiles clients and make them display our error
+ # messages
+ protocol.webproto.refuseclient = proto.webproto_refuseclient
+ sshserver.sshserver.refuseclient = proto.sshproto_refuseclient
+
+ # can't do this in reposetup because it needs to have happened before
+ # wirerepo.__init__ is called
+ proto.ssh_oldcallstream = sshrepo.sshrepository._callstream
+ proto.http_oldcallstream = httprepo.httprepository._callstream
+ sshrepo.sshrepository._callstream = proto.sshrepo_callstream
+ httprepo.httprepository._callstream = proto.httprepo_callstream
+
+ # don't die on seeing a repo with the largefiles requirement
+ localrepo.localrepository.supported |= set(['largefiles'])
+
+ # override some extensions' stuff as well
+ for name, module in extensions.extensions():
+ if name == 'fetch':
+ extensions.wrapcommand(getattr(module, 'cmdtable'), 'fetch',
+ overrides.override_fetch)
+ if name == 'purge':
+ extensions.wrapcommand(getattr(module, 'cmdtable'), 'purge',
+ overrides.override_purge)
+ if name == 'rebase':
+ extensions.wrapcommand(getattr(module, 'cmdtable'), 'rebase',
+ overrides.override_rebase)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/usage.txt Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,51 @@
+Largefiles allows for tracking large, incompressible binary files in Mercurial
+without requiring excessive bandwidth for clones and pulls. Files added as
+largefiles are not tracked directly by Mercurial; rather, their revisions are
+identified by a checksum, and Mercurial tracks these checksums. This way, when
+you clone a repository or pull in changesets, the large files in older
+revisions of the repository are not needed, and only the ones needed to update
+to the current version are downloaded. This saves both disk space and
+bandwidth.
+
+If you are starting a new repository or adding new large binary files, using
+largefiles for them is as easy as adding '--large' to your hg add command. For
+example:
+
+$ dd if=/dev/urandom of=thisfileislarge count=2000
+$ hg add --large thisfileislarge
+$ hg commit -m 'add thisfileislarge, which is large, as a largefile'
+
+When you push a changeset that affects largefiles to a remote repository, its
+largefile revisions will be uploaded along with it. Note that the remote
+Mercurial must also have the largefiles extension enabled for this to work.
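+
+For example, both the client and the server would typically carry something
+like this in their Mercurial config file:
+
+[extensions]
+largefiles =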
+
+When you pull a changeset that affects largefiles from a remote repository,
+nothing different from Mercurial's normal behavior happens. However, when you
+update to such a revision, any largefiles needed by that revision are
+downloaded and cached if they have never been downloaded before. This means
+that network access is required to update to a revision that you have not
+previously updated to.
+
+If you already have large files tracked by Mercurial without the largefiles
+extension, you will need to convert your repository in order to benefit from
+largefiles. This is done with the 'hg lfconvert' command:
+
+$ hg lfconvert --size 10 oldrepo newrepo
+
+By default, in repositories that already have largefiles in them, any new file
+over 10MB will automatically be added as a largefile. To change this
+threshold, set [largefiles].size in your Mercurial config file to the minimum
+size in megabytes to track as a largefile, or use the --lfsize option to the
+add command (also in megabytes):
+
+[largefiles]
+size = 2
+
+$ hg add --lfsize 2
+
+The [largefiles].patterns config option allows you to specify space-separated
+filename patterns (in shell glob syntax) that should always be tracked as
+largefiles:
+
+[largefiles]
+patterns = *.jpg *.{png,bmp} library.zip content/audio/*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/largefiles/wirestore.py Sat Sep 24 17:35:45 2011 +0200
@@ -0,0 +1,29 @@
+# Copyright 2010-2011 Fog Creek Software
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''largefile store working over mercurial's wire protocol'''
+
+import lfutil
+import remotestore
+
+class wirestore(remotestore.remotestore):
+ def __init__(self, ui, repo, remote):
+ cap = remote.capable('largefiles')
+ if not cap:
+ raise lfutil.storeprotonotcapable([])
+ storetypes = cap.split(',')
+        if 'serve' not in storetypes:
+ raise lfutil.storeprotonotcapable(storetypes)
+ self.remote = remote
+ super(wirestore, self).__init__(ui, repo, remote.url())
+
+ def _put(self, hash, fd):
+ return self.remote.putlfile(hash, fd)
+
+ def _get(self, hash):
+ return self.remote.getlfile(hash)
+
+ def _stat(self, hash):
+ return self.remote.statlfile(hash)