Mercurial > hg
view mercurial/similar.py @ 12570:a72c5ff1260c stable
Correct Content-Type header values for archive downloads.
The content type for both .tar.gz and .tar.bz2 downloads was
application/x-tar, which is correct for .tar files when no
Content-Encoding is present, but is not correct for .tar.gz and .tar.bz2
files unless Content-Encoding is set to gzip or x-bzip2, respectively.
However, setting Content-Encoding causes browsers to undo that encoding
during download, when a .gz or .bz2 file is usually the desired
artifact. Omitting the Content-Encoding header is preferred to avoid
having browsers uncompress non-render-able files.
Additionally, the Content-Disposition line indicates a final desired
filename with .tar.gz or .tar.bz2 extension which makes providing a
Content-Encoding header inappropriate.
With the current configuration, browsers (Chrome and Firefox thus far)
see the application/x-tar Content-Type on a filename that lacks a final
.tar extension and append that extension, yielding filename.tar.gz.tar as
the final on-disk artifact. This was originally reported here:
http://stackoverflow.com/questions/3753659
I've changed the .tar.gz and .tar.bz2 Content-Type values to
application/x-gzip and application/x-bzip2, respectively, which yields
correctly named download artifacts on Firefox, Chrome, and IE.
author | Ry4an Brase <ry4an-hg@ry4an.org> |
---|---|
date | Mon, 20 Sep 2010 14:56:08 -0500 |
parents | 0c8646292ca4 |
children | 525fdb738975 |
line wrap: on
line source
# similar.py - mechanisms for finding similar files
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from i18n import _
import util
import mdiff
import bdiff

def _findexactmatches(repo, added, removed):
    '''find renamed files that have no changes

    Takes a list of new filectxs and a list of removed filectxs, and yields
    (before, after) tuples of exact matches.

    Files are compared by the SHA-1 digest of their data. If several
    removed files share the same content, only the last one in ``removed``
    is kept as a rename source. Progress is reported through
    ``repo.ui.progress`` and is closed once the generator is exhausted.
    '''
    topic = _('searching for exact renames')
    numremoved = len(removed)
    numfiles = len(added) + numremoved

    # Get hashes of removed files.
    hashes = {}
    for i, fctx in enumerate(removed):
        repo.ui.progress(topic, i, total=numfiles)
        h = util.sha1(fctx.data()).digest()
        hashes[h] = fctx

    # For each added file, see if it corresponds to a removed file.
    for i, fctx in enumerate(added):
        # positions continue after the removed files, so a single bar
        # covers both passes
        repo.ui.progress(topic, i + numremoved, total=numfiles)
        h = util.sha1(fctx.data()).digest()
        if h in hashes:
            yield (hashes[h], fctx)

    # Done
    repo.ui.progress(topic, None)

def _findsimilarmatches(repo, added, removed, threshold):
    '''find potentially renamed files based on similar file content

    Takes a list of new filectxs and a list of removed filectxs, and yields
    (before, after, score) tuples of partial matches.

    Every added file is scored against every removed file. For each added
    file the best-scoring removed file is kept, provided its score is at
    least ``threshold``; on a tie the removed file seen later wins. At most
    one tuple is yielded per added file, after all comparisons are done.
    '''
    topic = _('searching for similar files')
    copies = {}
    for i, r in enumerate(removed):
        repo.ui.progress(topic, i, total=len(removed))

        # lazily load text
        @util.cachefunc
        def data():
            orig = r.data()
            return orig, mdiff.splitnewlines(orig)

        def score(text):
            '''return 2 * matched bytes / (len(text) + len(orig))'''
            orig, lines = data()
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            matches = bdiff.blocks(text, orig)
            for x1, x2, y1, y2 in matches:
                for line in lines[y1:y2]:
                    equal += len(line)

            lengths = len(text) + len(orig)
            return equal * 2.0 / lengths

        for a in added:
            # until a candidate is recorded, the bar to beat is the threshold
            bestscore = copies.get(a, (None, threshold))[1]
            myscore = score(a.data())
            if myscore >= bestscore:
                copies[a] = (r, myscore)

    # Close the topic that was opened above. Passing a different topic
    # string here would leave the 'searching for similar files' progress
    # entry open.
    repo.ui.progress(topic, None)

    for dest, v in copies.iteritems():
        source, similarity = v
        yield source, dest, similarity

def findrenames(repo, added, removed, threshold):
    '''find renamed files -- yields (before, after, score) tuples

    ``added`` and ``removed`` are iterables of file names; added files are
    looked up in the working context (``repo[None]``) and removed files in
    ``repo['.']``. ``before`` and ``after`` are the paths of the removed
    and the added file respectively. Exact content matches are yielded
    first with a score of 1.0. Similarity matching is only attempted when
    ``threshold`` is below 1.0, and only for added files that had no exact
    match.
    '''
    parentctx = repo['.']
    workingctx = repo[None]

    # Zero length files will be frequently unrelated to each other, and
    # tracking the deletion/addition of such a file will probably cause more
    # harm than good. We strip them out here to avoid matching them later on.
    addedfiles = set([workingctx[fp] for fp in added
                      if workingctx[fp].size() > 0])
    removedfiles = set([parentctx[fp] for fp in removed
                        if fp in parentctx and parentctx[fp].size() > 0])

    # Find exact matches.
    for (a, b) in _findexactmatches(repo,
            sorted(addedfiles), sorted(removedfiles)):
        # an exactly matched added file must not be scored again below
        addedfiles.remove(b)
        yield (a.path(), b.path(), 1.0)

    # If the user requested similar files to be matched, search for them also.
    if threshold < 1.0:
        for (a, b, score) in _findsimilarmatches(repo,
                sorted(addedfiles), sorted(removedfiles), threshold):
            yield (a.path(), b.path(), score)