changeset 36455:24c2c760c1cb

statichttprepo: move HTTPRangeHandler from byterange and delete the latter It turns out we've been toting around 472 lines of Python just for this 20-ish line class that teaches urllib how to handle '206 Partial Content'. byterange.py showed up in my \sstr\( Python 3 dragnet, and found itself having overstayed its welcome in our codebase. # no-check-commit because we're moving code that has to have foo_bar naming. Differential Revision: https://phab.mercurial-scm.org/D2443
author Augie Fackler <augie@google.com>
date Sun, 25 Feb 2018 23:34:58 -0500
parents 3ab9d74dd1c5
children 23d12524a202
files mercurial/byterange.py mercurial/statichttprepo.py
diffstat 2 files changed, 27 insertions(+), 474 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/byterange.py	Sun Feb 25 23:09:58 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,472 +0,0 @@
-#   This library is free software; you can redistribute it and/or
-#   modify it under the terms of the GNU Lesser General Public
-#   License as published by the Free Software Foundation; either
-#   version 2.1 of the License, or (at your option) any later version.
-#
-#   This library is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-#   Lesser General Public License for more details.
-#
-#   You should have received a copy of the GNU Lesser General Public
-#   License along with this library; if not, see
-#   <http://www.gnu.org/licenses/>.
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-# $Id: byterange.py,v 1.9 2005/02/14 21:55:07 mstenner Exp $
-
-from __future__ import absolute_import
-
-import email
-import ftplib
-import mimetypes
-import os
-import re
-import socket
-import stat
-
-from . import (
-    urllibcompat,
-    util,
-)
-
-urlerr = util.urlerr
-urlreq = util.urlreq
-
-addclosehook = urlreq.addclosehook
-addinfourl = urlreq.addinfourl
-splitattr = urlreq.splitattr
-splitpasswd = urlreq.splitpasswd
-splitport = urlreq.splitport
-splituser = urlreq.splituser
-unquote = urlreq.unquote
-
-class RangeError(IOError):
-    """Error raised when an unsatisfiable range is requested."""
-
-class HTTPRangeHandler(urlreq.basehandler):
-    """Handler that enables HTTP Range headers.
-
-    This was extremely simple. The Range header is a HTTP feature to
-    begin with so all this class does is tell urllib2 that the
-    "206 Partial Content" response from the HTTP server is what we
-    expected.
-
-    Example:
-        import urllib2
-        import byterange
-
-        range_handler = range.HTTPRangeHandler()
-        opener = urlreq.buildopener(range_handler)
-
-        # install it
-        urlreq.installopener(opener)
-
-        # create Request and set Range header
-        req = urlreq.request('http://www.python.org/')
-        req.header['Range'] = 'bytes=30-50'
-        f = urlreq.urlopen(req)
-    """
-
-    def http_error_206(self, req, fp, code, msg, hdrs):
-        # 206 Partial Content Response
-        r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
-        r.code = code
-        r.msg = msg
-        return r
-
-    def http_error_416(self, req, fp, code, msg, hdrs):
-        # HTTP's Range Not Satisfiable error
-        raise RangeError('Requested Range Not Satisfiable')
-
-class RangeableFileObject(object):
-    """File object wrapper to enable raw range handling.
-    This was implemented primarily for handling range
-    specifications for file:// urls. This object effectively makes
-    a file object look like it consists only of a range of bytes in
-    the stream.
-
-    Examples:
-        # expose 10 bytes, starting at byte position 20, from
-        # /etc/aliases.
-        >>> fo = RangeableFileObject(file(b'/etc/passwd', b'r'), (20,30))
-        # seek seeks within the range (to position 23 in this case)
-        >>> fo.seek(3)
-        # tell tells where your at _within the range_ (position 3 in
-        # this case)
-        >>> fo.tell()
-        # read EOFs if an attempt is made to read past the last
-        # byte in the range. the following will return only 7 bytes.
-        >>> fo.read(30)
-    """
-
-    def __init__(self, fo, rangetup):
-        """Create a RangeableFileObject.
-        fo       -- a file like object. only the read() method need be
-                    supported but supporting an optimized seek() is
-                    preferable.
-        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
-                    to work over.
-        The file object provided is assumed to be at byte offset 0.
-        """
-        self.fo = fo
-        (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
-        self.realpos = 0
-        self._do_seek(self.firstbyte)
-
-    def __getattr__(self, name):
-        """This effectively allows us to wrap at the instance level.
-        Any attribute not found in _this_ object will be searched for
-        in self.fo.  This includes methods."""
-        return getattr(self.fo, name)
-
-    def tell(self):
-        """Return the position within the range.
-        This is different from fo.seek in that position 0 is the
-        first byte position of the range tuple. For example, if
-        this object was created with a range tuple of (500,899),
-        tell() will return 0 when at byte position 500 of the file.
-        """
-        return (self.realpos - self.firstbyte)
-
-    def seek(self, offset, whence=0):
-        """Seek within the byte range.
-        Positioning is identical to that described under tell().
-        """
-        assert whence in (0, 1, 2)
-        if whence == 0:   # absolute seek
-            realoffset = self.firstbyte + offset
-        elif whence == 1: # relative seek
-            realoffset = self.realpos + offset
-        elif whence == 2: # absolute from end of file
-            # XXX: are we raising the right Error here?
-            raise IOError('seek from end of file not supported.')
-
-        # do not allow seek past lastbyte in range
-        if self.lastbyte and (realoffset >= self.lastbyte):
-            realoffset = self.lastbyte
-
-        self._do_seek(realoffset - self.realpos)
-
-    def read(self, size=-1):
-        """Read within the range.
-        This method will limit the size read based on the range.
-        """
-        size = self._calc_read_size(size)
-        rslt = self.fo.read(size)
-        self.realpos += len(rslt)
-        return rslt
-
-    def readline(self, size=-1):
-        """Read lines within the range.
-        This method will limit the size read based on the range.
-        """
-        size = self._calc_read_size(size)
-        rslt = self.fo.readline(size)
-        self.realpos += len(rslt)
-        return rslt
-
-    def _calc_read_size(self, size):
-        """Handles calculating the amount of data to read based on
-        the range.
-        """
-        if self.lastbyte:
-            if size > -1:
-                if ((self.realpos + size) >= self.lastbyte):
-                    size = (self.lastbyte - self.realpos)
-            else:
-                size = (self.lastbyte - self.realpos)
-        return size
-
-    def _do_seek(self, offset):
-        """Seek based on whether wrapped object supports seek().
-        offset is relative to the current position (self.realpos).
-        """
-        assert offset >= 0
-        seek = getattr(self.fo, 'seek', self._poor_mans_seek)
-        seek(self.realpos + offset)
-        self.realpos += offset
-
-    def _poor_mans_seek(self, offset):
-        """Seek by calling the wrapped file objects read() method.
-        This is used for file like objects that do not have native
-        seek support. The wrapped objects read() method is called
-        to manually seek to the desired position.
-        offset -- read this number of bytes from the wrapped
-                  file object.
-        raise RangeError if we encounter EOF before reaching the
-        specified offset.
-        """
-        pos = 0
-        bufsize = 1024
-        while pos < offset:
-            if (pos + bufsize) > offset:
-                bufsize = offset - pos
-            buf = self.fo.read(bufsize)
-            if len(buf) != bufsize:
-                raise RangeError('Requested Range Not Satisfiable')
-            pos += bufsize
-
-class FileRangeHandler(urlreq.filehandler):
-    """FileHandler subclass that adds Range support.
-    This class handles Range headers exactly like an HTTP
-    server would.
-    """
-    def open_local_file(self, req):
-        host = urllibcompat.gethost(req)
-        file = urllibcompat.getselector(req)
-        localfile = urlreq.url2pathname(file)
-        stats = os.stat(localfile)
-        size = stats[stat.ST_SIZE]
-        modified = email.Utils.formatdate(stats[stat.ST_MTIME])
-        mtype = mimetypes.guess_type(file)[0]
-        if host:
-            host, port = urlreq.splitport(host)
-            if port or socket.gethostbyname(host) not in self.get_names():
-                raise urlerr.urlerror('file not on local host')
-        fo = open(localfile,'rb')
-        brange = req.headers.get('Range', None)
-        brange = range_header_to_tuple(brange)
-        assert brange != ()
-        if brange:
-            (fb, lb) = brange
-            if lb == '':
-                lb = size
-            if fb < 0 or fb > size or lb > size:
-                raise RangeError('Requested Range Not Satisfiable')
-            size = (lb - fb)
-            fo = RangeableFileObject(fo, (fb, lb))
-        headers = email.message_from_string(
-            'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
-            (mtype or 'text/plain', size, modified))
-        return urlreq.addinfourl(fo, headers, 'file:'+file)
-
-
-# FTP Range Support
-# Unfortunately, a large amount of base FTP code had to be copied
-# from urllib and urllib2 in order to insert the FTP REST command.
-# Code modifications for range support have been commented as
-# follows:
-# -- range support modifications start/end here
-
-class FTPRangeHandler(urlreq.ftphandler):
-    def ftp_open(self, req):
-        host = urllibcompat.gethost(req)
-        if not host:
-            raise IOError('ftp error', 'no host given')
-        host, port = splitport(host)
-        if port is None:
-            port = ftplib.FTP_PORT
-        else:
-            port = int(port)
-
-        # username/password handling
-        user, host = splituser(host)
-        if user:
-            user, passwd = splitpasswd(user)
-        else:
-            passwd = None
-        host = unquote(host)
-        user = unquote(user or '')
-        passwd = unquote(passwd or '')
-
-        try:
-            host = socket.gethostbyname(host)
-        except socket.error as msg:
-            raise urlerr.urlerror(msg)
-        path, attrs = splitattr(req.get_selector())
-        dirs = path.split('/')
-        dirs = map(unquote, dirs)
-        dirs, file = dirs[:-1], dirs[-1]
-        if dirs and not dirs[0]:
-            dirs = dirs[1:]
-        try:
-            fw = self.connect_ftp(user, passwd, host, port, dirs)
-            if file:
-                type = 'I'
-            else:
-                type = 'D'
-
-            for attr in attrs:
-                attr, value = splitattr(attr)
-                if attr.lower() == 'type' and \
-                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
-                    type = value.upper()
-
-            # -- range support modifications start here
-            rest = None
-            range_tup = range_header_to_tuple(req.headers.get('Range', None))
-            assert range_tup != ()
-            if range_tup:
-                (fb, lb) = range_tup
-                if fb > 0:
-                    rest = fb
-            # -- range support modifications end here
-
-            fp, retrlen = fw.retrfile(file, type, rest)
-
-            # -- range support modifications start here
-            if range_tup:
-                (fb, lb) = range_tup
-                if lb == '':
-                    if retrlen is None or retrlen == 0:
-                        raise RangeError('Requested Range Not Satisfiable due'
-                                         ' to unobtainable file length.')
-                    lb = retrlen
-                    retrlen = lb - fb
-                    if retrlen < 0:
-                        # beginning of range is larger than file
-                        raise RangeError('Requested Range Not Satisfiable')
-                else:
-                    retrlen = lb - fb
-                    fp = RangeableFileObject(fp, (0, retrlen))
-            # -- range support modifications end here
-
-            headers = ""
-            mtype = mimetypes.guess_type(req.get_full_url())[0]
-            if mtype:
-                headers += "Content-Type: %s\n" % mtype
-            if retrlen is not None and retrlen >= 0:
-                headers += "Content-Length: %d\n" % retrlen
-            headers = email.message_from_string(headers)
-            return addinfourl(fp, headers, req.get_full_url())
-        except ftplib.all_errors as msg:
-            raise IOError('ftp error', msg)
-
-    def connect_ftp(self, user, passwd, host, port, dirs):
-        fw = ftpwrapper(user, passwd, host, port, dirs)
-        return fw
-
-class ftpwrapper(urlreq.ftpwrapper):
-    # range support note:
-    # this ftpwrapper code is copied directly from
-    # urllib. The only enhancement is to add the rest
-    # argument and pass it on to ftp.ntransfercmd
-    def retrfile(self, file, type, rest=None):
-        self.endtransfer()
-        if type in ('d', 'D'):
-            cmd = 'TYPE A'
-            isdir = 1
-        else:
-            cmd = 'TYPE ' + type
-            isdir = 0
-        try:
-            self.ftp.voidcmd(cmd)
-        except ftplib.all_errors:
-            self.init()
-            self.ftp.voidcmd(cmd)
-        conn = None
-        if file and not isdir:
-            # Use nlst to see if the file exists at all
-            try:
-                self.ftp.nlst(file)
-            except ftplib.error_perm as reason:
-                raise IOError('ftp error', reason)
-            # Restore the transfer mode!
-            self.ftp.voidcmd(cmd)
-            # Try to retrieve as a file
-            try:
-                cmd = 'RETR ' + file
-                conn = self.ftp.ntransfercmd(cmd, rest)
-            except ftplib.error_perm as reason:
-                if str(reason).startswith('501'):
-                    # workaround for REST not supported error
-                    fp, retrlen = self.retrfile(file, type)
-                    fp = RangeableFileObject(fp, (rest,''))
-                    return (fp, retrlen)
-                elif not str(reason).startswith('550'):
-                    raise IOError('ftp error', reason)
-        if not conn:
-            # Set transfer mode to ASCII!
-            self.ftp.voidcmd('TYPE A')
-            # Try a directory listing
-            if file:
-                cmd = 'LIST ' + file
-            else:
-                cmd = 'LIST'
-            conn = self.ftp.ntransfercmd(cmd)
-        self.busy = 1
-        # Pass back both a suitably decorated object and a retrieval length
-        return (addclosehook(conn[0].makefile('rb'),
-                            self.endtransfer), conn[1])
-
-
-####################################################################
-# Range Tuple Functions
-# XXX: These range tuple functions might go better in a class.
-
-_rangere = None
-def range_header_to_tuple(range_header):
-    """Get a (firstbyte,lastbyte) tuple from a Range header value.
-
-    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
-    function pulls the firstbyte and lastbyte values and returns
-    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
-    the header value, it is returned as an empty string in the
-    tuple.
-
-    Return None if range_header is None
-    Return () if range_header does not conform to the range spec
-    pattern.
-
-    """
-    global _rangere
-    if range_header is None:
-        return None
-    if _rangere is None:
-        _rangere = re.compile(br'^bytes=(\d{1,})-(\d*)')
-    match = _rangere.match(range_header)
-    if match:
-        tup = range_tuple_normalize(match.group(1, 2))
-        if tup and tup[1]:
-            tup = (tup[0], tup[1]+1)
-        return tup
-    return ()
-
-def range_tuple_to_header(range_tup):
-    """Convert a range tuple to a Range header value.
-    Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
-    if no range is needed.
-    """
-    if range_tup is None:
-        return None
-    range_tup = range_tuple_normalize(range_tup)
-    if range_tup:
-        if range_tup[1]:
-            range_tup = (range_tup[0], range_tup[1] - 1)
-        return 'bytes=%s-%s' % range_tup
-
-def range_tuple_normalize(range_tup):
-    """Normalize a (first_byte,last_byte) range tuple.
-    Return a tuple whose first element is guaranteed to be an int
-    and whose second element will be '' (meaning: the last byte) or
-    an int. Finally, return None if the normalized tuple == (0,'')
-    as that is equivalent to retrieving the entire file.
-    """
-    if range_tup is None:
-        return None
-    # handle first byte
-    fb = range_tup[0]
-    if fb in (None, ''):
-        fb = 0
-    else:
-        fb = int(fb)
-    # handle last byte
-    try:
-        lb = range_tup[1]
-    except IndexError:
-        lb = ''
-    else:
-        if lb is None:
-            lb = ''
-        elif lb != '':
-            lb = int(lb)
-    # check if range is over the entire file
-    if (fb, lb) == (0, ''):
-        return None
-    # check that the range is valid
-    if lb < fb:
-        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
-    return (fb, lb)
--- a/mercurial/statichttprepo.py	Sun Feb 25 23:09:58 2018 -0500
+++ b/mercurial/statichttprepo.py	Sun Feb 25 23:34:58 2018 -0500
@@ -13,7 +13,6 @@
 
 from .i18n import _
 from . import (
-    byterange,
     changelog,
     error,
     localrepo,
@@ -82,10 +81,36 @@
     def close(self):
         pass
 
+# _RangeError and _HTTPRangeHandler were originally in byterange.py,
+# which was itself extracted from urlgrabber. See the last version of
+# byterange.py from history if you need more information.
+class _RangeError(IOError):
+    """Error raised when an unsatisfiable range is requested."""
+
+class _HTTPRangeHandler(urlreq.basehandler):
+    """Handler that enables HTTP Range headers.
+
+    This was extremely simple. The Range header is a HTTP feature to
+    begin with so all this class does is tell urllib2 that the
+    "206 Partial Content" response from the HTTP server is what we
+    expected.
+    """
+
+    def http_error_206(self, req, fp, code, msg, hdrs):
+        # 206 Partial Content Response
+        r = urlreq.addinfourl(fp, hdrs, req.get_full_url())
+        r.code = code
+        r.msg = msg
+        return r
+
+    def http_error_416(self, req, fp, code, msg, hdrs):
+        # HTTP's Range Not Satisfiable error
+        raise _RangeError('Requested Range Not Satisfiable')
+
 def build_opener(ui, authinfo):
     # urllib cannot handle URLs with embedded user or passwd
     urlopener = url.opener(ui, authinfo)
-    urlopener.add_handler(byterange.HTTPRangeHandler())
+    urlopener.add_handler(_HTTPRangeHandler())
 
     class statichttpvfs(vfsmod.abstractvfs):
         def __init__(self, base):