archive: defer opening the output until a file is matched
authorJoerg Sonnenberger <joerg@bec.de>
Wed, 15 Nov 2023 22:11:34 +0100
changeset 51838 3b8d92f71d92
parent 51837 7933bcb02bfc
child 51842 3785814bc2b7
archive: defer opening the output until a file is matched Before, if no file is matched, an error is thrown, but the archive is created anyway. When using hgweb, an error 500 is returned as the response body already exists when the error is seen. Afterwards, the archive is created before the first match is emitted. If no match is found, no archive is created. This is more consistent behavior as an empty archive is not a representable in all output formats, e.g. tar archives.
hgext/largefiles/overrides.py
mercurial/archival.py
mercurial/commands.py
mercurial/hgweb/webcommands.py
mercurial/subrepo.py
tests/test-hgweb-empty.t
--- a/hgext/largefiles/overrides.py	Thu Sep 05 13:37:24 2024 +0200
+++ b/hgext/largefiles/overrides.py	Wed Nov 15 22:11:34 2023 +0100
@@ -1252,8 +1252,6 @@
     if kind not in archival.archivers:
         raise error.Abort(_(b"unknown archive type '%s'") % kind)
 
-    ctx = repo[node]
-
     if kind == b'files':
         if prefix:
             raise error.Abort(_(b'cannot give prefix when archiving to files'))
@@ -1262,6 +1260,36 @@
 
     if not match:
         match = scmutil.matchall(repo)
+    archiver = None
+    ctx = repo[node]
+
+    def opencallback():
+        """Return the archiver instance, creating it if necessary.
+
+        This function is called when the first actual entry is created.
+        It may be called multiple times from different layers.
+        When serving the archive via hgweb, no errors should happen after
+        this point.
+        """
+        nonlocal archiver
+        if archiver is None:
+            if callable(dest):
+                output = dest()
+            else:
+                output = dest
+            archiver = archival.archivers[kind](output, mtime or ctx.date()[0])
+            assert archiver is not None
+
+            if repo.ui.configbool(b"ui", b"archivemeta"):
+                metaname = b'.hg_archival.txt'
+                if match(metaname):
+                    write(
+                        metaname,
+                        0o644,
+                        False,
+                        lambda: archival.buildmetadata(ctx),
+                    )
+        return archiver
 
     def write(name, mode, islink, getdata):
         if not match(name):
@@ -1269,18 +1297,11 @@
         data = getdata()
         if decode:
             data = repo.wwritedata(name, data)
+        if archiver is None:
+            opencallback()
+        assert archiver is not None, "archive should be opened by now"
         archiver.addfile(prefix + name, mode, islink, data)
 
-    archiver = archival.archivers[kind](dest, mtime or ctx.date()[0])
-
-    if repo.ui.configbool(b"ui", b"archivemeta"):
-        write(
-            b'.hg_archival.txt',
-            0o644,
-            False,
-            lambda: archival.buildmetadata(ctx),
-        )
-
     for f in ctx:
         ff = ctx.flags(f)
         getdata = ctx[f].data
@@ -1319,18 +1340,19 @@
                 and lfstatus(sub._repo)
                 or util.nullcontextmanager()
             ):
-                sub.archive(archiver, subprefix, submatch)
+                sub.archive(opencallback, subprefix, submatch)
 
-    archiver.done()
+    if archiver:
+        archiver.done()
 
 
 @eh.wrapfunction(subrepo.hgsubrepo, 'archive')
 def hgsubrepoarchive(
-    orig, repo, archiver, prefix, match: matchmod.basematcher, decode=True
+    orig, repo, opener, prefix, match: matchmod.basematcher, decode=True
 ):
     lfenabled = hasattr(repo._repo, '_largefilesenabled')
     if not lfenabled or not repo._repo.lfstatus:
-        return orig(repo, archiver, prefix, match, decode)
+        return orig(repo, opener, prefix, match, decode)
 
     repo._get(repo._state + (b'hg',))
     rev = repo._state[1]
@@ -1339,7 +1361,10 @@
     if ctx.node() is not None:
         lfcommands.cachelfiles(repo.ui, repo._repo, ctx.node())
 
+    archiver = None
+
     def write(name, mode, islink, getdata):
+        nonlocal archiver
         # At this point, the standin has been replaced with the largefile name,
         # so the normal matcher works here without the lfutil variants.
         if not match(f):
@@ -1348,6 +1373,8 @@
         if decode:
             data = repo._repo.wwritedata(name, data)
 
+        if archiver is None:
+            archiver = opener()
         archiver.addfile(prefix + name, mode, islink, data)
 
     for f in ctx:
@@ -1387,7 +1414,7 @@
             and lfstatus(sub._repo)
             or util.nullcontextmanager()
         ):
-            sub.archive(archiver, subprefix, submatch, decode)
+            sub.archive(opener, subprefix, submatch, decode)
 
 
 # If a largefile is modified, the change is not reflected in its
--- a/mercurial/archival.py	Thu Sep 05 13:37:24 2024 +0200
+++ b/mercurial/archival.py	Wed Nov 15 22:11:34 2023 +0100
@@ -293,8 +293,9 @@
 ):
     """create archive of repo as it was at node.
 
-    dest can be name of directory, name of archive file, or file
-    object to write archive to.
+    dest can be name of directory, name of archive file, a callable, or file
+    object to write archive to. If it is a callable, it will called to open
+    the actual file object before the first archive member is written.
 
     kind is type of archive to create.
 
@@ -316,7 +317,37 @@
     else:
         prefix = tidyprefix(dest, kind, prefix)
 
+    archiver = None
+    ctx = repo[node]
+
+    def opencallback():
+        """Return the archiver instance, creating it if necessary.
+
+        This function is called when the first actual entry is created.
+        It may be called multiple times from different layers.
+        When serving the archive via hgweb, no errors should happen after
+        this point.
+        """
+        nonlocal archiver
+        if archiver is None:
+            if callable(dest):
+                output = dest()
+            else:
+                output = dest
+            archiver = archivers[kind](output, mtime or ctx.date()[0])
+            assert archiver is not None
+
+            if repo.ui.configbool(b"ui", b"archivemeta"):
+                metaname = b'.hg_archival.txt'
+                if match(metaname):
+                    write(metaname, 0o644, False, lambda: buildmetadata(ctx))
+        return archiver
+
     def write(name, mode, islink, getdata):
+        if archiver is None:
+            opencallback()
+        assert archiver is not None, "archive should be opened by now"
+
         data = getdata()
         if decode:
             data = repo.wwritedata(name, data)
@@ -325,17 +356,9 @@
     if kind not in archivers:
         raise error.Abort(_(b"unknown archive type '%s'") % kind)
 
-    ctx = repo[node]
-    archiver = archivers[kind](dest, mtime or ctx.date()[0])
-
     if not match:
         match = scmutil.matchall(repo)
 
-    if repo.ui.configbool(b"ui", b"archivemeta"):
-        name = b'.hg_archival.txt'
-        if match(name):
-            write(name, 0o644, False, lambda: buildmetadata(ctx))
-
     files = list(ctx.manifest().walk(match))
     total = len(files)
     if total:
@@ -358,10 +381,11 @@
             sub = ctx.workingsub(subpath)
             submatch = matchmod.subdirmatcher(subpath, match)
             subprefix = prefix + subpath + b'/'
-            total += sub.archive(archiver, subprefix, submatch, decode)
+            total += sub.archive(opencallback, subprefix, submatch, decode)
 
     if total == 0:
         raise error.Abort(_(b'no files match the archive pattern'))
 
+    assert archiver is not None, "archive should have been opened before"
     archiver.done()
     return total
--- a/mercurial/commands.py	Thu Sep 05 13:37:24 2024 +0200
+++ b/mercurial/commands.py	Wed Nov 15 22:11:34 2023 +0100
@@ -709,7 +709,8 @@
     if dest == b'-':
         if kind == b'files':
             raise error.InputError(_(b'cannot archive plain files to stdout'))
-        dest = cmdutil.makefileobj(ctx, dest)
+        realdest = dest
+        dest = lambda: cmdutil.makefileobj(ctx, realdest)
         if not prefix:
             prefix = os.path.basename(repo.root) + b'-%h'
 
--- a/mercurial/hgweb/webcommands.py	Thu Sep 05 13:37:24 2024 +0200
+++ b/mercurial/hgweb/webcommands.py	Wed Nov 15 22:11:34 2023 +0100
@@ -1283,35 +1283,46 @@
 
     mimetype, artype, extension, encoding = webutil.archivespecs[type_]
 
-    web.res.headers[b'Content-Type'] = mimetype
-    web.res.headers[b'Content-Disposition'] = b'attachment; filename=%s%s' % (
-        name,
-        extension,
-    )
-
-    if encoding:
-        web.res.headers[b'Content-Encoding'] = encoding
-
-    web.res.setbodywillwrite()
-    if list(web.res.sendresponse()):
-        raise error.ProgrammingError(
-            b'sendresponse() should not emit data if writing later'
-        )
-
     if web.req.method == b'HEAD':
         return []
 
-    bodyfh = web.res.getbodyfile()
+    def open_archive():
+        """Open the output "file" for the archiver.
+
+        This function starts the streaming response. Error reporting
+        after this point will result in short writes without proper
+        diagnostics to the client.
+        """
+        web.res.headers[b'Content-Type'] = mimetype
+        web.res.headers[
+            b'Content-Disposition'
+        ] = b'attachment; filename=%s%s' % (
+            name,
+            extension,
+        )
 
-    archival.archive(
+        if encoding:
+            web.res.headers[b'Content-Encoding'] = encoding
+
+        web.res.setbodywillwrite()
+        if list(web.res.sendresponse()):
+            raise error.ProgrammingError(
+                b'sendresponse() should not emit data if writing later'
+            )
+
+        return web.res.getbodyfile()
+
+    total = archival.archive(
         web.repo,
-        bodyfh,
+        open_archive,
         cnode,
         artype,
         prefix=name,
         match=match,
         subrepos=web.configbool(b"web", b"archivesubrepos"),
     )
+    if total == 0:
+        raise ErrorResponse(HTTP_NOT_FOUND, b'no files found in changeset')
 
     return []
 
--- a/mercurial/subrepo.py	Thu Sep 05 13:37:24 2024 +0200
+++ b/mercurial/subrepo.py	Wed Nov 15 22:11:34 2023 +0100
@@ -363,9 +363,7 @@
         """handle the files command for this subrepo"""
         return 1
 
-    def archive(
-        self, archiver, prefix, match: matchmod.basematcher, decode=True
-    ):
+    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
         files = [f for f in self.files() if match(f)]
         total = len(files)
         relpath = subrelpath(self)
@@ -373,10 +371,13 @@
             _(b'archiving (%s)') % relpath, unit=_(b'files'), total=total
         )
         progress.update(0)
+        archiver = None
         for name in files:
             flags = self.fileflags(name)
             mode = b'x' in flags and 0o755 or 0o644
             symlink = b'l' in flags
+            if archiver is None:
+                archiver = opener()
             archiver.addfile(
                 prefix + name, mode, symlink, self.filedata(name, decode)
             )
@@ -651,9 +652,7 @@
             )
 
     @annotatesubrepoerror
-    def archive(
-        self, archiver, prefix, match: matchmod.basematcher, decode=True
-    ):
+    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
         self._get(self._state + (b'hg',))
         files = [f for f in self.files() if match(f)]
         rev = self._state[1]
@@ -661,12 +660,12 @@
         scmutil.prefetchfiles(
             self._repo, [(ctx.rev(), scmutil.matchfiles(self._repo, files))]
         )
-        total = abstractsubrepo.archive(self, archiver, prefix, match)
+        total = abstractsubrepo.archive(self, opener, prefix, match)
         for subpath in ctx.substate:
             s = subrepo(ctx, subpath, True)
             submatch = matchmod.subdirmatcher(subpath, match)
             subprefix = prefix + subpath + b'/'
-            total += s.archive(archiver, subprefix, submatch, decode)
+            total += s.archive(opener, subprefix, submatch, decode)
         return total
 
     @annotatesubrepoerror
@@ -1910,9 +1909,7 @@
             else:
                 self.wvfs.unlink(f)
 
-    def archive(
-        self, archiver, prefix, match: matchmod.basematcher, decode=True
-    ):
+    def archive(self, opener, prefix, match: matchmod.basematcher, decode=True):
         total = 0
         source, revision = self._state
         if not revision:
@@ -1928,6 +1925,7 @@
         progress = self.ui.makeprogress(
             _(b'archiving (%s)') % relpath, unit=_(b'files')
         )
+        archiver = None
         progress.update(0)
         for info in tar:
             if info.isdir():
@@ -1944,6 +1942,8 @@
                 else:
                     self.ui.warn(_(b'skipping "%s" (unknown type)') % bname)
                     continue
+            if archiver is None:
+                archiver = opener()
             archiver.addfile(prefix + bname, info.mode, info.issym(), data)
             total += 1
             progress.increment()
--- a/tests/test-hgweb-empty.t	Thu Sep 05 13:37:24 2024 +0200
+++ b/tests/test-hgweb-empty.t	Wed Nov 15 22:11:34 2023 +0100
@@ -1,9 +1,15 @@
 #require serve
 
-Some tests for hgweb in an empty repository
+Some tests for hgweb in an empty repository and empty archive
 
   $ hg init test
   $ cd test
+  $ cat << EOF >> .hg/hgrc
+  > [web]
+  > allow-archive = zip
+  > [ui]
+  > archivemeta = False
+  > EOF
   $ hg serve -n test -p $HGPORT -d --pid-file=hg.pid -A access.log -E errors.log
   $ cat hg.pid >> $DAEMON_PIDS
   $ (get-with-headers.py localhost:$HGPORT 'shortlog')
@@ -44,6 +50,9 @@
   </ul>
   <ul>
   
+  <li>
+  <a href="/archive/tip.zip">zip</a>
+  </li>
   </ul>
   <ul>
    <li><a href="/help">help</a></li>
@@ -155,6 +164,9 @@
   </ul>
   <ul>
   
+  <li>
+  <a href="/archive/tip.zip">zip</a>
+  </li>
   </ul>
   <ul>
    <li><a href="/help">help</a></li>
@@ -264,6 +276,9 @@
   </ul>
   <ul>
   
+  <li>
+  <a href="/archive/tip.zip">zip</a>
+  </li>
   </ul>
   <ul>
    <li><a href="/help">help</a></li>
@@ -369,6 +384,9 @@
   </ul>
   <ul>
   
+  <li>
+  <a href="/archive/tip.zip">zip</a>
+  </li>
   </ul>
   <ul>
    <li><a href="/help">help</a></li>
@@ -428,4 +446,14 @@
   
   </feed>
 
+Fetching an empty archive
+-------------------------
+
+Test that archiving without matching files is rejected as error,
+not as Internal Server Error.
+
+  $ get-with-headers.py --headeronly localhost:$HGPORT archive/null.zip
+  403 Forbidden
+  [1]
+
   $ cd ..