changeset 47859:155a2ec8a9dc

merge with stable
author Matt Harbison <matt_harbison@yahoo.com>
date Sun, 22 Aug 2021 16:32:06 -0400
parents de2e04fe4897 (diff) df94c13ddf60 (current diff)
children 08f16b3331df
files hgext/fix.py mercurial/configitems.py
diffstat 3 files changed, 80 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/configitems.py	Thu Aug 19 17:47:27 2021 +0200
+++ b/mercurial/configitems.py	Sun Aug 22 16:32:06 2021 -0400
@@ -1266,6 +1266,11 @@
 )
 coreconfigitem(
     b'experimental',
+    b'web.full-garbage-collection-rate',
+    default=1,  # still forcing a full collection on each request
+)
+coreconfigitem(
+    b'experimental',
     b'worker.wdir-get-thread-safe',
     default=False,
 )
--- a/mercurial/hgweb/hgwebdir_mod.py	Thu Aug 19 17:47:27 2021 +0200
+++ b/mercurial/hgweb/hgwebdir_mod.py	Sun Aug 22 16:32:06 2021 -0400
@@ -285,6 +285,7 @@
         self.lastrefresh = 0
         self.motd = None
         self.refresh()
+        self.requests_count = 0
         if not baseui:
             # set up environment for new ui
             extensions.loadall(self.ui)
@@ -341,6 +342,10 @@
 
         self.repos = repos
         self.ui = u
+        self.gc_full_collect_rate = self.ui.configint(
+            b'experimental', b'web.full-garbage-collection-rate'
+        )
+        self.gc_full_collections_done = 0
         encoding.encoding = self.ui.config(b'web', b'encoding')
         self.style = self.ui.config(b'web', b'style')
         self.templatepath = self.ui.config(
@@ -383,12 +388,27 @@
             finally:
                 # There are known cycles in localrepository that prevent
                 # those objects (and tons of held references) from being
-                # collected through normal refcounting. We mitigate those
-                # leaks by performing an explicit GC on every request.
-                # TODO remove this once leaks are fixed.
-                # TODO only run this on requests that create localrepository
-                # instances instead of every request.
-                gc.collect()
+                # collected through normal refcounting.
+                # In some cases, the resulting memory consumption can
+                # be tamed by performing explicit garbage collections.
+                # In presence of actual leaks or big long-lived caches, the
+                # impact on performance of such collections can become a
+                # problem, hence the rate shouldn't be set too low.
+                # See "Collecting the oldest generation" in
+                # https://devguide.python.org/garbage_collector
+                # for more about such trade-offs.
+                rate = self.gc_full_collect_rate
+
+                # this is not thread safe, but the consequence (skipping
+                # a garbage collection) is arguably better than risking
+                # to have several threads perform a collection in parallel
+                # (long useless wait on all threads).
+                self.requests_count += 1
+                if rate > 0 and self.requests_count % rate == 0:
+                    gc.collect()
+                    self.gc_full_collections_done += 1
+                else:
+                    gc.collect(generation=1)
 
     def _runwsgi(self, req, res):
         try:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-hgwebdir-gc.py	Sun Aug 22 16:32:06 2021 -0400
@@ -0,0 +1,49 @@
+from __future__ import absolute_import
+
+import os
+from mercurial.hgweb import hgwebdir_mod
+
+hgwebdir = hgwebdir_mod.hgwebdir  # short alias for the class under test
+
+os.mkdir(b'webdir')  # scratch directory created under the test's cwd
+os.chdir(b'webdir')
+
+webdir = os.path.realpath(b'.')  # absolute bytes path for the config file
+
+
+def trivial_response(req, res):  # installed as _runwsgi by make_hgwebdir
+    return []  # empty list: nothing for the caller to iterate
+
+
+def make_hgwebdir(gc_rate=None):  # None: leave the option unset (default)
+    config = os.path.join(webdir, b'hgwebdir.conf')
+    with open(config, 'wb') as configfile:  # rewritten on every call
+        configfile.write(b'[experimental]\n')
+        if gc_rate is not None:
+            configfile.write(b'web.full-garbage-collection-rate=%d\n' % gc_rate)
+    hg_wd = hgwebdir(config)
+    hg_wd._runwsgi = trivial_response  # replace hgwebdir's own _runwsgi
+    return hg_wd
+
+
+def process_requests(webdir_instance, number):  # call run_wsgi `number` times
+    # no realistic req/res needed for now: trivial_response ignores both
+    for _ in range(number):
+        for chunk in webdir_instance.run_wsgi(None, None):
+            pass  # just exhaust the returned iterable
+
+
+without_gc = make_hgwebdir(gc_rate=0)  # 0: never a full collection
+process_requests(without_gc, 5)
+assert without_gc.requests_count == 5
+assert without_gc.gc_full_collections_done == 0
+
+with_gc = make_hgwebdir(gc_rate=2)  # full collection every 2nd request
+process_requests(with_gc, 5)
+assert with_gc.requests_count == 5
+assert with_gc.gc_full_collections_done == 2  # on requests 2 and 4
+
+with_systematic_gc = make_hgwebdir()  # default value of the setting
+process_requests(with_systematic_gc, 3)
+assert with_systematic_gc.requests_count == 3
+assert with_systematic_gc.gc_full_collections_done == 3  # default is 1