diff mercurial/narrowspec.py @ 39531:0d572769046a

narrowspec: limit patterns to path: and rootfilesin: (BC) Some matcher patterns are computationally expensive and may even have security issues (e.g. evaluating some file sets). For these reasons, we want to limit the types of matcher patterns that can be used in narrow specs and by command line arguments used for defining narrow specs. This commit teaches ``narrowspec.parsepatterns()`` to validate the pattern types against "safe" patterns. Surprisingly, no existing tests broke. So tests for the feature have been added. We also added a function to validate a patterns data structure. This will be used in future commits. Differential Revision: https://phab.mercurial-scm.org/D4522
author Gregory Szorc <gregory.szorc@gmail.com>
date Tue, 11 Sep 2018 15:25:35 -0700
parents 2675d561f5cb
children 8d8e61df8259
line wrap: on
line diff
--- a/mercurial/narrowspec.py	Tue Sep 11 10:54:20 2018 -0700
+++ b/mercurial/narrowspec.py	Tue Sep 11 15:25:35 2018 -0700
@@ -20,6 +20,15 @@
 
 FILENAME = 'narrowspec'
 
+# Pattern prefixes that are allowed in narrow patterns. This list MUST
+# only contain patterns that are fast and safe to evaluate. Keep in mind
+# that patterns are supplied by clients and executed on remote servers
+# as part of wire protocol commands.
+VALID_PREFIXES = (
+    b'path:',
+    b'rootfilesin:',
+)
+
 def parseserverpatterns(text):
     """Parses the narrowspec format that's returned by the server."""
     includepats = set()
@@ -82,8 +91,42 @@
     return '%s:%s' % normalizesplitpattern(kind, pat)
 
 def parsepatterns(pats):
-    """Parses a list of patterns into a typed pattern set."""
-    return set(normalizepattern(p) for p in pats)
+    """Parses an iterable of patterns into a typed pattern set.
+
+    Patterns are assumed to be ``path:`` if no prefix is present.
+    For safety and performance reasons, only some prefixes are allowed.
+    See ``validatepatterns()``.
+
+    This function should be used on patterns that come from the user to
+    normalize and validate them to the internal data structure used for
+    representing patterns.
+    """
+    res = {normalizepattern(orig) for orig in pats}
+    validatepatterns(res)
+    return res
+
+def validatepatterns(pats):
+    """Validate that patterns are in the expected data structure and format.
+
+    And that is a set of normalized patterns beginning with ``path:`` or
+    ``rootfilesin:``.
+
+    This function should be used to validate internal data structures
+    and patterns that are loaded from sources that use the internal,
+    prefixed pattern representation (but can't necessarily be fully trusted).
+    """
+    if not isinstance(pats, set):
+        raise error.ProgrammingError('narrow patterns should be a set; '
+                                     'got %r' % pats)
+
+    for pat in pats:
+        if not pat.startswith(VALID_PREFIXES):
+            # Use a Mercurial exception because this can happen due to user
+            # bugs (e.g. manually updating spec file).
+            raise error.Abort(_('invalid prefix on narrow pattern: %s') % pat,
+                              hint=_('narrow patterns must begin with one of '
+                                     'the following: %s') %
+                                   ', '.join(VALID_PREFIXES))
 
 def format(includes, excludes):
     output = '[include]\n'