Minor improvements

2026-06-30 21:41:03 +00:00 · 2026-06-29 22:20:22 +02:00 · 2026-06-29 22:20:22 +02:00 · 7b60bc8284
commit 7b60bc8284
parent 820efa7a8a
11 changed files with 626 additions and 104 deletions
--- a/tests/test_graphql.py
+++ b/tests/test_graphql.py
@ -727,5 +727,67 @@ class TestGraphqlUnicodeSafety(unittest.TestCase):
        self.assertIn("caf", gi._cell(u"caf\xe9"))


+class TestGraphqlSuggestionRecovery(unittest.TestCase):
+    """G1: schema recovery from 'Did you mean' suggestions when introspection is disabled."""
+
+    def setUp(self):
+        self._gql = gi._gqlSend  # remember the real sender; tests below may replace it with a fake
+
+    def tearDown(self):
+        gi._gqlSend = self._gql  # put back the sender saved in setUp so a fake never leaks out of a test
+
+    def test_harvest_suggestions_both_quote_styles(self):
+        # graphql-js double-quotes the suggested names; some servers single-quote them and use an Oxford-comma 'or'
+        self.assertEqual(
+            gi._harvestSuggestions('Cannot query field "x" on type "Query". Did you mean "user" or "search"?'),
+            ["user", "search"])
+        self.assertEqual(
+            gi._harvestSuggestions("Cannot query field 'x' on type 'Query'. Did you mean 'user', 'me', or 'node'?"),
+            ["user", "me", "node"])
+        self.assertEqual(gi._harvestSuggestions("no suggestion here"), [])  # no "Did you mean" clause -> empty list
+
+    def test_suggest_fields_from_validation_errors(self):
+        # Fake server: any unknown field draws a graphql-js style "Did you mean" error naming the real fields
+        def fake(endpoint, query, variables=None):
+            if "{ user }" in query or "{user}" in query:
+                return '{"data":{"user":null}}', 200          # 'user' is a real (resolving) field
+            return ('{"errors":[{"message":"Cannot query field \\"%s\\" on type \\"Query\\". '
+                    'Did you mean \\"user\\", \\"search\\" or \\"login\\"?"}]}'
+                    % "zz", 200)  # the error always names "zz", whatever field the query actually probed
+        gi._gqlSend = fake
+        fields = gi._suggestFields("http://t/graphql", "query")
+        for expected in ("user", "search", "login"):
+            self.assertIn(expected, fields)  # membership only; order and extra entries are not checked
+
+    def test_suggest_args_from_unknown_argument(self):
+        def fake(endpoint, query, variables=None):  # answers every request with the same "Unknown argument" error
+            return ('{"errors":[{"message":"Unknown argument \\"zz\\" on field \\"Query.user\\". '
+                    'Did you mean \\"username\\"?"}]}', 200)
+        gi._gqlSend = fake
+        self.assertIn("username", gi._suggestArgs("http://t/graphql", "query", "user"))  # the suggested name must be harvested
+
+    def test_introspect_via_suggestions_builds_slots(self):
+        def fake(endpoint, query, variables=None):
+            # introspection-style queries already filtered upstream; here every unknown field
+            # yields the same suggestion set, and 'search' resolves as a real field
+            if "{ search }" in query or "{search}" in query:
+                return '{"data":{"search":[]}}', 200
+            if "Unknown argument" in query:   # never matches; args fall back to wordlist
+                return '{}', 200
+            return ('{"errors":[{"message":"Cannot query field \\"zz\\" on type \\"Query\\". '
+                    'Did you mean \\"search\\"?"}]}', 200)
+        gi._gqlSend = fake
+        slots = gi._introspectViaSuggestions("http://t/graphql")
+        self.assertIsNotNone(slots)  # recovery must produce a result here, not None
+        self.assertTrue(any(s.fieldName == "search" for s in slots))  # the suggested field must appear among the slots
+        self.assertTrue(all(s.strategy == "string" for s in slots))  # and every slot must carry the "string" strategy
+
+    def test_introspect_via_suggestions_none_without_suggestions(self):
+        def fake(endpoint, query, variables=None):
+            return '{"errors":[{"message":"Syntax Error: unexpected token"}]}', 200  # an error with no "Did you mean" hint
+        gi._gqlSend = fake
+        self.assertIsNone(gi._introspectViaSuggestions("http://t/graphql"))  # nothing to harvest -> None
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_ssti.py
+++ b/tests/test_ssti.py
@ -313,10 +313,13 @@ class TestBooleanUniqueness(unittest.TestCase):
        jinja2 = ssti._ENGINE_TABLE[0]
        self.assertTrue(ssti._booleanUniquelyIdentifies(jinja2))

-    def test_freemarker_boolean_not_unique(self):
+    def test_freemarker_boolean_unique_with_computer_format(self):
        freemarker = [e for e in ssti._ENGINE_TABLE if e.name == "Freemarker"][0]
-        # Freemarker and SpringEL both use ("${}", "true", "false") signature
-        self.assertFalse(ssti._booleanUniquelyIdentifies(freemarker))
+        # FreeMarker uses ${true?c} (computer-format), distinct from SpringEL's ${true} and
+        # Mako's ${True}, so its boolean rendering now uniquely identifies it within the ${ } family
+        self.assertTrue(ssti._booleanUniquelyIdentifies(freemarker))
+        spring = [e for e in ssti._ENGINE_TABLE if "Spring" in e.name][0]
+        self.assertTrue(ssti._booleanUniquelyIdentifies(spring))

    def test_jinja2_with_arithmetic_and_boolean_is_exact(self):
        """Arithmetic + boolean (unique) should produce exact engine name,
@ -467,3 +470,142 @@ class TestCommandEscaping(unittest.TestCase):
        self.assertEqual(ssti._escapeSingleQuoted("hello"), "hello")
        self.assertEqual(ssti._escapeSingleQuoted("it's"), "it\\'s")
        self.assertEqual(ssti._escapeSingleQuoted("a\\b"), "a\\\\b")
+
+
+class TestEngineMatrix(unittest.TestCase):
+    """For EVERY engine in the table, stand up a faithful mock server running that
+    engine and assert _fingerprint() identifies it. This proves each engine's full
+    detection path (arithmetic/boolean/error/distinguishing) actually works end to
+    end - not just Jinja2 - and guards against regressions like the ERB '%>' format
+    bug where a delimiter containing '%' silently disabled arithmetic detection."""
+
+    def setUp(self):
+        self.original_send = ssti._send  # keep the real sender; the tests swap in a mock server
+
+    def tearDown(self):
+        ssti._send = self.original_send  # restore the sender saved in setUp
+
+    # Sample error text per engine, keyed by engine name; each sample is meant to match that engine's errorRegex.
+    # The samples contain no digits and no boolean words, so that when a sibling engine's boolean probe
+    # falls through to the error branch on this server, the response is still correctly rejected.
+    _ERRORS = {
+        "Jinja2": "jinja2.exceptions.TemplateSyntaxError: unexpected end of template",
+        "Mako": "mako.exceptions.SyntaxException: unclosed control structure",
+        "Twig": "Twig_Error_Syntax: unexpected token in template",
+        "Freemarker": "freemarker.core.ParseException: encountered unexpected directive",
+        "Velocity": "org.apache.velocity.runtime.parser.ParseErrorException: encountered eof",
+        "Spring EL / Thymeleaf": "org.springframework.expression.spel.SpelParseException: bad node",
+        "ERB": "(erb): syntax error, unexpected end-of-input",
+        "Pug/Jade": "pug: unexpected token in template",
+        "Handlebars": "Handlebars: Parse error on line one",
+    }
+
+    # Divide-by-zero response text keyed by engine.family, so the S2 family probe can be exercised (real
+    # error text captured from live Mako/ERB/Jinja2 backends). JS yields Infinity rather than an error.
+    _DIVZERO = {
+        "python": "ZeroDivisionError: division by zero",
+        "ruby":   "ZeroDivisionError: divided by 0",
+        "php":    "DivisionByZeroError: Division by zero",
+        "java":   "java.lang.ArithmeticException: / by zero",
+        "nodejs": "Hello Infinity",
+    }
+
+    @staticmethod
+    def _make_server(engine, errors):  # builds the callable that the tests install as ssti._send
+        import re
+        op = re.escape(engine.delimiter)
+        cl = re.escape(engine.delimiterClose)
+        arithRe = re.compile(op + r"\s*(\d+)\s*\*\s*(\d+)\s*" + cl) if engine.arithmeticFmt else None  # None: engine has no arithmeticFmt
+        divZero = TestEngineMatrix._DIVZERO
+        err = errors.get(engine.name)  # None when no sample error is listed for this engine
+
+        def server(place, parameter, value):  # only 'value' is inspected; the checks below run in priority order
+            # 1) engine-specific distinguishing probe
+            if engine.distinguishingProbe and engine.distinguishingProbe in value:
+                if engine.distinguishingResult:
+                    return "Hello " + engine.distinguishingResult
+                return "Hello"          # comment-style probe -> stays at baseline
+            # 2) this engine's own boolean rendering
+            if engine.booleanTrue and engine.booleanTrue in value:
+                return "Hello " + engine.trueRendered
+            if engine.booleanFalse and engine.booleanFalse in value:
+                return "Hello " + engine.falseRendered
+            # 3) divide-by-zero -> language-family-specific error (S2), for engines that evaluate it
+            if arithRe is not None and (engine.delimiter + "1/0" + engine.delimiterClose) in value:
+                return divZero.get(engine.family, "Hello")  # a family without an entry renders the plain baseline
+            # 4) arithmetic, but ONLY for engines that actually evaluate it
+            if arithRe is not None:
+                m = arithRe.search(value)
+                if m:
+                    return "Hello %d" % (int(m.group(1)) * int(m.group(2)))  # render the product of the two operands
+            # 5) malformed fragment in this engine's delimiter -> engine-specific error
+            if err and any(p in value for p in engine.errorProbes):
+                return err
+            # 6) anything else (incl. other engines' payloads) renders inertly
+            return "Hello"
+
+        return server
+
+    def test_every_engine_is_fingerprinted(self):
+        for engine in ssti._ENGINE_TABLE:
+            ssti._send = self._make_server(engine, self._ERRORS)  # mock backend that behaves like this one engine
+            result, evidence = ssti._fingerprint("GET", "q")  # evidence is not inspected in this test
+            self.assertIsNotNone(result, "engine '%s' was not detected at all" % engine.name)
+            self.assertIn(engine.name, result.name,  # substring check: a longer reported name containing it also passes
+                "server running '%s' was identified as '%s'" % (engine.name, result.name))
+
+    def test_family_probe_confirms_language(self):
+        # S2: the divide-by-zero probe must confirm the backend family for every
+        # expression-evaluating, non-JS engine (Python/Ruby/PHP/Java).
+        for engine in ssti._ENGINE_TABLE:
+            if not (engine.arithmeticFmt and engine.delimiterClose):
+                continue  # needs both an arithmetic format and a closing delimiter to be probed
+            if engine.family not in ("python", "ruby", "php", "java"):
+                continue  # only these four families are checked here
+            ssti._send = self._make_server(engine, self._ERRORS)
+            _result, evidence = ssti._fingerprint("GET", "q")  # only the evidence dict matters here
+            self.assertTrue(evidence.get("family"),
+                "family probe should confirm '%s' on a %s backend" % (engine.name, engine.family))
+
+    def test_filter_evasion_rce_fallbacks_present(self):
+        # S3: Jinja2, Twig and Spring EL must retain their filter-evasion / sandbox-escape RCE fallbacks.
+        def rce(name):  # all payload strings of the named engine, joined with spaces for substring checks
+            return " ".join(p for p, _d in next(e for e in ssti._ENGINE_TABLE if e.name == name).rcePayloads)
+        jinja = rce("Jinja2")
+        self.assertIn("attr(", jinja)                       # dot/underscore-free attr() chain
+        self.assertIn("\\x5f", jinja)                       # hex-escaped dunders
+        twig = rce("Twig")
+        self.assertIn("sort('system')", twig)
+        self.assertIn("map('system')", twig)
+        spring = rce("Spring EL / Thymeleaf")
+        self.assertIn("readLine", spring)                   # output-capturing SpEL
+        self.assertIn("@java.lang.Runtime@getRuntime", spring)   # OGNL fallback
+
+    def test_family_probe_does_not_crossmatch(self):
+        # Python 'division by zero' must NOT satisfy the (case-sensitive) PHP signature, so a
+        # Jinja2/Python server never lets Twig/PHP claim a family match.
+        jinja = next(e for e in ssti._ENGINE_TABLE if e.name == "Jinja2")
+        ssti._send = self._make_server(jinja, self._ERRORS)  # the mock backend is Jinja2 for both probes below
+        cache = {}  # one cache object is passed to both _probeFamily calls
+        twig = next(e for e in ssti._ENGINE_TABLE if e.name == "Twig")
+        self.assertEqual(ssti._probeFamily("GET", "q", jinja, cache), "python")
+        self.assertNotEqual(ssti._probeFamily("GET", "q", twig, cache), twig.family)  # Twig must not be confirmed against this backend
+
+    def test_erb_arithmetic_works_after_format_fix(self):
+        # Direct regression guard for the '<%= %d*%d %>' / '<%= %s %>' format bug.
+        erb = next(e for e in ssti._ENGINE_TABLE if e.name == "ERB")
+        ssti._send = self._make_server(erb, self._ERRORS)
+        self.assertTrue(ssti._probeArithmetic("GET", "q", erb),
+            "ERB arithmetic proof must succeed once %-format no longer crashes on '%>'")
+        result, evidence = ssti._fingerprint("GET", "q")  # then the full pipeline, not just the single probe
+        self.assertEqual(result.name, "ERB")
+        self.assertTrue(evidence.get("arithmetic"))  # the identification must rest on arithmetic evidence
+
+    def test_mako_distinguished_from_freemarker_spring(self):
+        # Mako shares '${ }' with Freemarker/Spring but renders capital True/False;
+        # it must be named exactly (via unique boolean rendering), not "probable".
+        mako = next(e for e in ssti._ENGINE_TABLE if e.name == "Mako")
+        ssti._send = self._make_server(mako, self._ERRORS)
+        result, evidence = ssti._fingerprint("GET", "q")
+        self.assertEqual(result.name, "Mako")  # exact equality, so no qualifier may be attached to the name
+        self.assertTrue(evidence.get("boolean"))  # boolean evidence must have been recorded
--- a/tests/test_xpath.py
+++ b/tests/test_xpath.py
@ -252,6 +252,40 @@ class TestExtraction(unittest.TestCase):
                                   maxCount=8)
        self.assertEqual(result, expected)

+    def test_infer_string_binary_search(self):
+        # Drive the binary-search extractor through real lxml evaluation of the
+        # boundary-wrapped predicates against _XML and confirm exact recovery.
+        boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
+        builder = xpath._XPathPayloadBuilder("x", boundary)
+        template = _XPATH_TEMPLATES["function_arg"]
+
+        class MockOracle(object):  # stand-in oracle exposing only the extract() method used in this test
+            def extract(self, payload):
+                return _xpath_eval(template, payload) > 0  # True when the evaluation reports a positive result
+
+        oracle = MockOracle()
+        # Absolute targets are resolved the same way the live tree-walk would.
+        self.assertEqual(xpath._inferString(oracle, builder, "name(/*)", maxLen=32), "directory")
+        self.assertEqual(xpath._inferString(oracle, builder, "string(//user[1]/name)", maxLen=32), "luther")
+        self.assertEqual(xpath._inferString(oracle, builder, "string(//user[1]/@id)", maxLen=32), "1")
+
+    def test_infer_string_matches_linear(self):
+        # The fast extractor must agree with the legacy linear extractor.
+        boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
+        builder = xpath._XPathPayloadBuilder("x", boundary)
+        template = _XPATH_TEMPLATES["function_arg"]
+
+        class MockOracle(object):  # same boolean oracle shape as in the binary-search test
+            def extract(self, payload):
+                return _xpath_eval(template, payload) > 0  # True when the evaluation reports a positive result
+
+        oracle = MockOracle()
+        fast = xpath._inferString(oracle, builder, "name(/*)", maxLen=32)
+        linear = xpath._inferValue(oracle, builder, "/*",  # presumably the same target: the name of the root element
+                                   lambda b, p, prefix: b.nameStartsWith(p, prefix),
+                                   maxLen=32)
+        self.assertEqual(fast, linear)  # both extractors share one oracle and one builder
+

 class TestBackendFingerprint(unittest.TestCase):
    def test_lxml(self):
@ -323,7 +357,7 @@ class TestRealXPathSyntax(unittest.TestCase):
            "False payload '%s' should match no nodes" % falsePayload)

        # Extraction predicate must be valid and change the result truthfully
-        builder = xpath._XPathPayloadBuilder(original, boundary)
+        self.assertIsNotNone(xpath._XPathPayloadBuilder(original, boundary))
        truePred = xpath._makePayload(original, boundary, "true()")
        falsePred = xpath._makePayload(original, boundary, "false()")
        self.assertGreater(self._count(template, truePred), 0,
@ -368,7 +402,7 @@ class TestRealXPathSyntax(unittest.TestCase):
        boundary = xpath._BREAKOUT_BOUNDARY["' or '1'='1"]
        # Simulate what xpathScan() does: use a sentinel as base for OR-style
        sentinel = "zzznotpresent"
-        builder = xpath._XPathPayloadBuilder(sentinel, boundary)
+        self.assertIsNotNone(xpath._XPathPayloadBuilder(sentinel, boundary))
        truePred = xpath._makePayload(sentinel, boundary, "true()")
        falsePred = xpath._makePayload(sentinel, boundary, "false()")
        tpl = _XPATH_TEMPLATES["single_quoted"]