Minor improvements

This commit is contained in:
Miroslav Štampar 2026-06-29 22:20:22 +02:00
parent 820efa7a8a
commit 7b60bc8284
11 changed files with 626 additions and 104 deletions

View file

@@ -727,5 +727,67 @@ class TestGraphqlUnicodeSafety(unittest.TestCase):
self.assertIn("caf", gi._cell(u"caf\xe9"))
class TestGraphqlSuggestionRecovery(unittest.TestCase):
    """G1: rebuilding the schema from 'Did you mean' hints when introspection is disabled.

    Each test replaces gi._gqlSend with a canned responder; the real sender is
    remembered in setUp() and put back in tearDown().
    """

    def setUp(self):
        # Keep a handle on the real transport so it can be restored afterwards.
        self._gql = gi._gqlSend

    def tearDown(self):
        gi._gqlSend = self._gql

    def test_harvest_suggestions_both_quote_styles(self):
        # graphql-js quotes names with "..."; other servers use '...' plus an Oxford ', or'
        cases = (
            ('Cannot query field "x" on type "Query". Did you mean "user" or "search"?',
             ["user", "search"]),
            ("Cannot query field 'x' on type 'Query'. Did you mean 'user', 'me', or 'node'?",
             ["user", "me", "node"]),
            ("no suggestion here", []),
        )
        for message, names in cases:
            self.assertEqual(gi._harvestSuggestions(message), names)

    def test_suggest_fields_from_validation_errors(self):
        # Asking for an unknown field makes the server list its closest real
        # field names (graphql-js phrasing).
        def responder(endpoint, query, variables=None):
            asks_for_user = "{ user }" in query or "{user}" in query
            if asks_for_user:
                # 'user' is a real (resolving) field
                return '{"data":{"user":null}}', 200
            body = ('{"errors":[{"message":"Cannot query field \\"%s\\" on type \\"Query\\". '
                    'Did you mean \\"user\\", \\"search\\" or \\"login\\"?"}]}'
                    % "zz")
            return body, 200

        gi._gqlSend = responder
        discovered = gi._suggestFields("http://t/graphql", "query")
        for name in ("user", "search", "login"):
            self.assertIn(name, discovered)

    def test_suggest_args_from_unknown_argument(self):
        # An unknown argument is answered with the nearest real argument name.
        def responder(endpoint, query, variables=None):
            body = ('{"errors":[{"message":"Unknown argument \\"zz\\" on field \\"Query.user\\". '
                    'Did you mean \\"username\\"?"}]}')
            return body, 200

        gi._gqlSend = responder
        arguments = gi._suggestArgs("http://t/graphql", "query", "user")
        self.assertIn("username", arguments)

    def test_introspect_via_suggestions_builds_slots(self):
        def responder(endpoint, query, variables=None):
            # Introspection-style queries are already filtered upstream; here every
            # unknown field gets the same suggestion set, while 'search' resolves
            # as a real field.
            if "{ search }" in query or "{search}" in query:
                body = '{"data":{"search":[]}}'
            elif "Unknown argument" in query:  # never matches; args fall back to wordlist
                body = '{}'
            else:
                body = ('{"errors":[{"message":"Cannot query field \\"zz\\" on type \\"Query\\". '
                        'Did you mean \\"search\\"?"}]}')
            return body, 200

        gi._gqlSend = responder
        slots = gi._introspectViaSuggestions("http://t/graphql")
        self.assertIsNotNone(slots)
        self.assertTrue(any(slot.fieldName == "search" for slot in slots))
        self.assertTrue(all(slot.strategy == "string" for slot in slots))

    def test_introspect_via_suggestions_none_without_suggestions(self):
        # A server that never offers suggestions gives nothing to recover from.
        def responder(endpoint, query, variables=None):
            return '{"errors":[{"message":"Syntax Error: unexpected token"}]}', 200

        gi._gqlSend = responder
        recovered = gi._introspectViaSuggestions("http://t/graphql")
        self.assertIsNone(recovered)
# Allow this test module to be executed directly as a script.
if __name__ == "__main__":
    unittest.main()

View file

@@ -313,10 +313,13 @@ class TestBooleanUniqueness(unittest.TestCase):
jinja2 = ssti._ENGINE_TABLE[0]
self.assertTrue(ssti._booleanUniquelyIdentifies(jinja2))
def test_freemarker_boolean_not_unique(self):
def test_freemarker_boolean_unique_with_computer_format(self):
freemarker = [e for e in ssti._ENGINE_TABLE if e.name == "Freemarker"][0]
# Freemarker and SpringEL both use ("${}", "true", "false") signature
self.assertFalse(ssti._booleanUniquelyIdentifies(freemarker))
# FreeMarker uses ${true?c} (computer-format), distinct from SpringEL's ${true} and
# Mako's ${True}, so its boolean rendering now uniquely identifies it within the ${ } family
self.assertTrue(ssti._booleanUniquelyIdentifies(freemarker))
spring = [e for e in ssti._ENGINE_TABLE if "Spring" in e.name][0]
self.assertTrue(ssti._booleanUniquelyIdentifies(spring))
def test_jinja2_with_arithmetic_and_boolean_is_exact(self):
"""Arithmetic + boolean (unique) should produce exact engine name,
@@ -467,3 +470,142 @@ class TestCommandEscaping(unittest.TestCase):
self.assertEqual(ssti._escapeSingleQuoted("hello"), "hello")
self.assertEqual(ssti._escapeSingleQuoted("it's"), "it\\'s")
self.assertEqual(ssti._escapeSingleQuoted("a\\b"), "a\\\\b")
class TestEngineMatrix(unittest.TestCase):
    """End-to-end fingerprinting checks for every row of ssti._ENGINE_TABLE.

    Each test points ssti._send at a mock server imitating a single engine
    (distinguishing probe, boolean rendering, divide-by-zero, arithmetic and
    parse errors) and asserts on what the ssti helpers report, so the whole
    detection path is exercised for all engines rather than Jinja2 alone. It
    also guards against regressions such as the ERB '%>' format bug, where a
    delimiter containing '%' silently disabled arithmetic detection.
    """

    # Sample error text per engine, meant to match that engine's errorRegex. The
    # samples contain neither digits nor boolean words, so that a sibling
    # engine's boolean probe landing in the error branch of this server is
    # still rejected.
    _ERRORS = {
        "Jinja2": "jinja2.exceptions.TemplateSyntaxError: unexpected end of template",
        "Mako": "mako.exceptions.SyntaxException: unclosed control structure",
        "Twig": "Twig_Error_Syntax: unexpected token in template",
        "Freemarker": "freemarker.core.ParseException: encountered unexpected directive",
        "Velocity": "org.apache.velocity.runtime.parser.ParseErrorException: encountered eof",
        "Spring EL / Thymeleaf": "org.springframework.expression.spel.SpelParseException: bad node",
        "ERB": "(erb): syntax error, unexpected end-of-input",
        "Pug/Jade": "pug: unexpected token in template",
        "Handlebars": "Handlebars: Parse error on line one",
    }

    # Divide-by-zero output per language family (captured from live
    # Mako/ERB/Jinja2 backends) used to exercise the S2 family probe. JS does
    # not raise: it renders Infinity.
    _DIVZERO = {
        "python": "ZeroDivisionError: division by zero",
        "ruby": "ZeroDivisionError: divided by 0",
        "php": "DivisionByZeroError: Division by zero",
        "java": "java.lang.ArithmeticException: / by zero",
        "nodejs": "Hello Infinity",
    }

    def setUp(self):
        # Remember the real sender; every test swaps in a mock server.
        self.original_send = ssti._send

    def tearDown(self):
        ssti._send = self.original_send

    @staticmethod
    def _make_server(engine, errors):
        """Return a send(place, parameter, value) stand-in that behaves like 'engine'.

        'errors' maps engine names to the error text the mock returns when one
        of the engine's errorProbes shows up in the submitted value.
        """
        import re

        opener = re.escape(engine.delimiter)
        closer = re.escape(engine.delimiterClose)
        if engine.arithmeticFmt:
            product = re.compile(opener + r"\s*(\d+)\s*\*\s*(\d+)\s*" + closer)
        else:
            # Engines without an arithmetic format never evaluate expressions.
            product = None
        familyErrors = TestEngineMatrix._DIVZERO
        sampleError = errors.get(engine.name)

        def server(place, parameter, value):
            # 1) the engine-specific distinguishing probe wins over everything else
            probe = engine.distinguishingProbe
            if probe and probe in value:
                if not engine.distinguishingResult:
                    # comment-style probe -> response stays at baseline
                    return "Hello"
                return "Hello " + engine.distinguishingResult

            # 2) the engine's own rendering of its boolean literals
            for literalAttr, renderedAttr in (("booleanTrue", "trueRendered"),
                                              ("booleanFalse", "falseRendered")):
                literal = getattr(engine, literalAttr)
                if literal and literal in value:
                    return "Hello " + getattr(engine, renderedAttr)

            if product is not None:
                # 3) divide-by-zero -> language-family-specific error (S2)
                if (engine.delimiter + "1/0" + engine.delimiterClose) in value:
                    return familyErrors.get(engine.family, "Hello")
                # 4) plain multiplication, only for engines that evaluate it
                hit = product.search(value)
                if hit:
                    left, right = hit.group(1), hit.group(2)
                    return "Hello %d" % (int(left) * int(right))

            # 5) malformed fragment in this engine's delimiter -> its error text
            if sampleError and any(fragment in value for fragment in engine.errorProbes):
                return sampleError

            # 6) everything else (incl. other engines' payloads) renders inertly
            return "Hello"

        return server

    def test_every_engine_is_fingerprinted(self):
        # A server imitating each engine must be identified as that engine.
        for engine in ssti._ENGINE_TABLE:
            ssti._send = self._make_server(engine, self._ERRORS)
            result, _evidence = ssti._fingerprint("GET", "q")
            self.assertIsNotNone(result, "engine '%s' was not detected at all" % engine.name)
            self.assertIn(engine.name, result.name,
                          "server running '%s' was identified as '%s'" % (engine.name, result.name))

    def test_family_probe_confirms_language(self):
        # S2: the divide-by-zero probe has to confirm the backend family for each
        # expression-evaluating, non-JS engine (Python/Ruby/PHP/Java).
        confirmable = ("python", "ruby", "php", "java")
        for engine in ssti._ENGINE_TABLE:
            evaluates = engine.arithmeticFmt and engine.delimiterClose
            if not evaluates or engine.family not in confirmable:
                continue
            ssti._send = self._make_server(engine, self._ERRORS)
            _result, evidence = ssti._fingerprint("GET", "q")
            self.assertTrue(evidence.get("family"),
                            "family probe should confirm '%s' on a %s backend" % (engine.name, engine.family))

    def test_filter_evasion_rce_fallbacks_present(self):
        # S3: every engine keeps its filter-evasion / sandbox-escape RCE fallbacks.
        def joinedPayloads(name):
            engine = next(e for e in ssti._ENGINE_TABLE if e.name == name)
            return " ".join(payload for payload, _description in engine.rcePayloads)

        expectations = (
            # dot/underscore-free attr() chain, hex-escaped dunders
            ("Jinja2", ("attr(", "\\x5f")),
            ("Twig", ("sort('system')", "map('system')")),
            # output-capturing SpEL, OGNL fallback
            ("Spring EL / Thymeleaf", ("readLine", "@java.lang.Runtime@getRuntime")),
        )
        for name, needles in expectations:
            payloads = joinedPayloads(name)
            for needle in needles:
                self.assertIn(needle, payloads)

    def test_family_probe_does_not_crossmatch(self):
        # Python's 'division by zero' must NOT satisfy the (case-sensitive) PHP
        # signature, so a Jinja2/Python server never lets Twig/PHP claim a
        # family match.
        def byName(name):
            return next(e for e in ssti._ENGINE_TABLE if e.name == name)

        jinja = byName("Jinja2")
        ssti._send = self._make_server(jinja, self._ERRORS)
        cache = {}
        twig = byName("Twig")
        self.assertEqual(ssti._probeFamily("GET", "q", jinja, cache), "python")
        self.assertNotEqual(ssti._probeFamily("GET", "q", twig, cache), twig.family)

    def test_erb_arithmetic_works_after_format_fix(self):
        # Direct regression guard for the '<%= %d*%d %>' / '<%= %s %>' format bug.
        erb = next(e for e in ssti._ENGINE_TABLE if e.name == "ERB")
        ssti._send = self._make_server(erb, self._ERRORS)
        proven = ssti._probeArithmetic("GET", "q", erb)
        self.assertTrue(proven,
                        "ERB arithmetic proof must succeed once %-format no longer crashes on '%>'")
        result, evidence = ssti._fingerprint("GET", "q")
        self.assertEqual(result.name, "ERB")
        self.assertTrue(evidence.get("arithmetic"))

    def test_mako_distinguished_from_freemarker_spring(self):
        # Mako shares '${ }' with Freemarker/Spring but renders capital
        # True/False, so it has to be named exactly (through its unique boolean
        # rendering) rather than reported as "probable".
        mako = next(e for e in ssti._ENGINE_TABLE if e.name == "Mako")
        ssti._send = self._make_server(mako, self._ERRORS)
        result, evidence = ssti._fingerprint("GET", "q")
        self.assertEqual(result.name, "Mako")
        self.assertTrue(evidence.get("boolean"))

View file

@@ -252,6 +252,40 @@ class TestExtraction(unittest.TestCase):
maxCount=8)
self.assertEqual(result, expected)
def test_infer_string_binary_search(self):
    # Run the binary-search extractor against real lxml evaluation of the
    # boundary-wrapped predicates over _XML and check the exact values come back.
    boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
    builder = xpath._XPathPayloadBuilder("x", boundary)
    template = _XPATH_TEMPLATES["function_arg"]

    class LxmlOracle(object):
        # True when the injected payload selects at least one node.
        def extract(self, payload):
            return _xpath_eval(template, payload) > 0

    oracle = LxmlOracle()
    # Absolute targets are resolved the same way the live tree-walk would.
    expectations = (
        ("name(/*)", "directory"),
        ("string(//user[1]/name)", "luther"),
        ("string(//user[1]/@id)", "1"),
    )
    for target, recovered in expectations:
        self.assertEqual(xpath._inferString(oracle, builder, target, maxLen=32), recovered)
def test_infer_string_matches_linear(self):
    # The fast extractor and the legacy linear extractor must recover the same value.
    boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
    builder = xpath._XPathPayloadBuilder("x", boundary)
    template = _XPATH_TEMPLATES["function_arg"]

    class LxmlOracle(object):
        # True when the injected payload selects at least one node.
        def extract(self, payload):
            return _xpath_eval(template, payload) > 0

    def nameStartsWith(b, p, prefix):
        # Prefix predicate handed to the linear extractor.
        return b.nameStartsWith(p, prefix)

    oracle = LxmlOracle()
    fast = xpath._inferString(oracle, builder, "name(/*)", maxLen=32)
    linear = xpath._inferValue(oracle, builder, "/*", nameStartsWith, maxLen=32)
    self.assertEqual(fast, linear)
class TestBackendFingerprint(unittest.TestCase):
def test_lxml(self):
@@ -323,7 +357,7 @@ class TestRealXPathSyntax(unittest.TestCase):
"False payload '%s' should match no nodes" % falsePayload)
# Extraction predicate must be valid and change the result truthfully
builder = xpath._XPathPayloadBuilder(original, boundary)
self.assertIsNotNone(xpath._XPathPayloadBuilder(original, boundary))
truePred = xpath._makePayload(original, boundary, "true()")
falsePred = xpath._makePayload(original, boundary, "false()")
self.assertGreater(self._count(template, truePred), 0,
@@ -368,7 +402,7 @@ class TestRealXPathSyntax(unittest.TestCase):
boundary = xpath._BREAKOUT_BOUNDARY["' or '1'='1"]
# Simulate what xpathScan() does: use a sentinel as base for OR-style
sentinel = "zzznotpresent"
builder = xpath._XPathPayloadBuilder(sentinel, boundary)
self.assertIsNotNone(xpath._XPathPayloadBuilder(sentinel, boundary))
truePred = xpath._makePayload(sentinel, boundary, "true()")
falsePred = xpath._makePayload(sentinel, boundary, "false()")
tpl = _XPATH_TEMPLATES["single_quoted"]