Adding support for better JSON comparison

This commit is contained in:
Miroslav Štampar 2026-06-16 10:02:44 +02:00
parent cc7f803d60
commit a0cbfba9bd
7 changed files with 256 additions and 32 deletions

View file

@ -1442,6 +1442,45 @@ def parseJson(content):
return retVal
def jsonMinimize(content):
    """
    Returns an order-independent canonical "leaf-path" projection of a JSON document, used for
    structure-aware response comparison. Object key order, insignificant whitespace and
    equivalent spellings of the same number (e.g. 1e2 / 100.0) do not perturb the result, while
    a changed value, a changed array length or reordered array items do (note: 1 and 1.0 are
    projected differently). Returns None when content is not parseable JSON or is nested too
    deeply to be processed, so callers can fall back to text comparison

    >>> jsonMinimize('{"b": 2, "a": 1}') == jsonMinimize('{"a":1, "b":2}')
    True
    >>> jsonMinimize('{"a": {"b": 1}}') == '.a.b=1'
    True
    >>> jsonMinimize('{"a": [1, 2]}') == jsonMinimize('{"a": [2, 1]}')
    False
    >>> jsonMinimize('not json') is None
    True
    >>> jsonMinimize('{}') == ''
    True
    """

    try:
        data = json.loads(content)
    except (ValueError, TypeError, RuntimeError):
        # ValueError/TypeError -> not JSON (or not a string at all); RuntimeError is the base
        # of RecursionError (and what Python 2 raises) for pathologically nested documents
        return None

    lines = []

    def _walk(obj, path):
        if isinstance(obj, dict):
            # sorted keys -> immune to key reordering
            for key in sorted(obj):
                _walk(obj[key], "%s.%s" % (path, key))
        elif isinstance(obj, (list, tuple)):
            # explicit length entry -> a length change always registers
            lines.append("%s.__len__=%d" % (path, len(obj)))

            # index kept in the path -> order-sensitive (correct for result sets)
            for index, item in enumerate(obj):
                _walk(item, "%s[%d]" % (path, index))
        else:
            # scalar values kept (boolean detection flips values)
            lines.append("%s=%s" % (path, obj))

    try:
        _walk(data, "")
    except RuntimeError:
        # document parsed, but is too deep for the recursive walk; treat it like unparseable
        # content so that the caller uses its text comparison fallback instead of crashing
        return None

    return "\n".join(sorted(lines))
def parsePasswordHash(password):
"""
In case of Microsoft SQL Server password hash value is expanded to its components

View file

@ -20,7 +20,7 @@ from lib.core.enums import OS
from thirdparty import six
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.10.6.118"
VERSION = "1.10.6.119"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)

View file

@ -55,6 +55,7 @@ def vulnTest():
("--dummy", ("all tested parameters do not appear to be injectable", "does not seem to be injectable", "there is not at least one", "~might be injectable")),
("-u \"<url>&id2=1\" -p id2 -v 5 --flush-session --level=5 --text-only --test-filter=\"AND boolean-based blind - WHERE or HAVING clause (MySQL comment)\"", ("~1AND",)),
("--list-tampers", ("between", "MySQL", "xforwardedfor")),
("-u \"<url>&json=1\" -p id --flush-session --technique=B --banner", ("Type: boolean-based blind", "banner: '3.")), # JSON-response detection via the structure-aware oracle (no --string hint)
("-r <request> --flush-session -v 5 --test-skip=\"heavy\" --save=<config>", ("CloudFlare", "web application technology: Express", "possible DBMS: 'SQLite'", "User-Agent: foobar", "~Type: time-based blind", "saved command line options to the configuration file")),
("-c <config>", ("CloudFlare", "possible DBMS: 'SQLite'", "User-Agent: foobar", "~Type: time-based blind")),
("-l <log> --flush-session --keep-alive --skip-waf -vvvvv --technique=U --union-from=users --banner --parse-errors", ("banner: '3.", "ORDER BY term out of range", "~xp_cmdshell", "Connection: keep-alive")),

View file

@ -11,6 +11,7 @@ import re
from lib.core.common import extractRegexResult
from lib.core.common import getFilteredPageContent
from lib.core.common import jsonMinimize
from lib.core.common import listToStrValue
from lib.core.common import removeDynamicContent
from lib.core.common import getLastRequestHTTPError
@ -20,6 +21,7 @@ from lib.core.convert import getBytes
from lib.core.data import conf
from lib.core.data import kb
from lib.core.data import logger
from lib.core.enums import HTTP_HEADER
from lib.core.exception import SqlmapNoneDataException
from lib.core.settings import DEFAULT_PAGE_ENCODING
from lib.core.settings import DIFF_TOLERANCE
@ -34,6 +36,20 @@ from lib.core.settings import URI_HTTP_HEADER
from lib.core.threads import getCurrentThreadData
from thirdparty import six
def _isJsonResponse(headers):
"""
Returns True if the response Content-Type indicates a JSON document (e.g. 'application/json'
or a structured suffix like 'application/vnd.api+json')
"""
retVal = False
if headers:
contentType = (headers.get(HTTP_HEADER.CONTENT_TYPE) or "").split(';')[0].strip().lower()
retVal = contentType == "application/json" or contentType.endswith("+json")
return retVal
def comparison(page, headers, code=None, getRatioValue=False, pageLength=None):
if not isinstance(page, (six.text_type, six.binary_type, type(None))):
logger.critical("got page of type %s; repr(page)[:200]=%s" % (type(page), repr(page)[:200]))
@ -97,6 +113,10 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
seqMatcher = threadData.seqMatcher
seqMatcher.set_seq1(kb.pageTemplate)
# raw (pre-dynamic-removal) body, kept for the structured (JSON) comparison path below;
# parsing the raw form avoids removeDynamicContent splicing JSON mid-token
rawPage = page
if page:
# In case of a DBMS error page return None
if kb.errorIsNone and (wasLastResponseDBMSError() or wasLastResponseHTTPError()) and not kb.negativeLogic:
@ -148,12 +168,22 @@ def _comparison(page, headers, code, getRatioValue, pageLength):
else:
seq1, seq2 = None, None
if conf.titles:
seq1 = extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a)
seq2 = extractRegexResult(HTML_TITLE_REGEX, page)
else:
seq1 = getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a
seq2 = getFilteredPageContent(page, True) if conf.textOnly else page
# Structure-aware comparison for JSON responses: compare an order-independent
# projection of the parsed bodies instead of raw text, so key reordering/whitespace
# noise does not perturb the ratio while a changed value/array-length does. Engages
# only on a JSON Content-Type with both bodies parseable; any doubt (or an explicit
# --text-only/--titles) falls back to the exact text path below.
if _isJsonResponse(headers) and not (conf.titles or conf.textOnly or kb.nullConnection):
seq1 = jsonMinimize(kb.pageTemplate)
seq2 = jsonMinimize(rawPage)
if seq1 is None or seq2 is None:
if conf.titles:
seq1 = extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a)
seq2 = extractRegexResult(HTML_TITLE_REGEX, page)
else:
seq1 = getFilteredPageContent(seqMatcher.a, True) if conf.textOnly else seqMatcher.a
seq2 = getFilteredPageContent(page, True) if conf.textOnly else page
if seq1 is None or seq2 is None:
return None