From 57dcc04cbe2896784a487e187be79b7f9ae78aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0tampar?= Date: Tue, 23 Jun 2026 22:28:19 +0200 Subject: [PATCH] Minor improvements --- data/txt/sha256sums.txt | 8 ++--- lib/core/settings.py | 10 +++++- lib/techniques/blind/inference.py | 11 +++++++ lib/utils/dialect.py | 43 +++++++++++++++---------- tests/test_dialectdbms.py | 52 ++++++++++++++++++++++--------- 5 files changed, 87 insertions(+), 37 deletions(-) diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index 7576be296..ee05cd0f8 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -189,7 +189,7 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch 9bf174058f15d14e24e94f9aaf42df045119d3617c6c54bd2f3af79b462f331d lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -90a49806b83a83f6402b3dd6e35f7f2468d3dbcc0cafc3c382bda6e248344609 lib/core/settings.py +527ee951185f691c68638f03d4da8f9bc894a93f1a791865fc2cc0992ad5f03e lib/core/settings.py c7804223319e18eb0b8e2cbf0a8b6896d1cefb7b0b1a2e9f1cf826a8a3b56750 lib/core/shell.py a2e98a94b231432736d6b304fc75525c8b5fdb4768c418387c5b4c1a610dad64 lib/core/subprocessng.py 19f1e3c5e3ba703d28d510cd7a9ab8284d5fbe9df5ce7e77c86e5931571364b7 lib/core/target.py @@ -232,7 +232,7 @@ f522436fbd14bdab090a1d305fcac0361800cb8e36c8cbcb47933298376a71e0 lib/takeover/r 0787f78e6bd9bb21d4267c95c4c99806711bb57c5518485c2e25f10fcf9c41fc lib/takeover/udf.py 23d73af417604dab460b74cdc230896153f018a6c00d144019491053640a172f lib/takeover/web.py 8cc1e226d4150fe8aa1a056e5d32d858ed6444d3d4e2af7fb4bc08f0bbe9d527 lib/takeover/xp_cmdshell.py -63e2bc0e2fb6407760245b4f36d7430b626b9654bce51485b6cbf24717225246 lib/techniques/blind/inference.py +a66a4b9df6207dce722c9b71d290ea426723cb4b697b416065dc7dd5db96fe8e lib/techniques/blind/inference.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/blind/__init__.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/techniques/dns/__init__.py 3df9839fb92a81d46b6194d7adacb43f391efb78b071783c132e8d596ecbfaf1 lib/techniques/dns/test.py @@ -247,7 +247,7 @@ aeefb42ea0c68f72744bc1bfd7194ec1bc06480d8a7e23f4b8d3d23fbba2b014 lib/utils/api. 442555ab85277aff7c9e0cf465ea5b0d28395c326f68363449b2d3941f4b6de2 lib/utils/brute.py da5bcbcda3f667582adf5db8c1b5d511b469ac61b55d387cec66de35720ed718 lib/utils/crawler.py a94958be0ec3e9d28d8171813a6a90655a9ad7e6aa33c661e8d8ebbfcf208dbb lib/utils/deps.py -0fd055877e8b21d17c11447dac7f91ef1766e0b04d470c494a6d98f5249e3186 lib/utils/dialect.py +b0d8ae8513c1f5ffcaa4bf0398790f26bc2180a6acf07bf5b2c86555bf9113f6 lib/utils/dialect.py 51cfab194cd5b6b24d62706fb79db86c852b9e593f4c55c15b35f175e70c9d75 lib/utils/getch.py 853c3595e1d2efc54b8bfb6ab12c55d1efc1603be266978e3a7d96d553d91a52 lib/utils/gui.py 972c5db9c9e30ac0f91c0f8d4df4531d0304e151dac99f1399c37c952ba9f935 lib/utils/har.py @@ -584,7 +584,7 @@ a48c411fea864e6bcd6a1c7e1a35094b8cda8d15088fd9e7b0270542ae20daa9 tests/test_com c17544be5e945dc8c4fbb5c3b922da8eceec30b0fb239c32fb5f40e1660a197f tests/test_datafiles.py 9c240d4f796e56376374d4ce46f358ceb7d48cc6a7427760c5bfb89ff01cb545 tests/test_datatypes.py 3804eb2d730220360f9dc07d5994eb64e9f65acf3b0d8648df8df2a2177ba8fd tests/test_decodepage.py -9c0a0cd0b2d52a53f75c98c60f87a022354b7c3dc4baaf3fe1e272a0af5b7f0a tests/test_dialectdbms.py +b6d8a4bc9c46a332a2dc7b3cf862ea67e38b5c5701cfd8eb3556021f6b611416 tests/test_dialectdbms.py e40a49cfa73c45b3c3c6d1d1d00738861e270cb7a07b28f5a5356f9c7c800cf2 tests/test_dialect.py 993a2d4d87c4fbaf261663b069629acc95ee4405aa0c42cf5a8f39649fdb0fff tests/test_dicts.py 7f12466974394312dad3d98651ef8a50d1585bee0f8cd25da0b77b08c2047e46 tests/test_dns_engine.py diff --git a/lib/core/settings.py b/lib/core/settings.py index b00474f0c..00e323109 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -20,7 +20,7 @@ from lib.core.enums import OS from thirdparty import six # sqlmap version (...) -VERSION = "1.10.6.152" +VERSION = "1.10.6.153" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) @@ -533,6 +533,14 @@ for _weight, _chars in ((6, " etaoinsrhldcumfgypwbvkxjqz"), (4, "0123456789"), ( for _char in _chars: HUFFMAN_PRIOR_WEIGHTS[ord(_char)] = _weight +# Bounds for feeding extracted values back into the "good samaritan" (--predict-output) common-output +# pool for their enumeration context, so later same-context items that share structure (e.g. +# wp_posts / wp_users / wp_options ...) are predicted faster. MAX_LENGTH keeps large data cells from +# bloating/polluting the pool (identifiers are short); MAX_ITEMS bounds per-context growth so a huge +# enumeration cannot make the per-character prediction scan costly. Misses always fall back to bisection. +PREDICTION_FEEDBACK_MAX_LENGTH = 128 +PREDICTION_FEEDBACK_MAX_ITEMS = 10000 + # Minimum range between minimum and maximum of statistical set MIN_STATISTICAL_RANGE = 0.01 diff --git a/lib/techniques/blind/inference.py b/lib/techniques/blind/inference.py index 3b2020233..46a99430c 100644 --- a/lib/techniques/blind/inference.py +++ b/lib/techniques/blind/inference.py @@ -44,6 +44,8 @@ from lib.core.exception import SqlmapUnsupportedFeatureException from lib.core.settings import CHAR_INFERENCE_MARK from lib.core.settings import HUFFMAN_PROBE_LIMIT from lib.core.settings import HUFFMAN_PRIOR_WEIGHTS +from lib.core.settings import PREDICTION_FEEDBACK_MAX_ITEMS +from lib.core.settings import PREDICTION_FEEDBACK_MAX_LENGTH from lib.core.settings import INFERENCE_BLANK_BREAK from lib.core.settings import INFERENCE_EQUALS_CHAR from lib.core.settings import INFERENCE_GREATER_CHAR @@ -828,6 +830,15 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None finalValue = decodeDbmsHexValue(finalValue) if conf.hexConvert else finalValue if not (conf.firstChar or conf.lastChar): # Note: --first/--last give a range-limited (non-complete) output; caching it unmarked would let a later resume serve the truncated value as the full one hashDBWrite(expression, finalValue) + + # Adaptive intra-run prediction (good samaritan / --predict-output): remember this extracted + # value for its enumeration context so later same-context items sharing structure are predicted + # faster. Length-capped (identifiers are short -> large data cells never bloat/pollute the pool); + # a wrong prediction only ever costs a probe and falls back to bisection. + if (conf.predictOutput and kb.partRun and kb.commonOutputs is not None + and 0 < len(finalValue) <= PREDICTION_FEEDBACK_MAX_LENGTH + and len(kb.commonOutputs.get(kb.partRun) or ()) < PREDICTION_FEEDBACK_MAX_ITEMS): + kb.commonOutputs.setdefault(kb.partRun, set()).add(finalValue) elif partialValue: hashDBWrite(expression, "%s%s" % (PARTIAL_VALUE_MARKER if not conf.hexConvert else PARTIAL_HEX_VALUE_MARKER, partialValue)) diff --git a/lib/utils/dialect.py b/lib/utils/dialect.py index 1d225c3d2..3be67eac8 100644 --- a/lib/utils/dialect.py +++ b/lib/utils/dialect.py @@ -28,23 +28,28 @@ from lib.request.inject import checkBooleanExpression # OTHER valid rows, which sqlmap's fuzzy page comparison conflates with the anchor row, producing # false positives. See PROVE_DESIGN.md.) # -# Truth table measured on a live OWASP-CRS platform across 11 engines (MySQL, MariaDB/TiDB, -# PostgreSQL, CockroachDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse, H2, HSQLDB, Derby); -# only the zero-false-positive rules are kept (see _classify). With anchor value 2: +# Truth table measured on a live OWASP-CRS platform across 16 engines (MySQL/MySQL5, MariaDB/TiDB, +# PostgreSQL, CockroachDB, CrateDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse, H2, HSQLDB, +# Derby, MonetDB, IRIS, Trino); only the zero-false-positive rules are kept (see _classify). With +# anchor value 2: # -# * 2^0=2 -> '^' is bitwise XOR (MySQL/MSSQL: 2^0=2) vs exponentiation (PostgreSQL: 2^0=1) vs -# no such operator (SQLite/Oracle/... -> error, so false) -# * 2^3=8 -> '^' is exponentiation (PostgreSQL/CockroachDB: 2^3=8) - false for XOR dialects +# * 2^0=2 -> '^' is bitwise XOR (MySQL/MSSQL/MonetDB: 2^0=2) vs exponentiation (PostgreSQL: 2^0=1) +# vs no such operator (SQLite/Oracle/... -> error, so false) +# * 2^3=8 -> '^' is exponentiation (PostgreSQL/CockroachDB/CrateDB: 2^3=8) - false for XOR dialects # (2^3=1) and erroring dialects; a positive PostgreSQL-family marker. CAVEAT: # '^'=exponentiation is not strictly unique to PostgreSQL - MS Access/Jet and DuckDB # also use it (neither on the platform), so this can read as PostgreSQL there. -# * 5/2=2 -> integer division (PostgreSQL/MSSQL/SQLite) vs real division (MySQL/Oracle: 2.5) +# * 5/2=2 -> integer division (PostgreSQL/MSSQL/SQLite/MonetDB) vs real division (MySQL/Oracle: 2.5) # * 2|0=2 -> a bitwise OR operator exists (absent in Firebird/Oracle/ClickHouse/H2) +# * 1<<2=4 -> a bit-shift operator exists. MonetDB shares MSSQL's (xor, intdiv) = (True, True) +# signature exactly, which would misread MonetDB as SQL Server; MonetDB HAS '<<' while +# SQL Server has NO shift operator (any version) -> this probe splits that one collision. DIALECT_PROBES = ( ("xor", "2^0=2"), ("pgpow", "2^3=8"), ("intdiv", "5/2=2"), ("bitor", "2|0=2"), + ("shift", "1<<2=4"), ) def _classify(signature): @@ -58,28 +63,32 @@ def _classify(signature): all-false signature, which a minimal engine like ClickHouse/H2/Firebird/HSQLDB/Derby or a fully WAF-blocked channel also produces) deliberately fall through to None: - >>> _classify((True, False, False, True)) # MySQL / MariaDB / TiDB + >>> _classify((True, False, False, True, True)) # MySQL / MariaDB / TiDB 'MySQL' - >>> _classify((True, False, True, True)) # Microsoft SQL Server + >>> _classify((True, False, True, True, False)) # Microsoft SQL Server (no bit-shift) 'Microsoft SQL Server' - >>> _classify((False, True, True, True)) # PostgreSQL + >>> _classify((True, False, True, True, True)) # MonetDB (same xor/intdiv as MSSQL, but has '<<') + 'MonetDB' + >>> _classify((False, True, True, True, False)) # PostgreSQL 'PostgreSQL' - >>> _classify((False, True, False, True)) # CockroachDB (pgwire) -> PostgreSQL family + >>> _classify((False, True, False, True, False)) # CockroachDB (pgwire) -> PostgreSQL family 'PostgreSQL' - >>> _classify((False, False, True, True)) # SQLite + >>> _classify((False, False, True, True, True)) # SQLite 'SQLite' - >>> _classify((False, False, True, False)) is None # Firebird/HSQLDB/Derby/H2 -> no prior + >>> _classify((False, False, True, False, False)) is None # Firebird/HSQLDB/Derby/H2/Trino -> no prior True - >>> _classify((False, False, False, False)) is None # all-false (Oracle/ClickHouse/blocked) -> no prior + >>> _classify((False, False, False, False, False)) is None # all-false (Oracle/ClickHouse/IRIS/blocked) -> no prior True """ - xor, pgpow, intdiv, bitor = signature + xor, pgpow, intdiv, bitor, shift = signature if pgpow: # '^' is exponentiation -> PostgreSQL family return DBMS.PGSQL - if xor and intdiv: # '^' is XOR AND integer division -> SQL Server - return DBMS.MSSQL + if xor and intdiv: # '^' is XOR AND integer division -> SQL Server ... + # ... except MonetDB shares this exact signature; it alone has a working bit-shift operator + # ('1<<2=4'), SQL Server has none -> split the collision (measured zero-FP across 16 engines). + return DBMS.MONETDB if shift else DBMS.MSSQL if xor and not intdiv: # '^' is XOR AND real division -> MySQL family return DBMS.MYSQL if not xor and intdiv and bitor: # no '^', integer division, bitwise '|' -> SQLite diff --git a/tests/test_dialectdbms.py b/tests/test_dialectdbms.py index 6b464cbc5..81de07ece 100644 --- a/tests/test_dialectdbms.py +++ b/tests/test_dialectdbms.py @@ -28,46 +28,68 @@ from lib.utils.dialect import _classify from lib.utils.dialect import dialectCheckDbms # measured 2026-06 across the sqli-platform (boolean form "id=2 AND ", anchor value 2); -# signature = (2^0=2, 2^3=8, 5/2=2, 2|0=2) +# base signature = (2^0=2, 2^3=8, 5/2=2, 2|0=2). The 5th probe (1<<2=4, bit-shift) is the MonetDB-vs- +# SQL Server disambiguator and is asserted separately (SHIFT_SENSITIVE); for every other engine the +# shift flag does NOT change the classification, which the test proves by trying it both ways. MEASURED = { "mysql": ((True, False, False, True), DBMS.MYSQL), + "mysql5": ((True, False, False, True), DBMS.MYSQL), "tidb": ((True, False, False, True), DBMS.MYSQL), # MySQL wire-compatible - "mssql": ((True, False, True, True), DBMS.MSSQL), "postgres": ((False, True, True, True), DBMS.PGSQL), "cockroach": ((False, True, False, True), DBMS.PGSQL), # pgwire (exponent '^', decimal division) + "cratedb": ((False, True, True, True), DBMS.PGSQL), # pgwire family "sqlite": ((False, False, True, True), DBMS.SQLITE), # not distinctive enough -> deliberately no prior (operators alone can't safely separate these) "firebird": ((False, False, True, False), None), "hsqldb": ((False, False, True, False), None), # collides with firebird/derby/h2 "derby": ((False, False, True, False), None), "h2": ((False, False, True, False), None), + "trino": ((False, False, True, False), None), + "iris": ((False, False, False, False), None), # all-error, like Oracle/broken channel "clickhouse": ((False, False, False, False), None), # all-error, like Oracle/broken channel } +# engines whose full 5-probe signature (incl. 1<<2=4) is needed because they share base-4 (xor,intdiv) +# and only the bit-shift probe separates them: SQL Server has no shift operator, MonetDB does. +SHIFT_SENSITIVE = { + "mssql": ((True, False, True, True, False), DBMS.MSSQL), + "monetdb": ((True, False, True, True, True), DBMS.MONETDB), +} + class TestDialectClassification(unittest.TestCase): - def test_measured_engines_map_as_expected(self): - for engine, (signature, expected) in MEASURED.items(): + def test_shift_sensitive_engines_split_correctly(self): + # MonetDB shared MSSQL's (xor, intdiv) signature exactly (a false positive before the shift + # probe); 1<<2=4 (MonetDB only) now separates them. + for engine, (signature, expected) in SHIFT_SENSITIVE.items(): self.assertEqual(_classify(signature), expected, "engine %r misclassified" % engine) + def test_measured_engines_map_as_expected(self): + # for non-shift-sensitive engines the shift flag is irrelevant: assert BOTH values map to the + # expected DBMS (proves the new probe never perturbs the existing classifications). + for engine, (base, expected) in MEASURED.items(): + for shift in (False, True): + self.assertEqual(_classify(base + (shift,)), expected, "engine %r misclassified (shift=%s)" % (engine, shift)) + def test_no_false_positive_across_measured_set(self): - # ambiguous engines must not borrow a major-DBMS identity; concrete ones must stay in range - for engine, (signature, expected) in MEASURED.items(): - result = _classify(signature) - if expected is None: - self.assertIsNone(result, "ambiguous engine %r leaked a DBMS prior" % engine) - else: - self.assertIn(result, (DBMS.MYSQL, DBMS.MSSQL, DBMS.PGSQL, DBMS.SQLITE, DBMS.ORACLE)) + for engine, (base, expected) in MEASURED.items(): + for shift in (False, True): + result = _classify(base + (shift,)) + if expected is None: + self.assertIsNone(result, "ambiguous engine %r leaked a DBMS prior" % engine) + else: + self.assertIn(result, (DBMS.MYSQL, DBMS.MSSQL, DBMS.PGSQL, DBMS.SQLITE, DBMS.MONETDB, DBMS.ORACLE)) def test_all_error_signature_yields_no_prior(self): - # an all-error signature (Oracle, ClickHouse, or simply a WAF-blocked channel) is not + # an all-error signature (Oracle, ClickHouse, IRIS, or simply a WAF-blocked channel) is not # distinctive enough - it must NOT be guessed as any DBMS - self.assertIsNone(_classify((False, False, False, False))) + self.assertIsNone(_classify((False, False, False, False, False))) + self.assertIsNone(_classify((False, False, False, False, True))) def test_pgpow_dominates_as_postgres_marker(self): # exponentiation '^' is a positive PostgreSQL-family marker regardless of division flavour - self.assertEqual(_classify((False, True, True, True)), DBMS.PGSQL) - self.assertEqual(_classify((False, True, False, True)), DBMS.PGSQL) + self.assertEqual(_classify((False, True, True, True, False)), DBMS.PGSQL) + self.assertEqual(_classify((False, True, False, True, False)), DBMS.PGSQL) class TestDialectCheckDbmsGuard(unittest.TestCase):