Minor improvements

2026-06-28 12:31:00 +00:00 · 2026-06-23 22:28:19 +02:00 · 2026-06-23 22:28:19 +02:00 · 57dcc04cbe
commit 57dcc04cbe
parent da66f1b3ec
5 changed files with 87 additions and 37 deletions
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@ -20,7 +20,7 @@ from lib.core.enums import OS
 from thirdparty import six

 # sqlmap version (<major>.<minor>.<month>.<monthly commit>)
-VERSION = "1.10.6.152"
+VERSION = "1.10.6.153"
 TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
 TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
 VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
@ -533,6 +533,14 @@ for _weight, _chars in ((6, " etaoinsrhldcumfgypwbvkxjqz"), (4, "0123456789"), (
    for _char in _chars:
        HUFFMAN_PRIOR_WEIGHTS[ord(_char)] = _weight

+# Bounds for feeding extracted values back into the "good samaritan" (--predict-output) common-output
+# pool for their enumeration context, so later same-context items that share structure (e.g.
+# wp_posts / wp_users / wp_options ...) are predicted faster. MAX_LENGTH keeps large data cells from
+# bloating/polluting the pool (identifiers are short); MAX_ITEMS bounds per-context growth so a huge
+# enumeration cannot make the per-character prediction scan costly. Misses always fall back to bisection.
+PREDICTION_FEEDBACK_MAX_LENGTH = 128
+PREDICTION_FEEDBACK_MAX_ITEMS = 10000
+
 # Minimum range between minimum and maximum of statistical set
 MIN_STATISTICAL_RANGE = 0.01

--- a/lib/techniques/blind/inference.py
+++ b/lib/techniques/blind/inference.py
@ -44,6 +44,8 @@ from lib.core.exception import SqlmapUnsupportedFeatureException
 from lib.core.settings import CHAR_INFERENCE_MARK
 from lib.core.settings import HUFFMAN_PROBE_LIMIT
 from lib.core.settings import HUFFMAN_PRIOR_WEIGHTS
+from lib.core.settings import PREDICTION_FEEDBACK_MAX_ITEMS
+from lib.core.settings import PREDICTION_FEEDBACK_MAX_LENGTH
 from lib.core.settings import INFERENCE_BLANK_BREAK
 from lib.core.settings import INFERENCE_EQUALS_CHAR
 from lib.core.settings import INFERENCE_GREATER_CHAR
@ -828,6 +830,15 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None
            finalValue = decodeDbmsHexValue(finalValue) if conf.hexConvert else finalValue
            if not (conf.firstChar or conf.lastChar):  # Note: --first/--last give a range-limited (non-complete) output; caching it unmarked would let a later resume serve the truncated value as the full one
                hashDBWrite(expression, finalValue)
+
+            # Adaptive intra-run prediction (good samaritan / --predict-output): remember this extracted
+            # value for its enumeration context so later same-context items sharing structure are predicted
+            # faster. Length-capped (identifiers are short -> large data cells never bloat/pollute the pool);
+            # a wrong prediction only ever costs a probe and falls back to bisection.
+            if (conf.predictOutput and kb.partRun and kb.commonOutputs is not None
+                    and 0 < len(finalValue) <= PREDICTION_FEEDBACK_MAX_LENGTH
+                    and len(kb.commonOutputs.get(kb.partRun) or ()) < PREDICTION_FEEDBACK_MAX_ITEMS):
+                kb.commonOutputs.setdefault(kb.partRun, set()).add(finalValue)
        elif partialValue:
            hashDBWrite(expression, "%s%s" % (PARTIAL_VALUE_MARKER if not conf.hexConvert else PARTIAL_HEX_VALUE_MARKER, partialValue))

--- a/lib/utils/dialect.py
+++ b/lib/utils/dialect.py
@ -28,23 +28,28 @@ from lib.request.inject import checkBooleanExpression
 # OTHER valid rows, which sqlmap's fuzzy page comparison conflates with the anchor row, producing
 # false positives. See PROVE_DESIGN.md.)
 #
-# Truth table measured on a live OWASP-CRS platform across 11 engines (MySQL, MariaDB/TiDB,
-# PostgreSQL, CockroachDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse, H2, HSQLDB, Derby);
-# only the zero-false-positive rules are kept (see _classify). With anchor value 2:
+# Truth table measured on a live OWASP-CRS platform across 16 engines (MySQL/MySQL5, MariaDB/TiDB,
+# PostgreSQL, CockroachDB, CrateDB, Microsoft SQL Server, SQLite, Firebird, ClickHouse, H2, HSQLDB,
+# Derby, MonetDB, IRIS, Trino); only the zero-false-positive rules are kept (see _classify). With
+# anchor value 2:
 #
-#   * 2^0=2  -> '^' is bitwise XOR (MySQL/MSSQL: 2^0=2) vs exponentiation (PostgreSQL: 2^0=1) vs
-#              no such operator (SQLite/Oracle/... -> error, so false)
-#   * 2^3=8  -> '^' is exponentiation (PostgreSQL/CockroachDB: 2^3=8) - false for XOR dialects
+#   * 2^0=2  -> '^' is bitwise XOR (MySQL/MSSQL/MonetDB: 2^0=2) vs exponentiation (PostgreSQL: 2^0=1)
+#              vs no such operator (SQLite/Oracle/... -> error, so false)
+#   * 2^3=8  -> '^' is exponentiation (PostgreSQL/CockroachDB/CrateDB: 2^3=8) - false for XOR dialects
 #              (2^3=1) and erroring dialects; a positive PostgreSQL-family marker. CAVEAT:
 #              '^'=exponentiation is not strictly unique to PostgreSQL - MS Access/Jet and DuckDB
 #              also use it (neither on the platform), so this can read as PostgreSQL there.
-#   * 5/2=2  -> integer division (PostgreSQL/MSSQL/SQLite) vs real division (MySQL/Oracle: 2.5)
+#   * 5/2=2  -> integer division (PostgreSQL/MSSQL/SQLite/MonetDB) vs real division (MySQL/Oracle: 2.5)
 #   * 2|0=2  -> a bitwise OR operator exists (absent in Firebird/Oracle/ClickHouse/H2)
+#   * 1<<2=4 -> a bit-shift operator exists. MonetDB shares MSSQL's (xor, intdiv) = (True, True)
+#              signature exactly, which would misread MonetDB as SQL Server; MonetDB HAS '<<' while
+#              SQL Server before 2022 has none -> this probe splits that one collision. NOTE(review): SQL Server 2022+ documents a '<<' operator and may therefore read as MonetDB - confirm.
 DIALECT_PROBES = (
    ("xor", "2^0=2"),
    ("pgpow", "2^3=8"),
    ("intdiv", "5/2=2"),
    ("bitor", "2|0=2"),
+    ("shift", "1<<2=4"),
 )

 def _classify(signature):
@ -58,28 +63,32 @@ def _classify(signature):
    all-false signature, which a minimal engine like ClickHouse/H2/Firebird/HSQLDB/Derby or
    a fully WAF-blocked channel also produces) deliberately fall through to None:

-    >>> _classify((True, False, False, True))            # MySQL / MariaDB / TiDB
+    >>> _classify((True, False, False, True, True))           # MySQL / MariaDB / TiDB
    'MySQL'
-    >>> _classify((True, False, True, True))             # Microsoft SQL Server
+    >>> _classify((True, False, True, True, False))           # Microsoft SQL Server (no bit-shift)
    'Microsoft SQL Server'
-    >>> _classify((False, True, True, True))             # PostgreSQL
+    >>> _classify((True, False, True, True, True))            # MonetDB (same xor/intdiv as MSSQL, but has '<<')
+    'MonetDB'
+    >>> _classify((False, True, True, True, False))           # PostgreSQL
    'PostgreSQL'
-    >>> _classify((False, True, False, True))            # CockroachDB (pgwire) -> PostgreSQL family
+    >>> _classify((False, True, False, True, False))          # CockroachDB (pgwire) -> PostgreSQL family
    'PostgreSQL'
-    >>> _classify((False, False, True, True))            # SQLite
+    >>> _classify((False, False, True, True, True))           # SQLite
    'SQLite'
-    >>> _classify((False, False, True, False)) is None   # Firebird/HSQLDB/Derby/H2 -> no prior
+    >>> _classify((False, False, True, False, False)) is None # Firebird/HSQLDB/Derby/H2/Trino -> no prior
    True
-    >>> _classify((False, False, False, False)) is None  # all-false (Oracle/ClickHouse/blocked) -> no prior
+    >>> _classify((False, False, False, False, False)) is None # all-false (Oracle/ClickHouse/IRIS/blocked) -> no prior
    True
    """

-    xor, pgpow, intdiv, bitor = signature
+    xor, pgpow, intdiv, bitor, shift = signature

    if pgpow:                                               # '^' is exponentiation -> PostgreSQL family
        return DBMS.PGSQL
-    if xor and intdiv:                                      # '^' is XOR AND integer division -> SQL Server
-        return DBMS.MSSQL
+    if xor and intdiv:                                      # '^' is XOR AND integer division -> SQL Server ...
+        # ... except MonetDB shares this exact signature; it alone has a working bit-shift operator
+        # ('1<<2=4'), SQL Server has none -> split the collision (measured zero-FP across 16 engines).
+        return DBMS.MONETDB if shift else DBMS.MSSQL
    if xor and not intdiv:                                  # '^' is XOR AND real division -> MySQL family
        return DBMS.MYSQL
    if not xor and intdiv and bitor:                        # no '^', integer division, bitwise '|' -> SQLite