Adding JSONL as a dump format

2026-06-20 14:40:36 +00:00 · 2026-06-15 16:04:39 +02:00 · 2026-06-15 16:04:39 +02:00 · 403855f701
commit 403855f701
parent 17e94c3409
6 changed files with 185 additions and 9 deletions
--- a/data/txt/sha256sums.txt
+++ b/data/txt/sha256sums.txt
@ -175,8 +175,8 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6  lib/core/data.
 70fb2528e580b22564899595b0dff6b1bc257c6a99d2022ce3996a3d04e68e4e  lib/core/decorators.py
 147823c37596bd6a56d677697781f34b8d1d1671d5a2518fbc9468d623c6d07d  lib/core/defaults.py
 2f44a1bfe6f18aafe64147b99e69aa93cf438c0e7befe59f4e2aee9065c8b7b6  lib/core/dicts.py
-e4b23512625bc377c0e0924d8113c595452320d8c66014828da5d8258a77f55a  lib/core/dump.py
-23e33f0b457e2a7114c9171ba9b42e1751b71ee3f384bba7fad39e4490adb803  lib/core/enums.py
+2592b0fd38c272c0b0d49878f4449437eb8ba8ff7536bb39b2ac9a2511010f7c  lib/core/dump.py
+6b9932d9c789a0e2ac28a493fb7914f49100a1c91de989bcdb20df9d40648522  lib/core/enums.py
 5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4  lib/core/exception.py
 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3  lib/core/__init__.py
 914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad  lib/core/log.py
@ -188,7 +188,7 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7  lib/core/patch
 48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3  lib/core/replication.py
 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6  lib/core/revision.py
 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c  lib/core/session.py
-1e2a5277293de9d3d1e65b401013baf1c4033162e580f6891ca6a2686e666894  lib/core/settings.py
+72448bcfc929496fb0333480a780163a395f65fff92898ad8108daf54a12799b  lib/core/settings.py
 cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82  lib/core/shell.py
 bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725  lib/core/subprocessng.py
 70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6  lib/core/target.py
@ -199,7 +199,7 @@ b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02  lib/core/unesc
 2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c  lib/core/wordlist.py
 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3  lib/__init__.py
 54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821  lib/parse/banner.py
-7bc8612fbd7ba390ab19f908c370c126ae66afa200bc7975800599ecbe029f0c  lib/parse/cmdline.py
+3f298a58a41225ef67c57b2cf08c71f2eacbab8f98463b4461f45933d6a82f69  lib/parse/cmdline.py
 02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158  lib/parse/configfile.py
 c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb  lib/parse/handler.py
 5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c  lib/parse/headers.py
@ -577,6 +577,7 @@ a48c411fea864e6bcd6a1c7e1a35094b8cda8d15088fd9e7b0270542ae20daa9  tests/test_com
 3804eb2d730220360f9dc07d5994eb64e9f65acf3b0d8648df8df2a2177ba8fd  tests/test_decodepage.py
 e40a49cfa73c45b3c3c6d1d1d00738861e270cb7a07b28f5a5356f9c7c800cf2  tests/test_dialect.py
 993a2d4d87c4fbaf261663b069629acc95ee4405aa0c42cf5a8f39649fdb0fff  tests/test_dicts.py
+c706c5dad287e2e8cf707f7aa5eeb9394eddc6ef3a4fea809babf3ae77e8d7fa  tests/test_dump_jsonl.py
 2bbe4b01f79992cfa8884651fc0a28dbd0e3abb0cbea9eb7eadf1f98ca3c3420  tests/test_encoding.py
 bb6991260a994fcbe79e05febaa34affd5631d02299fbc626820addd5f6ea4f4  tests/test_error_engine.py
 8105de9978fe286a29f6b635a58db1e9998d86e8dded54d7efdfb9d52a121094  tests/test_hashdb.py
--- a/lib/core/dump.py
+++ b/lib/core/dump.py
@ -6,6 +6,7 @@ See the file 'LICENSE' for copying permission
 """

 import hashlib
+import json
 import os
 import re
 import shutil
@ -61,6 +62,7 @@ from lib.core.settings import WINDOWS_RESERVED_NAMES
 from lib.utils.safe2bin import safechardecode
 from thirdparty import six
 from thirdparty.magic import magic
+from thirdparty.odict import OrderedDict

 class Dump(object):
    """
@ -461,7 +463,7 @@ class Dump(object):

        if conf.dumpFormat == DUMP_FORMAT.SQLITE:
            replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % safeDb))
-        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
+        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL):
            if not os.path.isdir(dumpDbPath):
                try:
                    os.makedirs(dumpDbPath)
@ -624,6 +626,7 @@ class Dump(object):
            console = (i >= count - TRIM_STDOUT_DUMP_SIZE)
            field = 1
            values = []
+            record = OrderedDict()

            if i == 0 and count > TRIM_STDOUT_DUMP_SIZE:
                self._write(" ...")
@ -674,6 +677,11 @@ class Dump(object):
                            dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel))
                    elif conf.dumpFormat == DUMP_FORMAT.HTML:
                        dataToDumpFile(dumpFP, "<td>%s</td>" % getUnicode(htmlEscape(value).encode("ascii", "xmlcharrefreplace")))
+                    elif conf.dumpFormat == DUMP_FORMAT.JSONL:
+                        if len(info["values"]) <= i or info["values"][i] is None or info["values"][i] == " ":  # NULL
+                            record[unsafeSQLIdentificatorNaming(column)] = None
+                        else:
+                            record[unsafeSQLIdentificatorNaming(column)] = getUnicode(info["values"][i])

                    field += 1

@ -686,6 +694,8 @@ class Dump(object):
                dataToDumpFile(dumpFP, "\n")
            elif conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tr>\n")
+            elif conf.dumpFormat == DUMP_FORMAT.JSONL:
+                dataToDumpFile(dumpFP, "%s\n" % getUnicode(json.dumps(record, ensure_ascii=False)))

            self._write("|", console=console)

@ -695,10 +705,10 @@ class Dump(object):
            rtable.endTransaction()
            logger.info("table '%s.%s' dumped to SQLITE database '%s'" % (db, table, replication.dbpath))

-        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
+        elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL):
            if conf.dumpFormat == DUMP_FORMAT.HTML:
                dataToDumpFile(dumpFP, "</tbody>\n</table>\n<script>let lc=-1,ld=1;function sortTable(n,h){var t=document.querySelector(\"table\"),r=Array.from(t.tBodies[0].rows);ld=(lc==n?-ld:1);lc=n;r.sort((a,b)=>{var x=a.cells[n].innerText.trim(),y=b.cells[n].innerText.trim(),nx=parseFloat(x),ny=parseFloat(y);return(!isNaN(nx)&&!isNaN(ny)?(nx-ny)*ld:x.localeCompare(y)*ld)});r.forEach(e=>t.tBodies[0].appendChild(e));Array.from(t.tHead.rows[0].cells).forEach(c=>{c.innerText=c.innerText.replace(/[\u2191\u2193]/g,\"\")});h.innerText=h.innerText+ (ld==1?\"\u2191\":\"\u2193\");}</script>\n</body>\n</html>")
-            else:
+            elif conf.dumpFormat == DUMP_FORMAT.CSV:
                dataToDumpFile(dumpFP, "\n")
            dumpFP.close()

--- a/lib/core/enums.py
+++ b/lib/core/enums.py
@ -238,6 +238,7 @@ class DUMP_FORMAT(object):
    CSV = "CSV"
    HTML = "HTML"
    SQLITE = "SQLITE"
+    JSONL = "JSONL"

 class HTTP_HEADER(object):
    ACCEPT = "Accept"
--- a/lib/core/settings.py
+++ b/lib/core/settings.py
@ -20,7 +20,7 @@ from lib.core.enums import OS
 from thirdparty import six

 # sqlmap version (<major>.<minor>.<month>.<monthly commit>)
-VERSION = "1.10.6.108"
+VERSION = "1.10.6.109"
 TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
 TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
 VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
--- a/lib/parse/cmdline.py
+++ b/lib/parse/cmdline.py
@ -686,7 +686,7 @@ def cmdLineParser(argv=None):
            help="Store dumped data to a custom file")

        general.add_argument("--dump-format", dest="dumpFormat",
-            help="Format of dumped data (CSV (default), HTML or SQLITE)")
+            help="Dump data format (CSV (default), HTML, SQLITE, JSONL)")

        general.add_argument("--encoding", dest="encoding",
            help="Character encoding used for data retrieval (e.g. GBK)")
--- a/tests/test_dump_jsonl.py
+++ b/tests/test_dump_jsonl.py
@ -0,0 +1,164 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
+See the file 'LICENSE' for copying permission
+
+JSONL output of the per-table dumper (Dump.dbTableValues in lib/core/dump.py).
+
+--dump-format=JSONL writes one self-describing JSON object per row to a
+<host>/dump/<db>/<table>.jsonl file, streaming-safe (one independent line per
+row, no surrounding array/header/footer). These tests pin the contract that an
+automated consumer relies on: column order preserved (so it matches the CSV
+column order and is reproducible on Python 2's unordered dict), the DB-NULL
+marker (" ") mapped to JSON null exactly like --report-json, the empty string
+left intact (NOT collapsed to null), and a strict one-object-per-line layout.
+"""
+
+import json
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+
+from collections import OrderedDict
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from _testutils import bootstrap
+bootstrap()
+
+from lib.core.common import Backend
+from lib.core.data import conf, kb
+from lib.core.dump import Dump
+from lib.core.enums import DUMP_FORMAT
+
+
+class _JsonlDumpCase(unittest.TestCase):
+    def setUp(self):
+        self._saved = dict((k, conf.get(k)) for k in ("dumpFormat", "dumpPath", "dumpFile", "col", "api", "reportCollector", "limitStart", "limitStop", "csvDel", "forceDbms", "dbms"))
+        self._savedKb = dict((k, kb.get(k)) for k in ("forcedDbms", "dbms"))
+        # A DBMS leaked from an earlier test (e.g. one that uppercases identifiers) would change
+        # both the on-disk filename and the JSON keys, so pin a neutral, case-preserving back-end.
+        conf.forceDbms = conf.dbms = None
+        kb.dbms = None
+        Backend.forceDbms("MySQL")
+        self.tmp = tempfile.mkdtemp(prefix="sqlmap-jsonl-test")
+        conf.dumpFormat = DUMP_FORMAT.JSONL
+        conf.dumpPath = self.tmp
+        conf.dumpFile = None
+        conf.col = None
+        conf.api = False
+        conf.reportCollector = None
+        conf.limitStart = conf.limitStop = None
+        conf.csvDel = ","
+        self.d = Dump()
+        self.d._write = lambda *a, **k: None    # silence the console table
+
+    def tearDown(self):
+        for k, v in self._saved.items():
+            conf[k] = v
+        for k, v in self._savedKb.items():
+            kb[k] = v
+        shutil.rmtree(self.tmp, ignore_errors=True)
+
+    def _dump(self, table_values):
+        self.d.dbTableValues(table_values)
+        db = table_values["__infos__"]["db"] or "All"
+        path = os.path.join(self.tmp, db, "%s.jsonl" % table_values["__infos__"]["table"])
+        with open(path) as f:
+            content = f.read()
+        return content
+
+    def _rows(self, content):
+        return [json.loads(line) for line in content.splitlines() if line.strip()]
+
+
+class TestJsonlContract(_JsonlDumpCase):
+    def test_one_object_per_row(self):
+        content = self._dump({
+            "__infos__": {"count": 2, "db": "testdb", "table": "users"},
+            "id": {"length": 2, "values": ["1", "2"]},
+            "name": {"length": 6, "values": ["luther", "fluffy"]},
+        })
+        # exactly N non-empty lines, each terminated by a newline, each a standalone object
+        lines = content.splitlines()
+        self.assertEqual(len(lines), 2)
+        self.assertTrue(content.endswith("\n"))
+        rows = self._rows(content)
+        self.assertEqual(rows[0], {"id": "1", "name": "luther"})
+        self.assertEqual(rows[1], {"id": "2", "name": "fluffy"})
+
+    def test_no_header_or_footer(self):
+        # unlike CSV (header row) / HTML (doc scaffold), JSONL must be pure data lines
+        content = self._dump({
+            "__infos__": {"count": 1, "db": "testdb", "table": "t"},
+            "id": {"length": 2, "values": ["1"]},
+        })
+        lines = [l for l in content.splitlines() if l.strip()]
+        self.assertEqual(len(lines), 1)
+        self.assertEqual(json.loads(lines[0]), {"id": "1"})
+
+    def test_db_null_becomes_json_null(self):
+        # sqlmap stores a DB NULL as a single space (" "); the machine format must emit JSON null,
+        # consistent with --report-json. An empty string is a real value and must stay "".
+        content = self._dump({
+            "__infos__": {"count": 1, "db": "testdb", "table": "t"},
+            "a": {"length": 1, "values": [" "]},     # DB NULL marker
+            "b": {"length": 1, "values": [""]},       # genuine empty string
+            "c": {"length": 1, "values": ["x"]},
+        })
+        row = self._rows(content)[0]
+        self.assertIsNone(row["a"])
+        self.assertEqual(row["b"], "")
+        self.assertEqual(row["c"], "x")
+
+    def test_missing_value_is_null(self):
+        # a column whose values list is short for this row index must serialize as null, not crash
+        content = self._dump({
+            "__infos__": {"count": 2, "db": "testdb", "table": "t"},
+            "id": {"length": 2, "values": ["1", "2"]},
+            "lagging": {"length": 4, "values": ["only-one"]},   # missing index 1
+        })
+        rows = self._rows(content)
+        self.assertEqual(rows[0], {"id": "1", "lagging": "only-one"})
+        self.assertEqual(rows[1], {"id": "2", "lagging": None})
+
+    def test_column_order_matches_csv(self):
+        # The serialized byte stream must keep the (priority-sorted) column order so output is
+        # reproducible - even on Python 2 where a plain dict would not - and that order must be
+        # the SAME one CSV uses. Build the input as an OrderedDict so the expectation is fixed,
+        # then dump the identical data as both JSONL and CSV and compare the column sequences.
+        def table():
+            tv = OrderedDict()
+            tv["__infos__"] = {"count": 1, "db": "testdb", "table": "t"}
+            tv["zebra"] = {"length": 1, "values": ["1"]}
+            tv["alpha"] = {"length": 1, "values": ["2"]}
+            tv["middle"] = {"length": 1, "values": ["3"]}
+            return tv
+
+        jsonl_line = [l for l in self._dump(table()).splitlines() if l.strip()][0]
+        jsonl_order = [k for k, _ in json.loads(jsonl_line, object_pairs_hook=lambda p: p)]
+
+        conf.dumpFormat = DUMP_FORMAT.CSV
+        csv_path = os.path.join(self.tmp, "testdb", "t.csv")
+        if os.path.exists(csv_path):
+            os.remove(csv_path)
+        self.d.dbTableValues(table())
+        with open(csv_path) as f:
+            csv_header = f.read().splitlines()[0]
+        csv_order = [c.strip() for c in csv_header.split(conf.csvDel)]
+
+        self.assertEqual(jsonl_order, csv_order)
+
+    def test_unicode_value_not_escaped(self):
+        # ensure_ascii=False keeps multibyte data readable; it must round-trip through json.loads
+        content = self._dump({
+            "__infos__": {"count": 1, "db": "testdb", "table": "t"},
+            "name": {"length": 6, "values": [u"\u0107evap"]},
+        })
+        self.assertEqual(self._rows(content)[0]["name"], u"\u0107evap")
+
+
+if __name__ == "__main__":
+    unittest.main()