Adding JSONL as a dump format

This commit is contained in:
Miroslav Štampar 2026-06-15 16:04:39 +02:00
parent 17e94c3409
commit 403855f701
6 changed files with 185 additions and 9 deletions

View file

@ -175,8 +175,8 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data.
70fb2528e580b22564899595b0dff6b1bc257c6a99d2022ce3996a3d04e68e4e lib/core/decorators.py
147823c37596bd6a56d677697781f34b8d1d1671d5a2518fbc9468d623c6d07d lib/core/defaults.py
2f44a1bfe6f18aafe64147b99e69aa93cf438c0e7befe59f4e2aee9065c8b7b6 lib/core/dicts.py
e4b23512625bc377c0e0924d8113c595452320d8c66014828da5d8258a77f55a lib/core/dump.py
23e33f0b457e2a7114c9171ba9b42e1751b71ee3f384bba7fad39e4490adb803 lib/core/enums.py
2592b0fd38c272c0b0d49878f4449437eb8ba8ff7536bb39b2ac9a2511010f7c lib/core/dump.py
6b9932d9c789a0e2ac28a493fb7914f49100a1c91de989bcdb20df9d40648522 lib/core/enums.py
5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4 lib/core/exception.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py
914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py
@ -188,7 +188,7 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch
48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
1e2a5277293de9d3d1e65b401013baf1c4033162e580f6891ca6a2686e666894 lib/core/settings.py
72448bcfc929496fb0333480a780163a395f65fff92898ad8108daf54a12799b lib/core/settings.py
cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py
bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py
70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py
@ -199,7 +199,7 @@ b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02 lib/core/unesc
2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c lib/core/wordlist.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/__init__.py
54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821 lib/parse/banner.py
7bc8612fbd7ba390ab19f908c370c126ae66afa200bc7975800599ecbe029f0c lib/parse/cmdline.py
3f298a58a41225ef67c57b2cf08c71f2eacbab8f98463b4461f45933d6a82f69 lib/parse/cmdline.py
02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158 lib/parse/configfile.py
c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/handler.py
5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c lib/parse/headers.py
@ -577,6 +577,7 @@ a48c411fea864e6bcd6a1c7e1a35094b8cda8d15088fd9e7b0270542ae20daa9 tests/test_com
3804eb2d730220360f9dc07d5994eb64e9f65acf3b0d8648df8df2a2177ba8fd tests/test_decodepage.py
e40a49cfa73c45b3c3c6d1d1d00738861e270cb7a07b28f5a5356f9c7c800cf2 tests/test_dialect.py
993a2d4d87c4fbaf261663b069629acc95ee4405aa0c42cf5a8f39649fdb0fff tests/test_dicts.py
c706c5dad287e2e8cf707f7aa5eeb9394eddc6ef3a4fea809babf3ae77e8d7fa tests/test_dump_jsonl.py
2bbe4b01f79992cfa8884651fc0a28dbd0e3abb0cbea9eb7eadf1f98ca3c3420 tests/test_encoding.py
bb6991260a994fcbe79e05febaa34affd5631d02299fbc626820addd5f6ea4f4 tests/test_error_engine.py
8105de9978fe286a29f6b635a58db1e9998d86e8dded54d7efdfb9d52a121094 tests/test_hashdb.py

View file

@ -6,6 +6,7 @@ See the file 'LICENSE' for copying permission
"""
import hashlib
import json
import os
import re
import shutil
@ -61,6 +62,7 @@ from lib.core.settings import WINDOWS_RESERVED_NAMES
from lib.utils.safe2bin import safechardecode
from thirdparty import six
from thirdparty.magic import magic
from thirdparty.odict import OrderedDict
class Dump(object):
"""
@ -461,7 +463,7 @@ class Dump(object):
if conf.dumpFormat == DUMP_FORMAT.SQLITE:
replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % safeDb))
elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL):
if not os.path.isdir(dumpDbPath):
try:
os.makedirs(dumpDbPath)
@ -624,6 +626,7 @@ class Dump(object):
console = (i >= count - TRIM_STDOUT_DUMP_SIZE)
field = 1
values = []
record = OrderedDict()
if i == 0 and count > TRIM_STDOUT_DUMP_SIZE:
self._write(" ...")
@ -674,6 +677,11 @@ class Dump(object):
dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel))
elif conf.dumpFormat == DUMP_FORMAT.HTML:
dataToDumpFile(dumpFP, "<td>%s</td>" % getUnicode(htmlEscape(value).encode("ascii", "xmlcharrefreplace")))
elif conf.dumpFormat == DUMP_FORMAT.JSONL:
if len(info["values"]) <= i or info["values"][i] is None or info["values"][i] == " ": # NULL
record[unsafeSQLIdentificatorNaming(column)] = None
else:
record[unsafeSQLIdentificatorNaming(column)] = getUnicode(info["values"][i])
field += 1
@ -686,6 +694,8 @@ class Dump(object):
dataToDumpFile(dumpFP, "\n")
elif conf.dumpFormat == DUMP_FORMAT.HTML:
dataToDumpFile(dumpFP, "</tr>\n")
elif conf.dumpFormat == DUMP_FORMAT.JSONL:
dataToDumpFile(dumpFP, "%s\n" % getUnicode(json.dumps(record, ensure_ascii=False)))
self._write("|", console=console)
@ -695,10 +705,10 @@ class Dump(object):
rtable.endTransaction()
logger.info("table '%s.%s' dumped to SQLITE database '%s'" % (db, table, replication.dbpath))
elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML):
elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL):
if conf.dumpFormat == DUMP_FORMAT.HTML:
dataToDumpFile(dumpFP, "</tbody>\n</table>\n<script>let lc=-1,ld=1;function sortTable(n,h){var t=document.querySelector(\"table\"),r=Array.from(t.tBodies[0].rows);ld=(lc==n?-ld:1);lc=n;r.sort((a,b)=>{var x=a.cells[n].innerText.trim(),y=b.cells[n].innerText.trim(),nx=parseFloat(x),ny=parseFloat(y);return(!isNaN(nx)&&!isNaN(ny)?(nx-ny)*ld:x.localeCompare(y)*ld)});r.forEach(e=>t.tBodies[0].appendChild(e));Array.from(t.tHead.rows[0].cells).forEach(c=>{c.innerText=c.innerText.replace(/[\u2191\u2193]/g,\"\")});h.innerText=h.innerText+ (ld==1?\"\u2191\":\"\u2193\");}</script>\n</body>\n</html>")
else:
elif conf.dumpFormat == DUMP_FORMAT.CSV:
dataToDumpFile(dumpFP, "\n")
dumpFP.close()

View file

@ -238,6 +238,7 @@ class DUMP_FORMAT(object):
CSV = "CSV"
HTML = "HTML"
SQLITE = "SQLITE"
JSONL = "JSONL"
class HTTP_HEADER(object):
ACCEPT = "Accept"

View file

@ -20,7 +20,7 @@ from lib.core.enums import OS
from thirdparty import six
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.10.6.108"
VERSION = "1.10.6.109"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)

View file

@ -686,7 +686,7 @@ def cmdLineParser(argv=None):
help="Store dumped data to a custom file")
general.add_argument("--dump-format", dest="dumpFormat",
help="Format of dumped data (CSV (default), HTML or SQLITE)")
help="Dump data format (CSV (default), HTML, SQLITE, JSONL)")
general.add_argument("--encoding", dest="encoding",
help="Character encoding used for data retrieval (e.g. GBK)")

164
tests/test_dump_jsonl.py Normal file
View file

@ -0,0 +1,164 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
JSONL output of the per-table dumper (Dump.dbTableValues in lib/core/dump.py).
--dump-format=JSONL writes one self-describing JSON object per row to a
<host>/dump/<db>/<table>.jsonl file, streaming-safe (one independent line per
row, no surrounding array/header/footer). These tests pin the contract that an
automated consumer relies on: column order preserved (so it matches the CSV
column order and is reproducible on Python 2's unordered dict), the DB-NULL
marker (" ") mapped to JSON null exactly like --report-json, the empty string
left intact (NOT collapsed to null), and a strict one-object-per-line layout.
"""
import json
import os
import shutil
import sys
import tempfile
import unittest
from collections import OrderedDict
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _testutils import bootstrap
bootstrap()
from lib.core.common import Backend
from lib.core.data import conf, kb
from lib.core.dump import Dump
from lib.core.enums import DUMP_FORMAT
class _JsonlDumpCase(unittest.TestCase):
def setUp(self):
self._saved = dict((k, conf.get(k)) for k in ("dumpFormat", "dumpPath", "dumpFile", "col", "api", "reportCollector", "limitStart", "limitStop", "csvDel", "forceDbms", "dbms"))
self._savedKb = dict((k, kb.get(k)) for k in ("forcedDbms", "dbms"))
# A DBMS leaked from an earlier test (e.g. one that uppercases identifiers) would change
# both the on-disk filename and the JSON keys, so pin a neutral, case-preserving back-end.
conf.forceDbms = conf.dbms = None
kb.dbms = None
Backend.forceDbms("MySQL")
self.tmp = tempfile.mkdtemp(prefix="sqlmap-jsonl-test")
conf.dumpFormat = DUMP_FORMAT.JSONL
conf.dumpPath = self.tmp
conf.dumpFile = None
conf.col = None
conf.api = False
conf.reportCollector = None
conf.limitStart = conf.limitStop = None
conf.csvDel = ","
self.d = Dump()
self.d._write = lambda *a, **k: None # silence the console table
def tearDown(self):
for k, v in self._saved.items():
conf[k] = v
for k, v in self._savedKb.items():
kb[k] = v
shutil.rmtree(self.tmp, ignore_errors=True)
def _dump(self, table_values):
self.d.dbTableValues(table_values)
db = table_values["__infos__"]["db"] or "All"
path = os.path.join(self.tmp, db, "%s.jsonl" % table_values["__infos__"]["table"])
with open(path) as f:
content = f.read()
return content
def _rows(self, content):
return [json.loads(line) for line in content.splitlines() if line.strip()]
class TestJsonlContract(_JsonlDumpCase):
def test_one_object_per_row(self):
content = self._dump({
"__infos__": {"count": 2, "db": "testdb", "table": "users"},
"id": {"length": 2, "values": ["1", "2"]},
"name": {"length": 6, "values": ["luther", "fluffy"]},
})
# exactly N non-empty lines, each terminated by a newline, each a standalone object
lines = content.splitlines()
self.assertEqual(len(lines), 2)
self.assertTrue(content.endswith("\n"))
rows = self._rows(content)
self.assertEqual(rows[0], {"id": "1", "name": "luther"})
self.assertEqual(rows[1], {"id": "2", "name": "fluffy"})
def test_no_header_or_footer(self):
# unlike CSV (header row) / HTML (doc scaffold), JSONL must be pure data lines
content = self._dump({
"__infos__": {"count": 1, "db": "testdb", "table": "t"},
"id": {"length": 2, "values": ["1"]},
})
lines = [l for l in content.splitlines() if l.strip()]
self.assertEqual(len(lines), 1)
self.assertEqual(json.loads(lines[0]), {"id": "1"})
def test_db_null_becomes_json_null(self):
# sqlmap stores a DB NULL as a single space (" "); the machine format must emit JSON null,
# consistent with --report-json. An empty string is a real value and must stay "".
content = self._dump({
"__infos__": {"count": 1, "db": "testdb", "table": "t"},
"a": {"length": 1, "values": [" "]}, # DB NULL marker
"b": {"length": 1, "values": [""]}, # genuine empty string
"c": {"length": 1, "values": ["x"]},
})
row = self._rows(content)[0]
self.assertIsNone(row["a"])
self.assertEqual(row["b"], "")
self.assertEqual(row["c"], "x")
def test_missing_value_is_null(self):
# a column whose values list is short for this row index must serialize as null, not crash
content = self._dump({
"__infos__": {"count": 2, "db": "testdb", "table": "t"},
"id": {"length": 2, "values": ["1", "2"]},
"lagging": {"length": 4, "values": ["only-one"]}, # missing index 1
})
rows = self._rows(content)
self.assertEqual(rows[0], {"id": "1", "lagging": "only-one"})
self.assertEqual(rows[1], {"id": "2", "lagging": None})
def test_column_order_matches_csv(self):
# The serialized byte stream must keep the (priority-sorted) column order so output is
# reproducible - even on Python 2 where a plain dict would not - and that order must be
# the SAME one CSV uses. Build the input as an OrderedDict so the expectation is fixed,
# then dump the identical data as both JSONL and CSV and compare the column sequences.
def table():
tv = OrderedDict()
tv["__infos__"] = {"count": 1, "db": "testdb", "table": "t"}
tv["zebra"] = {"length": 1, "values": ["1"]}
tv["alpha"] = {"length": 1, "values": ["2"]}
tv["middle"] = {"length": 1, "values": ["3"]}
return tv
jsonl_line = [l for l in self._dump(table()).splitlines() if l.strip()][0]
jsonl_order = [k for k, _ in json.loads(jsonl_line, object_pairs_hook=lambda p: p)]
conf.dumpFormat = DUMP_FORMAT.CSV
csv_path = os.path.join(self.tmp, "testdb", "t.csv")
if os.path.exists(csv_path):
os.remove(csv_path)
self.d.dbTableValues(table())
with open(csv_path) as f:
csv_header = f.read().splitlines()[0]
csv_order = [c.strip() for c in csv_header.split(conf.csvDel)]
self.assertEqual(jsonl_order, csv_order)
def test_unicode_value_not_escaped(self):
# ensure_ascii=False keeps multibyte data readable; it must round-trip through json.loads
content = self._dump({
"__infos__": {"count": 1, "db": "testdb", "table": "t"},
"name": {"length": 6, "values": [u"\u0107evap"]},
})
self.assertEqual(self._rows(content)[0]["name"], u"\u0107evap")
if __name__ == "__main__":
unittest.main()