diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index b95a83ed2..16b7af2c6 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -175,8 +175,8 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data. 70fb2528e580b22564899595b0dff6b1bc257c6a99d2022ce3996a3d04e68e4e lib/core/decorators.py 147823c37596bd6a56d677697781f34b8d1d1671d5a2518fbc9468d623c6d07d lib/core/defaults.py 2f44a1bfe6f18aafe64147b99e69aa93cf438c0e7befe59f4e2aee9065c8b7b6 lib/core/dicts.py -e4b23512625bc377c0e0924d8113c595452320d8c66014828da5d8258a77f55a lib/core/dump.py -23e33f0b457e2a7114c9171ba9b42e1751b71ee3f384bba7fad39e4490adb803 lib/core/enums.py +2592b0fd38c272c0b0d49878f4449437eb8ba8ff7536bb39b2ac9a2511010f7c lib/core/dump.py +6b9932d9c789a0e2ac28a493fb7914f49100a1c91de989bcdb20df9d40648522 lib/core/enums.py 5387168e5dfedd94ae22af7bb255f27d6baaca50b24179c6b98f4f325f5cc7b4 lib/core/exception.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py 914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py @@ -188,7 +188,7 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch 48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -1e2a5277293de9d3d1e65b401013baf1c4033162e580f6891ca6a2686e666894 lib/core/settings.py +72448bcfc929496fb0333480a780163a395f65fff92898ad8108daf54a12799b lib/core/settings.py cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py 70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py @@ -199,7 +199,7 @@ b9aacb840310173202f79c2ba125b0243003ee6b44c92eca50424f2bdfc83c02 lib/core/unesc 
2400e465fa4d13e4c32795910878c71ff212e4361b46428d57ce43983f5e997c lib/core/wordlist.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/__init__.py 54bfd31ebded3ffa5848df1c644f196eb704116517c7a3d860b5d081e984d821 lib/parse/banner.py -7bc8612fbd7ba390ab19f908c370c126ae66afa200bc7975800599ecbe029f0c lib/parse/cmdline.py +3f298a58a41225ef67c57b2cf08c71f2eacbab8f98463b4461f45933d6a82f69 lib/parse/cmdline.py 02d82e4069bd98c52755417f8b8e306d79945672656ac24f1a45e7a6eff4b158 lib/parse/configfile.py c5b258be7485089fac9d9cd179960e774fbd85e62836dc67cce76cc028bb6aeb lib/parse/handler.py 5c9a9caee948843d5537745640cc7b98d70a0412cc0949f59d4ebe8b2907c06c lib/parse/headers.py @@ -577,6 +577,7 @@ a48c411fea864e6bcd6a1c7e1a35094b8cda8d15088fd9e7b0270542ae20daa9 tests/test_com 3804eb2d730220360f9dc07d5994eb64e9f65acf3b0d8648df8df2a2177ba8fd tests/test_decodepage.py e40a49cfa73c45b3c3c6d1d1d00738861e270cb7a07b28f5a5356f9c7c800cf2 tests/test_dialect.py 993a2d4d87c4fbaf261663b069629acc95ee4405aa0c42cf5a8f39649fdb0fff tests/test_dicts.py +c706c5dad287e2e8cf707f7aa5eeb9394eddc6ef3a4fea809babf3ae77e8d7fa tests/test_dump_jsonl.py 2bbe4b01f79992cfa8884651fc0a28dbd0e3abb0cbea9eb7eadf1f98ca3c3420 tests/test_encoding.py bb6991260a994fcbe79e05febaa34affd5631d02299fbc626820addd5f6ea4f4 tests/test_error_engine.py 8105de9978fe286a29f6b635a58db1e9998d86e8dded54d7efdfb9d52a121094 tests/test_hashdb.py diff --git a/lib/core/dump.py b/lib/core/dump.py index 9d0eb3857..ebc7d0cd0 100644 --- a/lib/core/dump.py +++ b/lib/core/dump.py @@ -6,6 +6,7 @@ See the file 'LICENSE' for copying permission """ import hashlib +import json import os import re import shutil @@ -61,6 +62,7 @@ from lib.core.settings import WINDOWS_RESERVED_NAMES from lib.utils.safe2bin import safechardecode from thirdparty import six from thirdparty.magic import magic +from thirdparty.odict import OrderedDict class Dump(object): """ @@ -461,7 +463,7 @@ class Dump(object): if conf.dumpFormat == DUMP_FORMAT.SQLITE: 
replication = Replication(os.path.join(conf.dumpPath, "%s.sqlite3" % safeDb)) - elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML): + elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL): if not os.path.isdir(dumpDbPath): try: os.makedirs(dumpDbPath) @@ -624,6 +626,7 @@ class Dump(object): console = (i >= count - TRIM_STDOUT_DUMP_SIZE) field = 1 values = [] + record = OrderedDict() if i == 0 and count > TRIM_STDOUT_DUMP_SIZE: self._write(" ...") @@ -674,6 +677,11 @@ class Dump(object): dataToDumpFile(dumpFP, "%s%s" % (safeCSValue(value), conf.csvDel)) elif conf.dumpFormat == DUMP_FORMAT.HTML: dataToDumpFile(dumpFP, "%s" % getUnicode(htmlEscape(value).encode("ascii", "xmlcharrefreplace"))) + elif conf.dumpFormat == DUMP_FORMAT.JSONL: + if len(info["values"]) <= i or info["values"][i] is None or info["values"][i] == " ": # NULL + record[unsafeSQLIdentificatorNaming(column)] = None + else: + record[unsafeSQLIdentificatorNaming(column)] = getUnicode(info["values"][i]) field += 1 @@ -686,6 +694,8 @@ class Dump(object): dataToDumpFile(dumpFP, "\n") elif conf.dumpFormat == DUMP_FORMAT.HTML: dataToDumpFile(dumpFP, "\n") + elif conf.dumpFormat == DUMP_FORMAT.JSONL: + dataToDumpFile(dumpFP, "%s\n" % getUnicode(json.dumps(record, ensure_ascii=False))) self._write("|", console=console) @@ -695,10 +705,10 @@ class Dump(object): rtable.endTransaction() logger.info("table '%s.%s' dumped to SQLITE database '%s'" % (db, table, replication.dbpath)) - elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML): + elif conf.dumpFormat in (DUMP_FORMAT.CSV, DUMP_FORMAT.HTML, DUMP_FORMAT.JSONL): if conf.dumpFormat == DUMP_FORMAT.HTML: dataToDumpFile(dumpFP, "\n\n\n\n") - else: + elif conf.dumpFormat == DUMP_FORMAT.CSV: dataToDumpFile(dumpFP, "\n") dumpFP.close() diff --git a/lib/core/enums.py b/lib/core/enums.py index 2e1881f19..137be5d02 100644 --- a/lib/core/enums.py +++ b/lib/core/enums.py @@ -238,6 +238,7 @@ class DUMP_FORMAT(object): CSV = "CSV" 
HTML = "HTML" SQLITE = "SQLITE" + JSONL = "JSONL" class HTTP_HEADER(object): ACCEPT = "Accept" diff --git a/lib/core/settings.py b/lib/core/settings.py index c9e7ef6e3..0c206a5d3 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -20,7 +20,7 @@ from lib.core.enums import OS from thirdparty import six # sqlmap version (...) -VERSION = "1.10.6.108" +VERSION = "1.10.6.109" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 8198ff8ed..77bcb44db 100644 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -686,7 +686,7 @@ def cmdLineParser(argv=None): help="Store dumped data to a custom file") general.add_argument("--dump-format", dest="dumpFormat", - help="Format of dumped data (CSV (default), HTML or SQLITE)") + help="Dump data format (CSV (default), HTML, SQLITE, JSONL)") general.add_argument("--encoding", dest="encoding", help="Character encoding used for data retrieval (e.g. GBK)") diff --git a/tests/test_dump_jsonl.py b/tests/test_dump_jsonl.py new file mode 100644 index 000000000..a4432e5f1 --- /dev/null +++ b/tests/test_dump_jsonl.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python + +""" +Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org) +See the file 'LICENSE' for copying permission + +JSONL output of the per-table dumper (Dump.dbTableValues in lib/core/dump.py). + +--dump-format=JSONL writes one self-describing JSON object per row to a +<output>/dump/<db>/<table>.jsonl file, streaming-safe (one independent line per +row, no surrounding array/header/footer). 
These tests pin the contract that an +automated consumer relies on: column order preserved (so it matches the CSV +column order and is reproducible on Python 2's unordered dict), the DB-NULL +marker (" ") mapped to JSON null exactly like --report-json, the empty string +left intact (NOT collapsed to null), and a strict one-object-per-line layout. +""" + +import json +import os +import shutil +import sys +import tempfile +import unittest + +from collections import OrderedDict + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from _testutils import bootstrap +bootstrap() + +from lib.core.common import Backend +from lib.core.data import conf, kb +from lib.core.dump import Dump +from lib.core.enums import DUMP_FORMAT + + +class _JsonlDumpCase(unittest.TestCase): + def setUp(self): + self._saved = dict((k, conf.get(k)) for k in ("dumpFormat", "dumpPath", "dumpFile", "col", "api", "reportCollector", "limitStart", "limitStop", "csvDel", "forceDbms", "dbms")) + self._savedKb = dict((k, kb.get(k)) for k in ("forcedDbms", "dbms")) + # A DBMS leaked from an earlier test (e.g. one that uppercases identifiers) would change + # both the on-disk filename and the JSON keys, so pin a neutral, case-preserving back-end. 
+ conf.forceDbms = conf.dbms = None + kb.dbms = None + Backend.forceDbms("MySQL") + self.tmp = tempfile.mkdtemp(prefix="sqlmap-jsonl-test") + conf.dumpFormat = DUMP_FORMAT.JSONL + conf.dumpPath = self.tmp + conf.dumpFile = None + conf.col = None + conf.api = False + conf.reportCollector = None + conf.limitStart = conf.limitStop = None + conf.csvDel = "," + self.d = Dump() + self.d._write = lambda *a, **k: None # silence the console table + + def tearDown(self): + for k, v in self._saved.items(): + conf[k] = v + for k, v in self._savedKb.items(): + kb[k] = v + shutil.rmtree(self.tmp, ignore_errors=True) + + def _dump(self, table_values): + self.d.dbTableValues(table_values) + db = table_values["__infos__"]["db"] or "All" + path = os.path.join(self.tmp, db, "%s.jsonl" % table_values["__infos__"]["table"]) + with open(path) as f: + content = f.read() + return content + + def _rows(self, content): + return [json.loads(line) for line in content.splitlines() if line.strip()] + + +class TestJsonlContract(_JsonlDumpCase): + def test_one_object_per_row(self): + content = self._dump({ + "__infos__": {"count": 2, "db": "testdb", "table": "users"}, + "id": {"length": 2, "values": ["1", "2"]}, + "name": {"length": 6, "values": ["luther", "fluffy"]}, + }) + # exactly N non-empty lines, each terminated by a newline, each a standalone object + lines = content.splitlines() + self.assertEqual(len(lines), 2) + self.assertTrue(content.endswith("\n")) + rows = self._rows(content) + self.assertEqual(rows[0], {"id": "1", "name": "luther"}) + self.assertEqual(rows[1], {"id": "2", "name": "fluffy"}) + + def test_no_header_or_footer(self): + # unlike CSV (header row) / HTML (doc scaffold), JSONL must be pure data lines + content = self._dump({ + "__infos__": {"count": 1, "db": "testdb", "table": "t"}, + "id": {"length": 2, "values": ["1"]}, + }) + lines = [l for l in content.splitlines() if l.strip()] + self.assertEqual(len(lines), 1) + self.assertEqual(json.loads(lines[0]), {"id": "1"}) + 
+ def test_db_null_becomes_json_null(self): + # sqlmap stores a DB NULL as a single space (" "); the machine format must emit JSON null, + # consistent with --report-json. An empty string is a real value and must stay "". + content = self._dump({ + "__infos__": {"count": 1, "db": "testdb", "table": "t"}, + "a": {"length": 1, "values": [" "]}, # DB NULL marker + "b": {"length": 1, "values": [""]}, # genuine empty string + "c": {"length": 1, "values": ["x"]}, + }) + row = self._rows(content)[0] + self.assertIsNone(row["a"]) + self.assertEqual(row["b"], "") + self.assertEqual(row["c"], "x") + + def test_missing_value_is_null(self): + # a column whose values list is short for this row index must serialize as null, not crash + content = self._dump({ + "__infos__": {"count": 2, "db": "testdb", "table": "t"}, + "id": {"length": 2, "values": ["1", "2"]}, + "lagging": {"length": 4, "values": ["only-one"]}, # missing index 1 + }) + rows = self._rows(content) + self.assertEqual(rows[0], {"id": "1", "lagging": "only-one"}) + self.assertEqual(rows[1], {"id": "2", "lagging": None}) + + def test_column_order_matches_csv(self): + # The serialized byte stream must keep the (priority-sorted) column order so output is + # reproducible - even on Python 2 where a plain dict would not - and that order must be + # the SAME one CSV uses. Build the input as an OrderedDict so the expectation is fixed, + # then dump the identical data as both JSONL and CSV and compare the column sequences. 
+ def table(): + tv = OrderedDict() + tv["__infos__"] = {"count": 1, "db": "testdb", "table": "t"} + tv["zebra"] = {"length": 1, "values": ["1"]} + tv["alpha"] = {"length": 1, "values": ["2"]} + tv["middle"] = {"length": 1, "values": ["3"]} + return tv + + jsonl_line = [l for l in self._dump(table()).splitlines() if l.strip()][0] + jsonl_order = [k for k, _ in json.loads(jsonl_line, object_pairs_hook=lambda p: p)] + + conf.dumpFormat = DUMP_FORMAT.CSV + csv_path = os.path.join(self.tmp, "testdb", "t.csv") + if os.path.exists(csv_path): + os.remove(csv_path) + self.d.dbTableValues(table()) + with open(csv_path) as f: + csv_header = f.read().splitlines()[0] + csv_order = [c.strip() for c in csv_header.split(conf.csvDel)] + + self.assertEqual(jsonl_order, csv_order) + + def test_unicode_value_not_escaped(self): + # ensure_ascii=False keeps multibyte data readable; it must round-trip through json.loads + content = self._dump({ + "__infos__": {"count": 1, "db": "testdb", "table": "t"}, + "name": {"length": 6, "values": [u"\u0107evap"]}, + }) + self.assertEqual(self._rows(content)[0]["name"], u"\u0107evap") + + +if __name__ == "__main__": + unittest.main()