mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2026-06-20 14:40:36 +00:00
264 lines
12 KiB
Python
264 lines
12 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
|
|
See the file 'LICENSE' for copying permission
|
|
|
|
Property/fuzz tests for the pure parsers and transforms. Where the other test
|
|
files pin specific examples, these assert INVARIANTS over hundreds of randomized
|
|
(but deterministic, cross-version-identical - see _testutils.Rng) inputs, which is
|
|
the cheap net for the edge-bug class that example tests miss (commas inside quoted
|
|
literals / nested parens, NUL / 0xff / astral code points in codecs, etc.).
|
|
|
|
Property families:
|
|
- codec/serializer pairs round-trip: decode(encode(x)) == x
|
|
- structure transforms preserve their contract (flat/de-arrayized/permutation)
|
|
- string transforms hold their stated invariant (ASCII-only, no newlines, ...)
|
|
- random helpers respect length / alphabet / range bounds
|
|
- splitFields/zeroDepthSearch partition faithfully and never cut inside a group
|
|
- a batch of transforms never raise on arbitrary input
|
|
|
|
On failure _testutils.for_all prints the exact offending input + its case index so
|
|
it reproduces on any interpreter.
|
|
"""
|
|
|
|
import os
|
|
import string
|
|
import sys
|
|
import unittest
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
from _testutils import bootstrap, for_all, set_dbms
|
|
bootstrap()
|
|
|
|
from extra.cloak.cloak import cloak, decloak
|
|
from lib.core.common import (escapeJsonValue, filterStringValue, flattenValue, isListLike, normalizeUnicode,
|
|
prioritySortColumns, randomInt, randomRange, randomStr, safeSQLIdentificatorNaming,
|
|
sanitizeStr, splitFields, unArrayizeValue, unsafeSQLIdentificatorNaming, urldecode,
|
|
urlencode, zeroDepthSearch)
|
|
from lib.core.convert import (base64pickle, base64unpickle, decodeBase64, decodeHex, dejsonize, encodeBase64,
|
|
encodeHex, getBytes, getConsoleLength, getOrds, getText, htmlEscape, htmlUnescape,
|
|
jsonize, stdoutEncode)
|
|
from lib.core.data import kb
|
|
from lib.utils.safe2bin import safecharencode
|
|
|
|
|
|
# --- input strategies (draw ONLY through rng: randint / choice / sample / blob) ---
|
|
|
|
# deliberately loaded with structural metacharacters + tricky code points
_TEXT = [
    # plain ASCII letters / digit / space
    u"a", u"Z", u"7", u" ",
    # SQL-structural punctuation: separator, quotes, parens, escape, terminator
    u",", u"'", u'"', u"(", u")", u"\\", u";",
    # whitespace and control characters
    u"\n", u"\t", u"\x00", u"\x7f",
    # non-ASCII: Latin-1, Latin Extended-A, CJK, astral plane
    u"\xe9", u"\u0107", u"\u4e2d", u"\U0001F600",
    # keyword-bearing fragment
    u" FROM ",
]
|
|
|
|
|
|
def gen_text(rng):
    """Build a unicode string out of 0-24 fragments, each picked from _TEXT through rng."""
    # the length is drawn before any fragment, so the order of rng calls stays fixed
    length = rng.randint(0, 24)
    pieces = []
    for _ in range(length):
        pieces.append(rng.choice(_TEXT))
    return u"".join(pieces)
|
|
|
|
|
|
def gen_ascii(rng):
    """Build a string of 0-20 characters, each picked from string.printable through rng."""
    # length first, characters afterwards (keeps the rng draw order fixed)
    length = rng.randint(0, 20)
    picked = []
    for _ in range(length):
        picked.append(rng.choice(string.printable))
    return u"".join(picked)
|
|
|
|
|
|
def gen_blob(rng):
    """Return rng.blob(size) for a size drawn from 0..32 (both ends inclusive per rng.randint)."""
    size = rng.randint(0, 32)
    return rng.blob(size)
|
|
|
|
|
|
def gen_json(rng):
    """Draw a random JSON-compatible value: a list, a dict with u"k<N>" keys, or a scalar.

    Containers recurse into gen_json for their members and hold at most 3 of them.
    """
    # JSON-safe only: tuples become lists and non-str keys are coerced, so exclude them here

    # first draw: one-in-five chance of a list
    if rng.randint(0, 4) == 0:
        items = []
        for _ in range(rng.randint(0, 3)):
            items.append(gen_json(rng))
        return items

    # second draw (only reached when no list was produced): one-in-five chance of a dict
    if rng.randint(0, 4) == 0:
        mapping = {}
        for j in range(rng.randint(0, 3)):
            mapping[u"k%d" % j] = gen_json(rng)
        return mapping

    # otherwise a scalar leaf
    scalars = [0, 1, -1, 2 ** 31, 1.5, -0.25, True, False, None, u"", u"x", u"\u0107", u'a"b,c']
    return rng.choice(scalars)
|
|
|
|
|
|
def gen_pickle(rng):
    """Draw a random picklable value: scalar, list, tuple, set or dict.

    A single draw in 0..9 selects the shape: 0-4 scalar, 5-6 list, 7 tuple, 8 set, 9 dict.
    Lists, tuples and dict values recurse into gen_pickle; set members are plain scalars.
    """
    kind = rng.randint(0, 9)

    if kind <= 4:
        scalars = [0, -7, 2 ** 40, 3.5, True, False, None, u"\u0107x", b"\x00\xff", u""]
        return rng.choice(scalars)

    if kind <= 6:
        items = []
        for _ in range(rng.randint(0, 3)):
            items.append(gen_pickle(rng))
        return items

    if kind <= 7:
        members = []
        for _ in range(rng.randint(0, 3)):
            members.append(gen_pickle(rng))
        return tuple(members)

    if kind <= 8:
        # hashable members only (duplicates simply collapse)
        unique = set()
        for _ in range(rng.randint(0, 3)):
            unique.add(rng.choice([1, 2, 3, u"a", u"b"]))
        return unique

    mapping = {}
    for j in range(rng.randint(0, 2)):
        mapping[u"k%d" % j] = gen_pickle(rng)
    return mapping
|
|
|
|
|
|
def gen_columns(rng):
    """Return a list of 0-6 column names picked (with repetition) from a fixed pool."""
    pool = [u"id", u"userid", u"name", u"password", u"a", u"created_id", u"x_id_y", u"data"]
    # count is drawn before any name, so the order of rng calls stays fixed
    count = rng.randint(0, 6)
    columns = []
    for _ in range(count):
        columns.append(rng.choice(pool))
    return columns
|
|
|
|
|
|
def gen_ident(rng):
    """Draw an identifier of 1-10 letters/digits/underscores, sometimes followed by a suffix."""
    # clean (round-trippable) identifier names: letters/digits/underscore, optional dot/space
    alphabet = string.ascii_letters + string.digits + u"_"

    length = rng.randint(1, 10)
    letters = []
    for _ in range(length):
        letters.append(rng.choice(alphabet))
    ident = u"".join(letters)

    # one-in-four chance of appending a suffix
    add_suffix = rng.randint(0, 3) == 0
    if add_suffix:
        ident = ident + rng.choice([u".col", u" alias", u"_2"])

    return ident
|
|
|
|
|
|
# well-formed field lists: balanced parens, properly closed/escaped quotes

# bare field expressions (plain names, a literal, a star, function calls with 0-depth-hidden commas)
_TOKENS = [
    u"foo",
    u"bar",
    u"id",
    u"a b",
    u"1",
    u"*",
    u"max(a)",
    u"COALESCE(a, b, c)",
    u"func(x, y)",
]

# bodies that get wrapped in a matching pair of quotes
_QUOTED = [
    u"a,b",
    u"x, y",
    u"f(1, 2)",
    u"o''k",
    u"plain",
    u"",
]
|
|
|
|
|
|
def gen_sql_fields(rng):
    """Build a u", "-joined list of 1-5 fields: bare tokens, quoted literals or g(..., ...) calls.

    Per field a draw in 0..9 picks the shape: 0-4 bare token, 5-7 quoted literal, 8-9 call.
    """
    fields = []
    count = rng.randint(1, 5)

    for _ in range(count):
        roll = rng.randint(0, 9)

        if roll >= 8:
            # two-argument call; arguments are drawn left to right
            first = rng.choice(_TOKENS)
            second = rng.choice(_TOKENS)
            field = u"g(%s, %s)" % (first, second)
        elif roll >= 5:
            # the quote character is drawn before the quoted body
            quote = rng.choice([u"'", u'"'])
            field = quote + rng.choice(_QUOTED) + quote
        else:
            field = rng.choice(_TOKENS)

        fields.append(field)

    return u", ".join(fields)
|
|
|
|
|
|
class TestCodecRoundTrips(unittest.TestCase):
    # every codec/serializer pair must give back its input: decode(encode(x)) == x

    def test_base64(self):
        def holds(blob):
            return decodeBase64(encodeBase64(blob)) == blob

        for_all(self, gen_blob, holds, label="base64")

    def test_hex(self):
        def holds(blob):
            return decodeHex(encodeHex(blob)) == blob

        for_all(self, gen_blob, holds, label="hex")

    def test_getbytes_gettext(self):
        # unsafe=False -> plain UTF-8 (no \xNN escape interpretation), so it is a clean round-trip
        def holds(text):
            raw = getBytes(text, unsafe=False)
            return getText(raw) == text

        for_all(self, gen_text, holds, label="bytes-text")

    def test_json(self):
        def holds(value):
            return dejsonize(jsonize(value)) == value

        for_all(self, gen_json, holds, label="json")

    def test_pickle(self):
        def holds(value):
            return base64unpickle(base64pickle(value)) == value

        for_all(self, gen_pickle, holds, label="pickle")

    def test_html_escape(self):
        def holds(text):
            return htmlUnescape(htmlEscape(text)) == text

        for_all(self, gen_text, holds, label="html")

    def test_cloak(self):
        def holds(blob):
            hidden = cloak(data=blob)
            return decloak(data=hidden) == blob

        for_all(self, gen_blob, holds, label="cloak")
|
|
|
|
|
|
class TestStructureTransforms(unittest.TestCase):
    # container-reshaping helpers must keep their structural contract on arbitrary nested values

    def test_unarrayize_never_listlike(self):
        # the whole point of unArrayizeValue is that the result is a scalar, never a list/tuple
        # (gen_pickle includes sets - they used to crash here; see test_unarrayize_set regression)
        def holds(value):
            return not isListLike(unArrayizeValue(value))

        for_all(self, gen_pickle, holds, label="unarrayize")

    def test_flatten_is_flat(self):
        # nothing yielded by flattenValue may itself be list-like
        def holds(value):
            for item in flattenValue([value]):
                if isListLike(item):
                    return False
            return True

        for_all(self, gen_pickle, holds, label="flatten")

    def test_unarrayize_set(self):
        # regression: a 1-element set is list-like but not subscriptable; unArrayizeValue must
        # de-arrayize it rather than crash on value[0]
        single = set(["x"])
        self.assertEqual(unArrayizeValue(single), "x")

        empty = set()
        self.assertEqual(unArrayizeValue(empty), None)

        # ordinary fast-path still works
        self.assertEqual(unArrayizeValue(["1"]), "1")

    def test_prioritysort_is_permutation(self):
        # sorting must not invent/drop columns, and must be idempotent
        def holds(columns):
            ordered = prioritySortColumns(columns)
            if sorted(ordered) != sorted(columns):
                return False
            return prioritySortColumns(ordered) == ordered

        for_all(self, gen_columns, holds, label="prioritysort")
|
|
|
|
|
|
class TestStringTransforms(unittest.TestCase):
    # each string transform must keep its stated output invariant for arbitrary text

    def test_normalize_unicode_is_ascii(self):
        # every character of the normalized text must be below code point 128
        def holds(text):
            for ch in normalizeUnicode(text):
                if ord(ch) >= 128:
                    return False
            return True

        for_all(self, gen_text, holds, label="normalize-ascii")

    def test_sanitizestr_strips_newlines(self):
        # neither LF nor CR may survive sanitizing
        def holds(text):
            cleaned = sanitizeStr(text)
            return not ("\n" in cleaned or "\r" in cleaned)

        for_all(self, gen_text, holds, label="sanitizestr")

    def test_filterstringvalue_charset(self):
        # only characters matched by the filter class may remain
        allowed = frozenset("0123456789abcdef")

        def holds(text):
            return set(filterStringValue(text, r"[0-9a-f]")) <= allowed

        for_all(self, gen_text, holds, label="filterstring")

    def test_escapejson_no_control_char(self):
        # no character below space (i.e. no raw control character) may remain in the output
        def holds(text):
            return not any(ch < " " for ch in escapeJsonValue(text))

        for_all(self, gen_text, holds, label="escapejson-invariant")

    def test_escapejson_json_roundtrip(self):
        # escapeJsonValue(s) embedded in a JSON string must parse back to s - for ALL text,
        # including backslash (the F1 fix; this used to fail on '\')
        import json

        def holds(text):
            document = u'"%s"' % escapeJsonValue(text)
            return json.loads(document) == text

        for_all(self, gen_text, holds, label="escapejson-roundtrip")

    def test_escapejson_backslash(self):
        # regression for F1: backslash is now escaped, so the round-trip holds
        import json

        original = u"a\\b"
        document = u'"%s"' % escapeJsonValue(original)
        self.assertEqual(json.loads(document), u"a\\b")

    def test_getords_length(self):
        # one int ordinal per input character
        def holds(text):
            ordinals = getOrds(text)
            if len(ordinals) != len(text):
                return False
            return all(isinstance(o, int) for o in ordinals)

        for_all(self, gen_text, holds, label="getords")

    def test_consolelength_ascii(self):
        # for printable-ASCII input the console length equals the plain length
        def holds(text):
            return getConsoleLength(text) == len(text)

        for_all(self, gen_ascii, holds, label="consolelength")
|
|
|
|
|
|
class TestRandomHelpers(unittest.TestCase):
    # random helpers must respect the requested length / alphabet / range

    def test_randomstr_length_and_alphabet(self):
        letters = set(string.ascii_letters)

        def draw_length(rng):
            return rng.randint(0, 16)

        def holds(length):
            # two separate samples: one checked for length, one for alphabet
            if len(randomStr(length)) != length:
                return False
            return set(randomStr(length)) <= letters

        for_all(self, draw_length, holds, label="randomstr")

    def test_randomstr_lowercase(self):
        lowercase = set(string.ascii_lowercase)

        def draw_length(rng):
            return rng.randint(0, 16)

        def holds(length):
            return set(randomStr(length, lowercase=True)) <= lowercase

        for_all(self, draw_length, holds, label="randomstr-lower")

    def test_randomint_digits(self):
        # randomInt(n) must render as exactly n decimal characters
        def draw_digits(rng):
            return rng.randint(1, 8)

        def holds(digits):
            return len(str(randomInt(digits))) == digits

        for_all(self, draw_digits, holds, label="randomint")

    def test_randomrange_bounds(self):
        # input is (start, span); the drawn value must lie within [start, start + span]
        def draw_bounds(rng):
            return (rng.randint(-50, 50), rng.randint(0, 100))

        def holds(pair):
            low = pair[0]
            high = pair[0] + pair[1]
            value = randomRange(low, high)
            return low <= value <= high

        for_all(self, draw_bounds, holds, label="randomrange")
|
|
|
|
|
|
class TestSplitterInvariants(unittest.TestCase):
    # splitFields / zeroDepthSearch must partition faithfully, on hostile text and on well-formed SQL

    def test_reconstruction(self):
        # pure partition identity: rejoining the 0-depth split must reproduce the (space-normalized) input
        def holds(text):
            rejoined = u",".join(splitFields(text))
            return rejoined == text.replace(", ", ",")

        for_all(self, gen_text, holds, label="split-reconstruct-text")
        for_all(self, gen_sql_fields, holds, label="split-reconstruct-sql")

    def test_never_cuts_inside_parens(self):
        # on well-formed input no field may carry unbalanced parens (i.e. a split never lands inside a group)
        def holds(text):
            for field in splitFields(text):
                if field.count(u"(") != field.count(u")"):
                    return False
            return True

        for_all(self, gen_sql_fields, holds, label="split-balanced")

    def test_zerodepth_indices_are_real_commas(self):
        # every reported index points at a comma; indices are ascending and free of duplicates
        def holds(text):
            positions = zeroDepthSearch(text, ",")
            if not all(text[i] == u"," for i in positions):
                return False
            if positions != sorted(positions):
                return False
            return len(set(positions)) == len(positions)

        for_all(self, gen_text, holds, label="zerodepth-commas-text")
        for_all(self, gen_sql_fields, holds, label="zerodepth-commas-sql")
|
|
|
|
|
|
class TestIdentifierRoundTrip(unittest.TestCase):
    # safe -> unsafe identifier naming must give back the original name

    def setUp(self):
        # remember the current forced DBMS so tearDown can put it back
        self._saved = kb.get("forcedDbms")
        # identifier quoting is DBMS-specific; pin a case-preserving back-end
        set_dbms("MySQL")

    def tearDown(self):
        kb.forcedDbms = self._saved

    def test_safe_unsafe_roundtrip(self):
        def holds(name):
            quoted = safeSQLIdentificatorNaming(name)
            return unsafeSQLIdentificatorNaming(quoted) == name

        for_all(self, gen_ident, holds, label="identifier")
|
|
|
|
|
|
class TestRobustness(unittest.TestCase):
    # total functions: must never raise on arbitrary text (return value unconstrained)

    def test_urlencode_urldecode(self):
        # only "does not raise" is checked, so the property is always True once both calls return
        def runs(text):
            urlencode(text)
            urldecode(text)
            return True

        for_all(self, gen_text, runs, label="urlcodec")

    def test_safecharencode(self):
        # a None result is tolerated only for empty input
        def runs(text):
            encoded = safecharencode(text)
            return encoded is not None or text == u""

        for_all(self, gen_text, runs, label="safecharencode")

    def test_stdoutencode(self):
        # a None result is tolerated only for empty input
        def runs(text):
            encoded = stdoutEncode(text)
            return encoded is not None or text == u""

        for_all(self, gen_text, runs, label="stdoutencode")
|
|
|
|
|
|
# allow running this file directly; unittest.main() discovers the TestCase classes above
if "__main__" == __name__:
    unittest.main()
|