sqlmap/tests/test_bigarray.py
Miroslav Štampar 0b2b3e956f Minor patch
2026-06-22 00:17:11 +02:00

154 lines
5.8 KiB
Python

#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
BigArray disk-spill semantics (lib/core/bigarray.py).
BigArray is the structure that lets sqlmap dump tables far larger than RAM: once
the in-memory chunk exceeds chunk_size it is pickled to a temp file and a new
chunk starts. The tricky, easy-to-break part is that indexing / iteration /
pop / pickling must stay correct ACROSS the in-memory<->on-disk boundary.
These tests force a spill with a tiny chunk_size and assert the data survives intact.
"""
import os
import pickle
import sys
import unittest
# Put this file's own directory first on sys.path so the sibling _testutils module
# can be imported no matter which working directory the tests are started from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _testutils import bootstrap
# NOTE(review): bootstrap() runs before the lib.core import below; presumably it
# prepares the environment that import needs -- confirm against _testutils.
bootstrap()
from lib.core.bigarray import BigArray
# Number of items appended by _make_spilled(); the tests below use it as the
# expected length and to compute the last valid index (N - 1).
N = 5000
def _make_spilled():
    """Build a BigArray holding "item-0" .. "item-<N-1>" with a tiny chunk_size.

    The small chunk_size is what makes N items land in many on-disk chunks.
    """
    spilled = BigArray(chunk_size=1024)
    labels = ("item-%d" % number for number in range(N))
    for label in labels:
        # go through append() one item at a time, exactly like a real dump would
        spilled.append(label)
    return spilled
class TestSpill(unittest.TestCase):
    """List-style operations on a BigArray that has been forced to spill.

    Every test obtains its array through _spilled(), which registers close()
    as a cleanup so the array is closed even when an assertion fails.
    """

    def _spilled(self):
        """Return a spilled BigArray that is closed automatically after the test."""
        ba = _make_spilled()
        # close() is the teardown the cache-consistency tests in this file call
        # explicitly; presumably it releases the on-disk chunk files. Registering
        # it as a cleanup keeps a failing assertion from skipping it.
        self.addCleanup(ba.close)
        return ba

    def test_actually_spilled_to_disk(self):
        """The array must hold more than one chunk and at least one real file."""
        ba = self._spilled()
        self.assertGreater(len(ba.chunks), 1, msg="expected multiple chunks (a disk spill)")
        # stronger than "more than one chunk": at least one chunk must be a real on-disk
        # file (spilled chunks are stored as filenames). Otherwise this could pass while
        # everything stayed in RAM.
        disk_chunks = [c for c in ba.chunks if isinstance(c, str)]
        self.assertTrue(disk_chunks, msg="no chunk was spilled to disk")
        self.assertTrue(os.path.exists(disk_chunks[0]), msg="spilled chunk file missing on disk")

    def test_len(self):
        """len() counts every appended item, spilled or not."""
        self.assertEqual(len(self._spilled()), N)

    def test_random_access_across_boundary(self):
        """Direct indexing returns the right item at positions spread over the array."""
        ba = self._spilled()
        for i in (0, 1, 499, 500, 2500, N - 1):
            self.assertEqual(ba[i], "item-%d" % i, msg="ba[%d]" % i)

    def test_negative_index(self):
        """Index -1 resolves to the last appended item."""
        ba = self._spilled()
        self.assertEqual(ba[-1], "item-%d" % (N - 1))

    def test_iteration_order_preserved(self):
        """Iteration yields exactly N items in insertion order."""
        ba = self._spilled()
        seen = 0
        for idx, value in enumerate(ba):
            if value != "item-%d" % idx:
                self.fail("iteration order broke at %d: %r" % (idx, value))
            seen += 1
        # counting explicitly (rather than reading the loop variable afterwards) turns
        # an empty iteration into a clean assertion failure instead of a NameError
        self.assertEqual(seen, N)

    def test_pop_from_end(self):
        """pop() returns the last item and shrinks the array by one."""
        ba = self._spilled()
        self.assertEqual(ba.pop(), "item-%d" % (N - 1))
        self.assertEqual(len(ba), N - 1)

    def test_pickle_roundtrip_across_spill(self):
        """A pickled and unpickled spilled array keeps its type, length and ends."""
        ba = self._spilled()
        restored = pickle.loads(pickle.dumps(ba))
        self.assertIsInstance(restored, BigArray)
        # cleanups run last-in-first-out, so the restored copy is closed before the
        # original -- the same order the dirty-edit pickle test in this file uses
        self.addCleanup(restored.close)
        self.assertEqual(len(restored), N)
        self.assertEqual(restored[0], "item-0")
        self.assertEqual(restored[N - 1], "item-%d" % (N - 1))
class TestCacheConsistency(unittest.TestCase):
    """On-disk chunks are read through a single-slot cache (read caching plus
    dirty write-back). These tests verify the cache never hands back stale data."""

    def test_setitem_writeback_across_chunks(self):
        """Item assignments scattered over the array are visible on every read path."""
        array = _make_spilled()
        expected = ["item-%d" % i for i in range(N)]
        # positions spread across several different on-disk chunks
        probes = (0, 1, 499, 500, 2500, N - 1)
        for pos in probes:
            edited = "EDIT-%d" % pos
            array[pos] = edited
            expected[pos] = edited
        try:
            for pos in probes:
                self.assertEqual(array[pos], expected[pos], msg="readback ba[%d]" % pos)
            # a complete, independent traversal has to agree with the reference list
            self.assertEqual(list(array), expected)
        finally:
            array.close()

    def test_dirty_edit_survives_pickle(self):
        """Edits made just before pickling are present in the unpickled copy."""
        original = _make_spilled()
        original[10] = "EDITED-LOW"
        original[N - 10] = "EDITED-HIGH"
        payload = pickle.dumps(original)
        clone = pickle.loads(payload)
        try:
            self.assertEqual(clone[10], "EDITED-LOW")
            self.assertEqual(clone[N - 10], "EDITED-HIGH")
        finally:
            clone.close()
            original.close()

    def test_pop_then_append_then_direct_read(self):
        """Regression test for a stale cache entry surviving pop() + re-append.

        pop() reloads the last on-disk chunk into memory and deletes its file, but a
        non-dirty cache entry still pointing at that chunk index used to be left in
        place. A later append that re-dumps the same chunk index then made the stale
        cache serve outdated data on a direct __getitem__ (silent data corruption).
        """
        expected = ["item-%d" % i for i in range(N)]
        array = _make_spilled()
        try:
            per_chunk = array.chunk_length
            # index of the last on-disk chunk (the tail entry is the in-memory list)
            last_disk = len(array.chunks) - 2
            start = last_disk * per_chunk
            # read once so the cache holds chunk 'last_disk' WITHOUT being dirty
            array[start]
            # popping down to start + 1 items reloads that chunk and removes its file
            while len(array) > start + 1:
                array.pop()
                expected.pop()
            # appending a chunk's worth of items re-dumps the chunk to a new temp file
            for n in range(per_chunk):
                fresh = "NEW-%d" % n
                array.append(fresh)
                expected.append(fresh)
            # read the re-dumped chunk directly, with no earlier read to refresh the cache
            for offset in range(per_chunk):
                self.assertEqual(array[start + offset], expected[start + offset], msg="offset %d" % offset)
        finally:
            array.close()
class TestInMemorySmall(unittest.TestCase):
    """A BigArray with only a few items must behave like a plain in-memory list."""

    def test_no_spill_for_small(self):
        """Three items keep their length and order, in one chunk, with no disk chunk."""
        small = BigArray([1, 2, 3])
        self.assertEqual(len(small), 3)
        self.assertEqual(list(small), [1, 2, 3])

        # the point the test name promises: a tiny array stays in ONE in-memory chunk
        chunk_count = len(small.chunks)
        self.assertEqual(chunk_count, 1, msg="small array unexpectedly spilled: %r" % (small.chunks,))

        # ... and none of its chunks is a filename, i.e. nothing was written to disk
        wrote_to_disk = any(isinstance(chunk, str) for chunk in small.chunks)
        self.assertFalse(wrote_to_disk, msg="small array wrote a disk chunk")
if __name__ == "__main__":
    # Allow running this file directly; verbosity=2 prints one result line per test.
    unittest.main(verbosity=2)