#!/usr/bin/env python """ Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org) See the file 'LICENSE' for copying permission Tests for lib/utils/sgmllib.py -- the SGML/HTML parser used internally by sqlmap for page content analysis. Exercises the parser with valid SGML/HTML constructs and verifies the event stream. """ import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from _testutils import bootstrap bootstrap() from lib.utils.sgmllib import SGMLParser class RecordingParser(SGMLParser): """SGMLParser subclass that records parse events AND delegates to parent.""" def __init__(self): SGMLParser.__init__(self) self.events = [] def _gather_data(self): """Extract concatenated text from data events.""" return "".join(body for ev in self.events if ev[0] == "data" for body in (ev[1],)) def handle_data(self, data): self.events.append(("data", data)) def handle_comment(self, data): self.events.append(("comment", data)) SGMLParser.handle_comment(self, data) def handle_decl(self, decl): self.events.append(("decl", decl)) def handle_pi(self, data): self.events.append(("pi", data)) def handle_charref(self, name): self.events.append(("charref", name)) SGMLParser.handle_charref(self, name) # do the actual conversion -> handle_data def handle_entityref(self, name): self.events.append(("entityref", name)) SGMLParser.handle_entityref(self, name) # do the actual conversion -> handle_data def unknown_starttag(self, tag, attrs): self.events.append(("start", tag, attrs)) def unknown_endtag(self, tag): self.events.append(("end", tag)) def unknown_charref(self, ref): self.events.append(("unknown_charref", ref)) def unknown_entityref(self, ref): self.events.append(("unknown_entityref", ref)) class TestBasicParsing(unittest.TestCase): def setUp(self): self.p = RecordingParser() def test_plain_text(self): self.p.feed("hello world") self.p.close() self.assertEqual(self.p._gather_data(), "hello world") def test_simple_start_and_end_tag(self): self.p.feed("
text
") self.p.close() self.assertIn(("start", "p", []), self.p.events) self.assertIn(("data", "text"), self.p.events) self.assertIn(("end", "p"), self.p.events) def test_nested_tags(self): self.p.feed("abc
": self.p.feed(ch) self.p.close() self.assertIn(("start", "p", []), self.p.events) self.assertIn(("end", "p"), self.p.events) self.assertEqual(self.p._gather_data(), "abc") def test_multiple_feeds(self): self.p.feed("first
") self.p.feed("second
") self.p.close() starts = [e for e in self.p.events if e[0] == "start"] self.assertEqual(len(starts), 2) self.assertEqual(self.p._gather_data(), "firstsecond") class TestEntityConversion(unittest.TestCase): def test_convert_entityref_known(self): p = SGMLParser() self.assertEqual(p.convert_entityref("lt"), "<") self.assertEqual(p.convert_entityref("gt"), ">") self.assertEqual(p.convert_entityref("amp"), "&") self.assertEqual(p.convert_entityref("quot"), '"') self.assertEqual(p.convert_entityref("apos"), "'") def test_convert_entityref_unknown(self): p = SGMLParser() self.assertIsNone(p.convert_entityref("unknown")) def test_convert_charref_valid(self): p = SGMLParser() self.assertEqual(p.convert_charref("65"), "A") self.assertEqual(p.convert_charref("97"), "a") def test_convert_charref_invalid(self): p = SGMLParser() self.assertIsNone(p.convert_charref("notanumber")) self.assertIsNone(p.convert_charref("9999")) # > 127 def test_convert_codepoint(self): p = SGMLParser() self.assertEqual(p.convert_codepoint(65), "A") class TestCustomEntitydefs(unittest.TestCase): def test_custom_entity(self): p = RecordingParser() p.entitydefs["copy"] = "\xa9" p.feed("©") p.close() self.assertEqual(p._gather_data(), "\xa9") class TestGetStarttagText(unittest.TestCase): def test_starttag_text(self): p = RecordingParser() p.feed("raw text
") p.close() self.assertEqual(p._gather_data(), "raw text
") class TestReset(unittest.TestCase): def test_reset_clears_parser_state(self): p = RecordingParser() p.feed("hello
") # verify rawdata is cleared after close self.assertEqual(p.rawdata, "") p.reset() self.assertEqual(p.stack, []) self.assertEqual(p.lasttag, "???") class TestVerbose(unittest.TestCase): # In this parser, `verbose` only gates the debug printing emitted by # report_unbalanced() (an unbalanced for which an end_