From 800e6f0a562a024b7fd2de45ed966539767ee567 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0tampar?= Date: Fri, 12 Jun 2026 12:50:57 +0200 Subject: [PATCH] Making DuckDuckGo default search engine --- data/txt/sha256sums.txt | 4 +- lib/core/settings.py | 2 +- lib/utils/search.py | 203 +++++++++++++++------------------------- 3 files changed, 79 insertions(+), 130 deletions(-) diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index ab6fb887c..9031861c3 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -188,7 +188,7 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data. 48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -33cecddfb0e039523c6ef758f88b5b864c5d24e061fac58e481be05c628f12b3 lib/core/settings.py +118f716132dc29f5fb5692d30948f2d73295fc2638571abc5711bcfeaa7f4a0e lib/core/settings.py cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py 70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py @@ -255,7 +255,7 @@ e7d31de0e268c129ee11c590eb618f73a85e1022c08b8ed1f77753043c949214 lib/utils/pivo c1dfc3bed0fed9b181f612d1d747955dd2b506dbe99bc9fd481495602371473a lib/utils/progress.py 27afe211030d06db28df85296bfbf698296c94440904c390cef0ff0c259dbbc5 lib/utils/purge.py f635872093a12cd63a72d77adf88e8f8cd4084a5cc64384f12966cd75a499bdf lib/utils/safe2bin.py -2ee72e83500a1bf02fcd942564fca0053a0c46f736286f0c35dd6904e09f4734 lib/utils/search.py +de4be7e291db0962cd59f9c04b3f7259f846e315df1fd9b323954f89fae0b2db lib/utils/search.py 8258d0f54ad94e6101934971af4e55d5540f217c40ddcc594e2fba837b856d35 lib/utils/sgmllib.py 
92361b3c14ca472f0f89c275814da021c4f0e2de6ffa1bffc691b4cdc38d59dc lib/utils/sqlalchemy.py f0e5525a92fe971defc8f74c27942ff9138b1e8251f2e0d9a8bd59285b656084 lib/utils/timeout.py diff --git a/lib/core/settings.py b/lib/core/settings.py index c3741a4e3..ab503580a 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -20,7 +20,7 @@ from lib.core.enums import OS from thirdparty import six # sqlmap version (...) -VERSION = "1.10.6.92" +VERSION = "1.10.6.93" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/utils/search.py b/lib/utils/search.py index 985226891..4e98a12f5 100644 --- a/lib/utils/search.py +++ b/lib/utils/search.py @@ -11,7 +11,6 @@ import socket from lib.core.common import getSafeExString from lib.core.common import popValue from lib.core.common import pushValue -from lib.core.common import readInput from lib.core.common import urlencode from lib.core.convert import getBytes from lib.core.convert import getUnicode @@ -24,7 +23,6 @@ from lib.core.enums import HTTP_HEADER from lib.core.enums import REDIRECTION from lib.core.exception import SqlmapBaseException from lib.core.exception import SqlmapConnectionException -from lib.core.exception import SqlmapUserQuitException from lib.core.settings import BING_REGEX from lib.core.settings import DUCKDUCKGO_REGEX from lib.core.settings import DUMMY_SEARCH_USER_AGENT @@ -37,152 +35,102 @@ from thirdparty.six.moves import http_client as _http_client from thirdparty.six.moves import urllib as _urllib from thirdparty.socks import socks +def _fetch(url, headers, data=None): + """ + Fetches and returns the (decoded) content of a search engine results page + (or None in case of a connection issue) + """ + + retVal = None + + try: + req = 
_urllib.request.Request(url, data=getBytes(data) if data else None, headers=headers) + conn = _urllib.request.urlopen(req) + + requestMsg = "HTTP request:\n%s %s" % ("POST" if data else "GET", url) + requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str + logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) + + page = conn.read() + responseHeaders = conn.info() + + responseMsg = "HTTP response (%s - %d):\n" % (conn.msg, conn.code) + if conf.verbose <= 4: + responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING) + elif conf.verbose > 4: + responseMsg += "%s\n%s\n" % (responseHeaders, page) + logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg) + + page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE)) + retVal = getUnicode(page) # Note: if decodePage call fails (Issue #4202) + except _urllib.error.HTTPError as ex: + try: + retVal = getUnicode(ex.read()) + except Exception: + pass + except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError): + pass + + return retVal + def _search(dork): """ - This method performs the effective search on Google providing - the google dork and the Google session cookie + This method performs the effective search using the provided dork, + trying the available search engines in order of (current) scraping + reliability and returning the results of the first one that yields any + (so that the failure of a single engine does not break the feature) """ if not dork: return None - page = None - data = None - requestHeaders = {} - responseHeaders = {} + retVal = [] + seen = set() - requestHeaders[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT) - requestHeaders[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE - requestHeaders[HTTP_HEADER.COOKIE] = GOOGLE_CONSENT_COOKIE - - try: - req = _urllib.request.Request("https://www.google.com/ncr", 
headers=requestHeaders) - conn = _urllib.request.urlopen(req) - except Exception as ex: - errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex) - raise SqlmapConnectionException(errMsg) + requestHeaders = { + HTTP_HEADER.USER_AGENT: dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT), + HTTP_HEADER.ACCEPT_ENCODING: HTTP_ACCEPT_ENCODING_HEADER_VALUE, + HTTP_HEADER.COOKIE: GOOGLE_CONSENT_COOKIE, + } gpage = conf.googlePage if conf.googlePage > 1 else 1 logger.info("using search result page #%d" % gpage) - url = "https://www.google.com/search?" # NOTE: if consent fails, try to use the "http://" - url += "q=%s&" % urlencode(dork, convall=True) - url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search" - url += "&start=%d" % ((gpage - 1) * 100) + encoded = urlencode(dork, convall=True) - try: - req = _urllib.request.Request(url, headers=requestHeaders) - conn = _urllib.request.urlopen(req) + # Note: (name, url, POST data, regex, regex flags, match->link). 
Ordered by current scraping reliability; tried in turn until one yields results (DuckDuckGo currently being the only consistently scrapeable one) + engines = ( + ("DuckDuckGo", "https://html.duckduckgo.com/html/", "q=%s&s=%d" % (encoded, (gpage - 1) * 30), DUCKDUCKGO_REGEX, re.I | re.S, lambda match: match.group(1).replace("&amp;", "&")), + ("Bing", "https://www.bing.com/search?q=%s&first=%d" % (encoded, (gpage - 1) * 10 + 1), None, BING_REGEX, re.I | re.S, lambda match: match.group(1)), + ("Google", "https://www.google.com/search?q=%s&num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search&start=%d" % (encoded, (gpage - 1) * 100), None, GOOGLE_REGEX, re.I, lambda match: match.group(1) or match.group(2)), + ) - requestMsg = "HTTP request:\nGET %s" % url - requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str - logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) + for name, url, data, regex, flags, extract in engines: + page = _fetch(url, requestHeaders, data) - page = conn.read() - code = conn.code - status = conn.msg - responseHeaders = conn.info() + if not page: + continue - responseMsg = "HTTP response (%s - %d):\n" % (status, code) + count = 0 + for match in re.finditer(regex, page, flags): + link = _urllib.parse.unquote(extract(match)) + if link and link not in seen: + seen.add(link) + retVal.append(link) + count += 1 - if conf.verbose <= 4: - responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING) - elif conf.verbose > 4: - responseMsg += "%s\n%s\n" % (responseHeaders, page) + if count: + logger.info("found %d usable link%s using %s" % (count, 's' if count != 1 else "", name)) + break # Note: stop at the first engine that actually returns results (others are only fallbacks) - logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg) - except _urllib.error.HTTPError as ex: - try: - page = ex.read() - responseHeaders = ex.info() - except Exception as _: - warnMsg = "problem occurred while trying to get " - warnMsg += "an error page information (%s)" %
getSafeExString(_) - logger.critical(warnMsg) - return None - except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError): - errMsg = "unable to connect to Google" - raise SqlmapConnectionException(errMsg) - - page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE)) - - page = getUnicode(page) # Note: if decodePage call fails (Issue #4202) - - retVal = [_urllib.parse.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)] - - if not retVal and "detected unusual traffic" in page: - warnMsg = "Google has detected 'unusual' traffic from " - warnMsg += "used IP address disabling further searches" - - if conf.proxyList: + # Note: switch proxy (if available) when an abuse/captcha page was served (instead of pointlessly falling through to the next engine from the same blocked IP) + if conf.proxyList and (("detected unusual traffic" in page) or ("issue with the Tor Exit Node you are currently using" in page)): + warnMsg = "%s has detected 'unusual' traffic from the used IP address" % name raise SqlmapBaseException(warnMsg) - else: - logger.critical(warnMsg) if not retVal: - message = "no usable links found. What do you want to do?" 
- message += "\n[1] (re)try with DuckDuckGo (default)" - message += "\n[2] (re)try with Bing" - message += "\n[3] quit" - choice = readInput(message, default='1') - - if choice == '3': - raise SqlmapUserQuitException - elif choice == '2': - url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode(dork, convall=True), (gpage - 1) * 10 + 1) - regex = BING_REGEX - else: - url = "https://html.duckduckgo.com/html/" - data = "q=%s&s=%d" % (urlencode(dork, convall=True), (gpage - 1) * 30) - regex = DUCKDUCKGO_REGEX - - try: - req = _urllib.request.Request(url, data=getBytes(data), headers=requestHeaders) - conn = _urllib.request.urlopen(req) - - requestMsg = "HTTP request:\nGET %s" % url - requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str - logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg) - - page = conn.read() - code = conn.code - status = conn.msg - responseHeaders = conn.info() - page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type")) - - responseMsg = "HTTP response (%s - %d):\n" % (status, code) - - if conf.verbose <= 4: - responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING) - elif conf.verbose > 4: - responseMsg += "%s\n%s\n" % (responseHeaders, page) - - logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg) - except _urllib.error.HTTPError as ex: - try: - page = ex.read() - page = decodePage(page, ex.headers.get("Content-Encoding"), ex.headers.get("Content-Type")) - except socket.timeout: - warnMsg = "connection timed out while trying " - warnMsg += "to get error page information (%d)" % ex.code - logger.critical(warnMsg) - return None - except: - errMsg = "unable to connect" - raise SqlmapConnectionException(errMsg) - - page = getUnicode(page) # Note: if decodePage call fails (Issue #4202) - - retVal = [_urllib.parse.unquote(match.group(1).replace("&amp;", "&")) for match in re.finditer(regex, page, re.I | re.S)] - - if not retVal and "issue with the Tor Exit Node you are currently using" in
page: - warnMsg = "DuckDuckGo has detected 'unusual' traffic from " - warnMsg += "used (Tor) IP address" - - if conf.proxyList: - raise SqlmapBaseException(warnMsg) - else: - logger.critical(warnMsg) + warnMsg = "no usable links found (search engines might be blocking the used IP address)" + logger.critical(warnMsg) return retVal @@ -206,6 +154,7 @@ def search(dork): return search(dork) else: raise + finally: kb.choices.redirect = popValue()