Making DuckDuckGo default search engine

2026-06-20 14:40:36 +00:00 · 2026-06-12 12:50:57 +02:00 · 2026-06-12 12:50:57 +02:00 · 800e6f0a56
commit 800e6f0a56
parent 000c4e5689
3 changed files with 79 additions and 130 deletions
--- a/lib/utils/search.py
+++ b/lib/utils/search.py
@ -11,7 +11,6 @@ import socket
 from lib.core.common import getSafeExString
 from lib.core.common import popValue
 from lib.core.common import pushValue
-from lib.core.common import readInput
 from lib.core.common import urlencode
 from lib.core.convert import getBytes
 from lib.core.convert import getUnicode
@ -24,7 +23,6 @@ from lib.core.enums import HTTP_HEADER
 from lib.core.enums import REDIRECTION
 from lib.core.exception import SqlmapBaseException
 from lib.core.exception import SqlmapConnectionException
-from lib.core.exception import SqlmapUserQuitException
 from lib.core.settings import BING_REGEX
 from lib.core.settings import DUCKDUCKGO_REGEX
 from lib.core.settings import DUMMY_SEARCH_USER_AGENT
@ -37,152 +35,102 @@ from thirdparty.six.moves import http_client as _http_client
 from thirdparty.six.moves import urllib as _urllib
 from thirdparty.socks import socks

+def _fetch(url, headers, data=None):
+    """
+    Fetches and returns the (decoded) content of a search engine results page
+    (or None in case of a connection issue)
+    """
+
+    retVal = None
+
+    try:
+        req = _urllib.request.Request(url, data=getBytes(data) if data else None, headers=headers)
+        conn = _urllib.request.urlopen(req)
+
+        requestMsg = "HTTP request:\n%s %s" % ("POST" if data else "GET", url)
+        requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
+        logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
+
+        page = conn.read()
+        responseHeaders = conn.info()
+
+        responseMsg = "HTTP response (%s - %d):\n" % (conn.msg, conn.code)
+        if conf.verbose <= 4:
+            responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
+        elif conf.verbose > 4:
+            responseMsg += "%s\n%s\n" % (responseHeaders, page)
+        logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
+
+        page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
+        retVal = getUnicode(page)  # Note: if decodePage call fails (Issue #4202)
+    except _urllib.error.HTTPError as ex:
+        try:
+            retVal = getUnicode(ex.read())
+        except Exception:
+            pass
+    except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
+        pass
+
+    return retVal
+
 def _search(dork):
    """
-    This method performs the effective search on Google providing
-    the google dork and the Google session cookie
+    This method performs the effective search using the provided dork,
+    trying the available search engines in order of (current) scraping
+    reliability and returning the results of the first one that yields any
+    (so that the failure of a single engine does not break the feature)
    """

    if not dork:
        return None

-    page = None
-    data = None
-    requestHeaders = {}
-    responseHeaders = {}
+    retVal = []
+    seen = set()

-    requestHeaders[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT)
-    requestHeaders[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE
-    requestHeaders[HTTP_HEADER.COOKIE] = GOOGLE_CONSENT_COOKIE
-
-    try:
-        req = _urllib.request.Request("https://www.google.com/ncr", headers=requestHeaders)
-        conn = _urllib.request.urlopen(req)
-    except Exception as ex:
-        errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
-        raise SqlmapConnectionException(errMsg)
+    requestHeaders = {
+        HTTP_HEADER.USER_AGENT: dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT),
+        HTTP_HEADER.ACCEPT_ENCODING: HTTP_ACCEPT_ENCODING_HEADER_VALUE,
+        HTTP_HEADER.COOKIE: GOOGLE_CONSENT_COOKIE,
+    }

    gpage = conf.googlePage if conf.googlePage > 1 else 1
    logger.info("using search result page #%d" % gpage)

-    url = "https://www.google.com/search?"                                  # NOTE: if consent fails, try to use the "http://"
-    url += "q=%s&" % urlencode(dork, convall=True)
-    url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
-    url += "&start=%d" % ((gpage - 1) * 100)
+    encoded = urlencode(dork, convall=True)

-    try:
-        req = _urllib.request.Request(url, headers=requestHeaders)
-        conn = _urllib.request.urlopen(req)
+    # Note: (name, url, POST data, regex, regex flags, match->link). Ordered by current scraping reliability; tried in turn until one yields results (DuckDuckGo currently being the only consistently scrapeable one)
+    engines = (
+        ("DuckDuckGo", "https://html.duckduckgo.com/html/", "q=%s&s=%d" % (encoded, (gpage - 1) * 30), DUCKDUCKGO_REGEX, re.I | re.S, lambda match: match.group(1).replace("&amp;", "&")),
+        ("Bing", "https://www.bing.com/search?q=%s&first=%d" % (encoded, (gpage - 1) * 10 + 1), None, BING_REGEX, re.I | re.S, lambda match: match.group(1)),
+        ("Google", "https://www.google.com/search?q=%s&num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search&start=%d" % (encoded, (gpage - 1) * 100), None, GOOGLE_REGEX, re.I, lambda match: match.group(1) or match.group(2)),
+    )

-        requestMsg = "HTTP request:\nGET %s" % url
-        requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
-        logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
+    for name, url, data, regex, flags, extract in engines:
+        page = _fetch(url, requestHeaders, data)

-        page = conn.read()
-        code = conn.code
-        status = conn.msg
-        responseHeaders = conn.info()
+        if not page:
+            continue

-        responseMsg = "HTTP response (%s - %d):\n" % (status, code)
+        count = 0
+        for match in re.finditer(regex, page, flags):
+            link = _urllib.parse.unquote(extract(match))
+            if link and link not in seen:
+                seen.add(link)
+                retVal.append(link)
+                count += 1

-        if conf.verbose <= 4:
-            responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
-        elif conf.verbose > 4:
-            responseMsg += "%s\n%s\n" % (responseHeaders, page)
+        if count:
+            logger.info("found %d usable link%s using %s" % (count, 's' if count != 1 else "", name))
+            break  # Note: stop at the first engine that actually returns results (others are only fallbacks)

-        logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
-    except _urllib.error.HTTPError as ex:
-        try:
-            page = ex.read()
-            responseHeaders = ex.info()
-        except Exception as _:
-            warnMsg = "problem occurred while trying to get "
-            warnMsg += "an error page information (%s)" % getSafeExString(_)
-            logger.critical(warnMsg)
-            return None
-    except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
-        errMsg = "unable to connect to Google"
-        raise SqlmapConnectionException(errMsg)
-
-    page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
-
-    page = getUnicode(page)  # Note: if decodePage call fails (Issue #4202)
-
-    retVal = [_urllib.parse.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
-
-    if not retVal and "detected unusual traffic" in page:
-        warnMsg = "Google has detected 'unusual' traffic from "
-        warnMsg += "used IP address disabling further searches"
-
-        if conf.proxyList:
+        # Note: switch proxy (if available) when an abuse/captcha page was served (instead of pointlessly falling through to the next engine from the same blocked IP)
+        if conf.proxyList and (("detected unusual traffic" in page) or ("issue with the Tor Exit Node you are currently using" in page)):
+            warnMsg = "%s has detected 'unusual' traffic from the used IP address" % name
            raise SqlmapBaseException(warnMsg)
-        else:
-            logger.critical(warnMsg)

    if not retVal:
-        message = "no usable links found. What do you want to do?"
-        message += "\n[1] (re)try with DuckDuckGo (default)"
-        message += "\n[2] (re)try with Bing"
-        message += "\n[3] quit"
-        choice = readInput(message, default='1')
-
-        if choice == '3':
-            raise SqlmapUserQuitException
-        elif choice == '2':
-            url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode(dork, convall=True), (gpage - 1) * 10 + 1)
-            regex = BING_REGEX
-        else:
-            url = "https://html.duckduckgo.com/html/"
-            data = "q=%s&s=%d" % (urlencode(dork, convall=True), (gpage - 1) * 30)
-            regex = DUCKDUCKGO_REGEX
-
-        try:
-            req = _urllib.request.Request(url, data=getBytes(data), headers=requestHeaders)
-            conn = _urllib.request.urlopen(req)
-
-            requestMsg = "HTTP request:\nGET %s" % url
-            requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
-            logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
-
-            page = conn.read()
-            code = conn.code
-            status = conn.msg
-            responseHeaders = conn.info()
-            page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
-
-            responseMsg = "HTTP response (%s - %d):\n" % (status, code)
-
-            if conf.verbose <= 4:
-                responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
-            elif conf.verbose > 4:
-                responseMsg += "%s\n%s\n" % (responseHeaders, page)
-
-            logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
-        except _urllib.error.HTTPError as ex:
-            try:
-                page = ex.read()
-                page = decodePage(page, ex.headers.get("Content-Encoding"), ex.headers.get("Content-Type"))
-            except socket.timeout:
-                warnMsg = "connection timed out while trying "
-                warnMsg += "to get error page information (%d)" % ex.code
-                logger.critical(warnMsg)
-                return None
-        except:
-            errMsg = "unable to connect"
-            raise SqlmapConnectionException(errMsg)
-
-        page = getUnicode(page)  # Note: if decodePage call fails (Issue #4202)
-
-        retVal = [_urllib.parse.unquote(match.group(1).replace("&amp;", "&")) for match in re.finditer(regex, page, re.I | re.S)]
-
-        if not retVal and "issue with the Tor Exit Node you are currently using" in page:
-            warnMsg = "DuckDuckGo has detected 'unusual' traffic from "
-            warnMsg += "used (Tor) IP address"
-
-            if conf.proxyList:
-                raise SqlmapBaseException(warnMsg)
-            else:
-                logger.critical(warnMsg)
+        warnMsg = "no usable links found (search engines might be blocking the used IP address)"
+        logger.critical(warnMsg)

    return retVal

@ -206,6 +154,7 @@ def search(dork):
            return search(dork)
        else:
            raise
+
    finally:
        kb.choices.redirect = popValue()