Making DuckDuckGo default search engine

This commit is contained in:
Miroslav Štampar 2026-06-12 12:50:57 +02:00
parent 000c4e5689
commit 800e6f0a56
3 changed files with 79 additions and 130 deletions

View file

@ -11,7 +11,6 @@ import socket
from lib.core.common import getSafeExString
from lib.core.common import popValue
from lib.core.common import pushValue
from lib.core.common import readInput
from lib.core.common import urlencode
from lib.core.convert import getBytes
from lib.core.convert import getUnicode
@ -24,7 +23,6 @@ from lib.core.enums import HTTP_HEADER
from lib.core.enums import REDIRECTION
from lib.core.exception import SqlmapBaseException
from lib.core.exception import SqlmapConnectionException
from lib.core.exception import SqlmapUserQuitException
from lib.core.settings import BING_REGEX
from lib.core.settings import DUCKDUCKGO_REGEX
from lib.core.settings import DUMMY_SEARCH_USER_AGENT
@ -37,152 +35,102 @@ from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import urllib as _urllib
from thirdparty.socks import socks
def _fetch(url, headers, data=None):
"""
Fetches and returns the (decoded) content of a search engine results page
(or None in case of a connection issue)
"""
retVal = None
try:
req = _urllib.request.Request(url, data=getBytes(data) if data else None, headers=headers)
conn = _urllib.request.urlopen(req)
requestMsg = "HTTP request:\n%s %s" % ("POST" if data else "GET", url)
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
responseHeaders = conn.info()
responseMsg = "HTTP response (%s - %d):\n" % (conn.msg, conn.code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
retVal = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
except _urllib.error.HTTPError as ex:
try:
retVal = getUnicode(ex.read())
except Exception:
pass
except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
pass
return retVal
def _search(dork):
"""
This method performs the effective search on Google providing
the google dork and the Google session cookie
This method performs the effective search using the provided dork,
trying the available search engines in order of (current) scraping
reliability and returning the results of the first one that yields any
(so that the failure of a single engine does not break the feature)
"""
if not dork:
return None
page = None
data = None
requestHeaders = {}
responseHeaders = {}
retVal = []
seen = set()
requestHeaders[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT)
requestHeaders[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE
requestHeaders[HTTP_HEADER.COOKIE] = GOOGLE_CONSENT_COOKIE
try:
req = _urllib.request.Request("https://www.google.com/ncr", headers=requestHeaders)
conn = _urllib.request.urlopen(req)
except Exception as ex:
errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
raise SqlmapConnectionException(errMsg)
requestHeaders = {
HTTP_HEADER.USER_AGENT: dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT),
HTTP_HEADER.ACCEPT_ENCODING: HTTP_ACCEPT_ENCODING_HEADER_VALUE,
HTTP_HEADER.COOKIE: GOOGLE_CONSENT_COOKIE,
}
gpage = conf.googlePage if conf.googlePage > 1 else 1
logger.info("using search result page #%d" % gpage)
url = "https://www.google.com/search?" # NOTE: if consent fails, try to use the "http://"
url += "q=%s&" % urlencode(dork, convall=True)
url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
url += "&start=%d" % ((gpage - 1) * 100)
encoded = urlencode(dork, convall=True)
try:
req = _urllib.request.Request(url, headers=requestHeaders)
conn = _urllib.request.urlopen(req)
# Note: (name, url, POST data, regex, regex flags, match->link). Ordered by current scraping reliability; tried in turn until one yields results (DuckDuckGo currently being the only consistently scrapeable one)
engines = (
("DuckDuckGo", "https://html.duckduckgo.com/html/", "q=%s&s=%d" % (encoded, (gpage - 1) * 30), DUCKDUCKGO_REGEX, re.I | re.S, lambda match: match.group(1).replace("&amp;", "&")),
("Bing", "https://www.bing.com/search?q=%s&first=%d" % (encoded, (gpage - 1) * 10 + 1), None, BING_REGEX, re.I | re.S, lambda match: match.group(1)),
("Google", "https://www.google.com/search?q=%s&num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search&start=%d" % (encoded, (gpage - 1) * 100), None, GOOGLE_REGEX, re.I, lambda match: match.group(1) or match.group(2)),
)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
for name, url, data, regex, flags, extract in engines:
page = _fetch(url, requestHeaders, data)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
if not page:
continue
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
count = 0
for match in re.finditer(regex, page, flags):
link = _urllib.parse.unquote(extract(match))
if link and link not in seen:
seen.add(link)
retVal.append(link)
count += 1
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
if count:
logger.info("found %d usable link%s using %s" % (count, 's' if count != 1 else "", name))
break # Note: stop at the first engine that actually returns results (others are only fallbacks)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except _urllib.error.HTTPError as ex:
try:
page = ex.read()
responseHeaders = ex.info()
except Exception as _:
warnMsg = "problem occurred while trying to get "
warnMsg += "an error page information (%s)" % getSafeExString(_)
logger.critical(warnMsg)
return None
except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
errMsg = "unable to connect to Google"
raise SqlmapConnectionException(errMsg)
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
retVal = [_urllib.parse.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
if not retVal and "detected unusual traffic" in page:
warnMsg = "Google has detected 'unusual' traffic from "
warnMsg += "used IP address disabling further searches"
if conf.proxyList:
# Note: switch proxy (if available) when an abuse/captcha page was served (instead of pointlessly falling through to the next engine from the same blocked IP)
if conf.proxyList and (("detected unusual traffic" in page) or ("issue with the Tor Exit Node you are currently using" in page)):
warnMsg = "%s has detected 'unusual' traffic from the used IP address" % name
raise SqlmapBaseException(warnMsg)
else:
logger.critical(warnMsg)
if not retVal:
message = "no usable links found. What do you want to do?"
message += "\n[1] (re)try with DuckDuckGo (default)"
message += "\n[2] (re)try with Bing"
message += "\n[3] quit"
choice = readInput(message, default='1')
if choice == '3':
raise SqlmapUserQuitException
elif choice == '2':
url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode(dork, convall=True), (gpage - 1) * 10 + 1)
regex = BING_REGEX
else:
url = "https://html.duckduckgo.com/html/"
data = "q=%s&s=%d" % (urlencode(dork, convall=True), (gpage - 1) * 30)
regex = DUCKDUCKGO_REGEX
try:
req = _urllib.request.Request(url, data=getBytes(data), headers=requestHeaders)
conn = _urllib.request.urlopen(req)
requestMsg = "HTTP request:\nGET %s" % url
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
page = conn.read()
code = conn.code
status = conn.msg
responseHeaders = conn.info()
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
if conf.verbose <= 4:
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
elif conf.verbose > 4:
responseMsg += "%s\n%s\n" % (responseHeaders, page)
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
except _urllib.error.HTTPError as ex:
try:
page = ex.read()
page = decodePage(page, ex.headers.get("Content-Encoding"), ex.headers.get("Content-Type"))
except socket.timeout:
warnMsg = "connection timed out while trying "
warnMsg += "to get error page information (%d)" % ex.code
logger.critical(warnMsg)
return None
except:
errMsg = "unable to connect"
raise SqlmapConnectionException(errMsg)
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
retVal = [_urllib.parse.unquote(match.group(1).replace("&amp;", "&")) for match in re.finditer(regex, page, re.I | re.S)]
if not retVal and "issue with the Tor Exit Node you are currently using" in page:
warnMsg = "DuckDuckGo has detected 'unusual' traffic from "
warnMsg += "used (Tor) IP address"
if conf.proxyList:
raise SqlmapBaseException(warnMsg)
else:
logger.critical(warnMsg)
warnMsg = "no usable links found (search engines might be blocking the used IP address)"
logger.critical(warnMsg)
return retVal
@ -206,6 +154,7 @@ def search(dork):
return search(dork)
else:
raise
finally:
kb.choices.redirect = popValue()