mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2026-06-20 06:28:55 +00:00
Making DuckDuckGo default search engine
This commit is contained in:
parent
000c4e5689
commit
800e6f0a56
3 changed files with 79 additions and 130 deletions
|
|
@ -20,7 +20,7 @@ from lib.core.enums import OS
|
|||
from thirdparty import six
|
||||
|
||||
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
|
||||
VERSION = "1.10.6.92"
|
||||
VERSION = "1.10.6.93"
|
||||
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
|
||||
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
|
||||
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import socket
|
|||
from lib.core.common import getSafeExString
|
||||
from lib.core.common import popValue
|
||||
from lib.core.common import pushValue
|
||||
from lib.core.common import readInput
|
||||
from lib.core.common import urlencode
|
||||
from lib.core.convert import getBytes
|
||||
from lib.core.convert import getUnicode
|
||||
|
|
@ -24,7 +23,6 @@ from lib.core.enums import HTTP_HEADER
|
|||
from lib.core.enums import REDIRECTION
|
||||
from lib.core.exception import SqlmapBaseException
|
||||
from lib.core.exception import SqlmapConnectionException
|
||||
from lib.core.exception import SqlmapUserQuitException
|
||||
from lib.core.settings import BING_REGEX
|
||||
from lib.core.settings import DUCKDUCKGO_REGEX
|
||||
from lib.core.settings import DUMMY_SEARCH_USER_AGENT
|
||||
|
|
@ -37,152 +35,102 @@ from thirdparty.six.moves import http_client as _http_client
|
|||
from thirdparty.six.moves import urllib as _urllib
|
||||
from thirdparty.socks import socks
|
||||
|
||||
def _fetch(url, headers, data=None):
|
||||
"""
|
||||
Fetches and returns the (decoded) content of a search engine results page
|
||||
(or None in case of a connection issue)
|
||||
"""
|
||||
|
||||
retVal = None
|
||||
|
||||
try:
|
||||
req = _urllib.request.Request(url, data=getBytes(data) if data else None, headers=headers)
|
||||
conn = _urllib.request.urlopen(req)
|
||||
|
||||
requestMsg = "HTTP request:\n%s %s" % ("POST" if data else "GET", url)
|
||||
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
|
||||
|
||||
page = conn.read()
|
||||
responseHeaders = conn.info()
|
||||
|
||||
responseMsg = "HTTP response (%s - %d):\n" % (conn.msg, conn.code)
|
||||
if conf.verbose <= 4:
|
||||
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
|
||||
elif conf.verbose > 4:
|
||||
responseMsg += "%s\n%s\n" % (responseHeaders, page)
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
|
||||
|
||||
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
|
||||
retVal = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
|
||||
except _urllib.error.HTTPError as ex:
|
||||
try:
|
||||
retVal = getUnicode(ex.read())
|
||||
except Exception:
|
||||
pass
|
||||
except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
|
||||
pass
|
||||
|
||||
return retVal
|
||||
|
||||
def _search(dork):
|
||||
"""
|
||||
This method performs the effective search on Google providing
|
||||
the google dork and the Google session cookie
|
||||
This method performs the effective search using the provided dork,
|
||||
trying the available search engines in order of (current) scraping
|
||||
reliability and returning the results of the first one that yields any
|
||||
(so that the failure of a single engine does not break the feature)
|
||||
"""
|
||||
|
||||
if not dork:
|
||||
return None
|
||||
|
||||
page = None
|
||||
data = None
|
||||
requestHeaders = {}
|
||||
responseHeaders = {}
|
||||
retVal = []
|
||||
seen = set()
|
||||
|
||||
requestHeaders[HTTP_HEADER.USER_AGENT] = dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT)
|
||||
requestHeaders[HTTP_HEADER.ACCEPT_ENCODING] = HTTP_ACCEPT_ENCODING_HEADER_VALUE
|
||||
requestHeaders[HTTP_HEADER.COOKIE] = GOOGLE_CONSENT_COOKIE
|
||||
|
||||
try:
|
||||
req = _urllib.request.Request("https://www.google.com/ncr", headers=requestHeaders)
|
||||
conn = _urllib.request.urlopen(req)
|
||||
except Exception as ex:
|
||||
errMsg = "unable to connect to Google ('%s')" % getSafeExString(ex)
|
||||
raise SqlmapConnectionException(errMsg)
|
||||
requestHeaders = {
|
||||
HTTP_HEADER.USER_AGENT: dict(conf.httpHeaders).get(HTTP_HEADER.USER_AGENT, DUMMY_SEARCH_USER_AGENT),
|
||||
HTTP_HEADER.ACCEPT_ENCODING: HTTP_ACCEPT_ENCODING_HEADER_VALUE,
|
||||
HTTP_HEADER.COOKIE: GOOGLE_CONSENT_COOKIE,
|
||||
}
|
||||
|
||||
gpage = conf.googlePage if conf.googlePage > 1 else 1
|
||||
logger.info("using search result page #%d" % gpage)
|
||||
|
||||
url = "https://www.google.com/search?" # NOTE: if consent fails, try to use the "http://"
|
||||
url += "q=%s&" % urlencode(dork, convall=True)
|
||||
url += "num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search"
|
||||
url += "&start=%d" % ((gpage - 1) * 100)
|
||||
encoded = urlencode(dork, convall=True)
|
||||
|
||||
try:
|
||||
req = _urllib.request.Request(url, headers=requestHeaders)
|
||||
conn = _urllib.request.urlopen(req)
|
||||
# Note: (name, url, POST data, regex, regex flags, match->link). Ordered by current scraping reliability; tried in turn until one yields results (DuckDuckGo currently being the only consistently scrapeable one)
|
||||
engines = (
|
||||
("DuckDuckGo", "https://html.duckduckgo.com/html/", "q=%s&s=%d" % (encoded, (gpage - 1) * 30), DUCKDUCKGO_REGEX, re.I | re.S, lambda match: match.group(1).replace("&", "&")),
|
||||
("Bing", "https://www.bing.com/search?q=%s&first=%d" % (encoded, (gpage - 1) * 10 + 1), None, BING_REGEX, re.I | re.S, lambda match: match.group(1)),
|
||||
("Google", "https://www.google.com/search?q=%s&num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search&start=%d" % (encoded, (gpage - 1) * 100), None, GOOGLE_REGEX, re.I, lambda match: match.group(1) or match.group(2)),
|
||||
)
|
||||
|
||||
requestMsg = "HTTP request:\nGET %s" % url
|
||||
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
|
||||
for name, url, data, regex, flags, extract in engines:
|
||||
page = _fetch(url, requestHeaders, data)
|
||||
|
||||
page = conn.read()
|
||||
code = conn.code
|
||||
status = conn.msg
|
||||
responseHeaders = conn.info()
|
||||
if not page:
|
||||
continue
|
||||
|
||||
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
|
||||
count = 0
|
||||
for match in re.finditer(regex, page, flags):
|
||||
link = _urllib.parse.unquote(extract(match))
|
||||
if link and link not in seen:
|
||||
seen.add(link)
|
||||
retVal.append(link)
|
||||
count += 1
|
||||
|
||||
if conf.verbose <= 4:
|
||||
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
|
||||
elif conf.verbose > 4:
|
||||
responseMsg += "%s\n%s\n" % (responseHeaders, page)
|
||||
if count:
|
||||
logger.info("found %d usable link%s using %s" % (count, 's' if count != 1 else "", name))
|
||||
break # Note: stop at the first engine that actually returns results (others are only fallbacks)
|
||||
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
|
||||
except _urllib.error.HTTPError as ex:
|
||||
try:
|
||||
page = ex.read()
|
||||
responseHeaders = ex.info()
|
||||
except Exception as _:
|
||||
warnMsg = "problem occurred while trying to get "
|
||||
warnMsg += "an error page information (%s)" % getSafeExString(_)
|
||||
logger.critical(warnMsg)
|
||||
return None
|
||||
except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError):
|
||||
errMsg = "unable to connect to Google"
|
||||
raise SqlmapConnectionException(errMsg)
|
||||
|
||||
page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
|
||||
|
||||
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
|
||||
|
||||
retVal = [_urllib.parse.unquote(match.group(1) or match.group(2)) for match in re.finditer(GOOGLE_REGEX, page, re.I)]
|
||||
|
||||
if not retVal and "detected unusual traffic" in page:
|
||||
warnMsg = "Google has detected 'unusual' traffic from "
|
||||
warnMsg += "used IP address disabling further searches"
|
||||
|
||||
if conf.proxyList:
|
||||
# Note: switch proxy (if available) when an abuse/captcha page was served (instead of pointlessly falling through to the next engine from the same blocked IP)
|
||||
if conf.proxyList and (("detected unusual traffic" in page) or ("issue with the Tor Exit Node you are currently using" in page)):
|
||||
warnMsg = "%s has detected 'unusual' traffic from the used IP address" % name
|
||||
raise SqlmapBaseException(warnMsg)
|
||||
else:
|
||||
logger.critical(warnMsg)
|
||||
|
||||
if not retVal:
|
||||
message = "no usable links found. What do you want to do?"
|
||||
message += "\n[1] (re)try with DuckDuckGo (default)"
|
||||
message += "\n[2] (re)try with Bing"
|
||||
message += "\n[3] quit"
|
||||
choice = readInput(message, default='1')
|
||||
|
||||
if choice == '3':
|
||||
raise SqlmapUserQuitException
|
||||
elif choice == '2':
|
||||
url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode(dork, convall=True), (gpage - 1) * 10 + 1)
|
||||
regex = BING_REGEX
|
||||
else:
|
||||
url = "https://html.duckduckgo.com/html/"
|
||||
data = "q=%s&s=%d" % (urlencode(dork, convall=True), (gpage - 1) * 30)
|
||||
regex = DUCKDUCKGO_REGEX
|
||||
|
||||
try:
|
||||
req = _urllib.request.Request(url, data=getBytes(data), headers=requestHeaders)
|
||||
conn = _urllib.request.urlopen(req)
|
||||
|
||||
requestMsg = "HTTP request:\nGET %s" % url
|
||||
requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)
|
||||
|
||||
page = conn.read()
|
||||
code = conn.code
|
||||
status = conn.msg
|
||||
responseHeaders = conn.info()
|
||||
page = decodePage(page, responseHeaders.get("Content-Encoding"), responseHeaders.get("Content-Type"))
|
||||
|
||||
responseMsg = "HTTP response (%s - %d):\n" % (status, code)
|
||||
|
||||
if conf.verbose <= 4:
|
||||
responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
|
||||
elif conf.verbose > 4:
|
||||
responseMsg += "%s\n%s\n" % (responseHeaders, page)
|
||||
|
||||
logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)
|
||||
except _urllib.error.HTTPError as ex:
|
||||
try:
|
||||
page = ex.read()
|
||||
page = decodePage(page, ex.headers.get("Content-Encoding"), ex.headers.get("Content-Type"))
|
||||
except socket.timeout:
|
||||
warnMsg = "connection timed out while trying "
|
||||
warnMsg += "to get error page information (%d)" % ex.code
|
||||
logger.critical(warnMsg)
|
||||
return None
|
||||
except:
|
||||
errMsg = "unable to connect"
|
||||
raise SqlmapConnectionException(errMsg)
|
||||
|
||||
page = getUnicode(page) # Note: if decodePage call fails (Issue #4202)
|
||||
|
||||
retVal = [_urllib.parse.unquote(match.group(1).replace("&", "&")) for match in re.finditer(regex, page, re.I | re.S)]
|
||||
|
||||
if not retVal and "issue with the Tor Exit Node you are currently using" in page:
|
||||
warnMsg = "DuckDuckGo has detected 'unusual' traffic from "
|
||||
warnMsg += "used (Tor) IP address"
|
||||
|
||||
if conf.proxyList:
|
||||
raise SqlmapBaseException(warnMsg)
|
||||
else:
|
||||
logger.critical(warnMsg)
|
||||
warnMsg = "no usable links found (search engines might be blocking the used IP address)"
|
||||
logger.critical(warnMsg)
|
||||
|
||||
return retVal
|
||||
|
||||
|
|
@ -206,6 +154,7 @@ def search(dork):
|
|||
return search(dork)
|
||||
else:
|
||||
raise
|
||||
|
||||
finally:
|
||||
kb.choices.redirect = popValue()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue