Finish integration

pull/915/head
Moist-Cat 2 years ago
parent b26aeec173
commit 9e8c48a882

@ -331,11 +331,23 @@ class Request:
proxies=self.proxies,
headers=headers,
cookies=cookies)
if response.status_code == "429":
if response.status_code == 429:
# google's CAPTCHA
# we have to handle it here because we filter out scripts from the page source
# later
captcha.solve(response.text)
print("WARN: CAPTCHA detected")
solved = captcha.solve(response, self.proxies, url=self.search_url + query)
if solved:
print("INFO: CAPTCHA solved. Retrying...")
response = requests.get(
(base_url or self.search_url) + query,
proxies=self.proxies,
headers=headers,
cookies=cookies
)
if response.status_code == 429:
print("ERROR: It seems our IP is still blacklisted")
# Retry query with new identity if using Tor (max 10 attempts)
if 'form id="captcha-form"' in response.text and self.tor:

@ -318,6 +318,17 @@ def search():
translation = app.config['TRANSLATIONS'][localization_lang]
translate_to = localization_lang.replace('lang_', '')
# Return 503 if temporarily blocked by captcha
if has_captcha(str(response)):
return render_template(
'error.html',
blocked=True,
error_message=translation['ratelimit'],
translation=translation,
farside='https://farside.link',
config=g.user_config,
query=urlparse.unquote(query),
params=g.user_config.to_params(keys=['preferences'])), 503
response = bold_search_terms(response, query)
# Feature to display IP address

@ -3,6 +3,8 @@ Integration with third party CAPTCHA solving services
"""
# only deathbycaptcha atm but whatever
import os
import json
import requests
from bs4 import BeautifulSoup as bs
@ -11,7 +13,6 @@ try:
except ImportError:
deathbycaptcha = None
class UnableToSolve(Exception):
"""
The third-party service was unable to solve the CAPTCHA
@ -25,7 +26,6 @@ def parse_params(response):
params = {
"googlekey": "",
"data-s": "",
"pageurl": "",
}
soup = bs(response.text)
@ -37,27 +37,53 @@ def parse_params(response):
"Couldn't find the element with the CAPTCHA params"
"Are you sure this page contains Google's reCAPTCHA v2 with callback?"
)
hidden_q = soup.find(type="hidden")
params["q"] = hidden_q.attrs["value"]
params["googlekey"] = recaptcha.attrs["data-sitekey"]
params["data-s"] = recaptcha.attrs["data-s"]
params["pageurl"] = response.url
return params
def solve(response, proxies, url):
    """
    Get a response with a reCAPTCHA v2 and solve it using a third-party service

    Parameters:
        response: the blocked response; it is handed to ``parse_params`` to
            extract the CAPTCHA parameters.
        proxies: requests-style proxies mapping (may be empty or ``None``).
            Its "https" entry is forwarded to the solving service and the
            mapping itself is reused when submitting the solved form.
        url: URL of the search being retried; sent to the solver as the page
            URL and to Google as the "continue" target.

    Returns:
        True when a token was obtained and the form was submitted,
        False in every failure case (client missing, solver error,
        incorrect or empty token).
    """
    if deathbycaptcha is None:
        print("WARN: The deathbycaptcha client is not installed")
        return False

    client = deathbycaptcha.HttpClient(
        os.environ.get("DBC_USER", "username"), os.environ.get("DBC_PASS", "password")
    )

    params = parse_params(response)
    params["pageurl"] = url
    # Tolerate callers that have no proxy configured at all.
    params["proxy"] = (proxies or {}).get("https", None)
    params["proxytype"] = "HTTP"
    # "q" is a field of Google's form, not a solver parameter, so keep it
    # aside for the submission below.
    q = params.pop("q")

    try:
        token = client.decode(type=4, token_params=json.dumps(params))
    except Exception as exc:
        # Best-effort integration: any solver failure just means "not solved".
        print(
            "ERROR: Deathbycaptcha was unable to solve the captcha. Original exception:", exc
        )
        return False

    # Accept both a boolean and a string flag; a missing flag counts as failure.
    if not token or str(token.get("is_correct", "false")).lower() == "false":
        print("ERROR: Deathbycaptcha was unable to solve the captcha")
        return False

    text = token.get("text", None)
    if not text:
        return False

    form_params = {
        "q": q,
        "continue": url,
        "g-recaptcha-response": text,
    }
    response = requests.post("https://www.google.com/sorry/index", data=form_params, proxies=proxies)
    # Report only the status: the form carries the solved token and the body
    # is a full HTML page, neither of which belongs in the logs.
    print("INFO: CAPTCHA form submitted, status:", response.status_code)
    return True

@ -26,9 +26,3 @@ Our systems have detected unusual traffic from your computer network. This page
<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help &mdash; a different computer using the same IP address may be responsible. <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
</div>
IP address: 200.105.215.22<br>Time: 2022-12-13T16:32:06Z<br>URL: https://www.google.com/search?gbv=1&amp;num=10&amp;q=Liddell&amp;safe=off<br>
</div>
</div>
</body>
</html>

Loading…
Cancel
Save