Merge c694a8b6c4
into 9bfdd88a5e
commit
c83eb6a5d8
@ -0,0 +1,82 @@
|
||||
"""
|
||||
Integration with third-party CAPTCHA solving services
|
||||
"""
|
||||
# Only DeathByCaptcha is supported at the moment.
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
|
||||
from bs4 import BeautifulSoup as bs
|
||||
|
||||
try:
|
||||
import deathbycaptcha
|
||||
except ImportError:
|
||||
deathbycaptcha = None
|
||||
|
||||
def parse_params(response):
    """
    Parse a reCAPTCHA challenge page with bs4 and collect the values needed
    to solve the captcha.

    Args:
        response: Any object exposing the page HTML through a ``text``
            attribute (only ``response.text`` is read).

    Returns:
        dict: ``"googlekey"`` (the ``data-sitekey`` attribute), ``"data-s"``
        (the ``data-s`` attribute) and ``"q"`` (the value of the hidden
        form input).

    Raises:
        AttributeError: If the element with ``id="recaptcha"`` or the hidden
            input cannot be found in the page.
        KeyError: If a located element lacks the expected attribute
            (``data-sitekey``, ``data-s`` or ``value``).
    """
    params = {
        "googlekey": "",
        "data-s": "",
    }
    soup = bs(response.text, "html.parser")

    recaptcha = soup.find(id="recaptcha")
    if recaptcha is None:
        # i could save the page for debugging since this is usually
        # hard to reproduce
        raise AttributeError(
            "Couldn't find the element with the CAPTCHA params. "
            "Are you sure this page contains Google's reCAPTCHA v2 with callback?"
        )

    # Prefer the hidden input explicitly named "q"; fall back to the first
    # hidden element so pages that worked before keep working.
    hidden_q = soup.find("input", attrs={"type": "hidden", "name": "q"})
    if hidden_q is None:
        hidden_q = soup.find(type="hidden")
    if hidden_q is None:
        # Same exception type the bare `None.attrs` access used to produce,
        # but with a message that says what is actually missing.
        raise AttributeError(
            "Couldn't find the hidden input holding the 'q' value in the CAPTCHA page"
        )

    params["q"] = hidden_q.attrs["value"]
    params["googlekey"] = recaptcha.attrs["data-sitekey"]
    params["data-s"] = recaptcha.attrs["data-s"]

    return params
|
||||
|
||||
|
||||
def solve(response, proxies, url):
    """
    Get a response with a reCAPTCHA v2 and solve it using a third-party service.

    The challenge parameters are extracted with ``parse_params``, sent to
    DeathByCaptcha, and the returned token is posted back to Google's
    ``/sorry/index`` form.

    Credentials are read from the ``DBC_USER`` and ``DBC_PASS`` environment
    variables (falling back to the placeholders "username" / "password").

    Args:
        response: Object whose ``text`` attribute holds the challenge page.
        proxies: ``requests``-style proxies mapping, or ``None``. Its
            ``"https"`` entry is forwarded to the solving service and the
            whole mapping is used for the final form submission.
        url: URL that triggered the challenge; sent as ``pageurl`` to the
            solver and as ``continue`` in the form.

    Returns:
        bool: ``True`` when a token was obtained and the form was submitted
        without a network error, ``False`` otherwise.

    Raises:
        AttributeError, KeyError: Propagated from ``parse_params`` when the
            page does not contain the expected elements.
    """
    if deathbycaptcha is None:
        print("WARN: The deathbycaptcha client is not installed")
        return False

    client = deathbycaptcha.HttpClient(
        os.environ.get("DBC_USER", "username"),
        os.environ.get("DBC_PASS", "password"),
    )

    params = parse_params(response)
    params["pageurl"] = url
    # `proxies` may be None (requests accepts that), so guard the lookup.
    params["proxy"] = (proxies or {}).get("https", None)
    params["proxytype"] = "HTTP"

    # "q" belongs to Google's form, not to the solver payload.
    q = params.pop("q")

    token = ""
    try:
        # NOTE(review): type=4 is presumably DeathByCaptcha's token-based
        # reCAPTCHA API -- confirm against the client documentation.
        token = client.decode(type=4, token_params=json.dumps(params))
    except Exception as exc:
        print(
            "ERROR: Deathbycaptcha was unable to solve the captcha. Original exception:", exc
        )
        return False

    solved = False
    if token:
        verdict = token.get("is_correct", "false")
        if isinstance(verdict, str):
            solved = verdict.strip().lower() != "false"
        else:
            # The flag may arrive as a real boolean; comparing it with the
            # string "false" would wrongly treat False as a success.
            solved = bool(verdict)

    if not solved:
        print("ERROR: Deathbycaptcha was unable to solve the captcha")
        return False

    text = token.get("text", None)
    if not text:
        return False

    form_params = {
        "q": q,
        "continue": url,
        "g-recaptcha-response": text,
    }
    try:
        # Without a timeout requests can block forever on a stalled proxy.
        requests.post(
            "https://www.google.com/sorry/index",
            data=form_params,
            proxies=proxies,
            timeout=30,
        )
    except requests.RequestException as exc:
        # Keep the bool contract used by every other failure path.
        print("ERROR: Unable to submit the solved captcha. Original exception:", exc)
        return False

    # NOTE(review): the HTTP status of the submission is not inspected, as in
    # the original code -- confirm whether callers need that signal.
    return True
|
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Test the integration with third-party CAPTCHA solving services
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from argparse import Namespace
|
||||
|
||||
from app.utils import captcha
|
||||
|
||||
# Directory next to this test module that holds the fixture files
# (e.g. the saved reCAPTCHA page read by test_parse).
TEST_FILES = Path(__file__).parent / "test_files"
|
||||
|
||||
|
||||
def test_parse():
    """
    Check that parse_params extracts the site key, the data-s blob and the
    hidden q value from a saved reCAPTCHA v2 (callback) page.
    """
    fixture = TEST_FILES / "recaptcha_v2_callback.html"

    # primitive mock: only the attributes of a real response that matter here
    response = Namespace(
        url="https://www.google.com/search?gbv=1&num=10&q=Liddell&safe=off",
        text=fixture.read_text(),
    )

    parsed = captcha.parse_params(response)

    data_s = (
        "I_wQ5kiIMUbCdcGyC1x6zzK70nD"
        "G9kViGr7TS6zaiWsIdZXcmQGoaxN"
        "hiGulX8tD_xNYFXLRkLFSkxDnrkIr"
        "5o5xSw2Sj1Z-bs5dqP2TyQFGBaTZFY"
        "sRBy3CoDJruyranhLqWoWb3mdxvgUb"
        "kpS7ZkRSFYFP_dg9WV4rIQxa6OUmrAt"
        "S6JKw_UbHN8tJ4mCpz6BKYsGB_fjyD9"
        "fuRrzmn2RK8FzsOAiLEWBc0z5Qxdltd"
        "owqO1ugNxQdSaqM39pF73cCAqWqEama"
        "RRa9iOOVflHptIHjo88"
    )
    expected = dict(
        [
            ("googlekey", "6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b"),
            ("data-s", data_s),
            (
                "q",
                "EgTIadcWGIXS4pwGIjDL-1ocR_DlZgts3Rfama1w7aWKF_5y2vFWA8eORDe5SvseqGuuMVzIObjhBnZPpgAyAXI",
            ),
        ]
    )

    assert parsed == expected, f"Results differ\nExpected: {expected}\nGot: {parsed}"
|
@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head><meta http-equiv="content-type" content="text/html; charset=utf-8"><meta name="viewport" content="initial-scale=1"><title>https://www.google.com/search?gbv=1&num=10&q=Liddell&safe=off</title></head>
|
||||
<body style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();} if(solveSimpleChallenge) {solveSimpleChallenge(,);}">
|
||||
<div style="max-width:400px;">
|
||||
<hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br>
|
||||
<form id="captcha-form" action="index" method="post">
|
||||
<noscript>
|
||||
<div style="font-size:13px;">
|
||||
In order to continue, please enable javascript on your web browser.
|
||||
</div>
|
||||
</noscript>
|
||||
<script src="https://www.google.com/recaptcha/api.js" async defer></script>
|
||||
<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
|
||||
<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="I_wQ5kiIMUbCdcGyC1x6zzK70nDG9kViGr7TS6zaiWsIdZXcmQGoaxNhiGulX8tD_xNYFXLRkLFSkxDnrkIr5o5xSw2Sj1Z-bs5dqP2TyQFGBaTZFYsRBy3CoDJruyranhLqWoWb3mdxvgUbkpS7ZkRSFYFP_dg9WV4rIQxa6OUmrAtS6JKw_UbHN8tJ4mCpz6BKYsGB_fjyD9fuRrzmn2RK8FzsOAiLEWBc0z5QxdltdowqO1ugNxQdSaqM39pF73cCAqWqEamaRRa9iOOVflHptIHjo88"></div>
|
||||
|
||||
<input type='hidden' name='q' value='EgTIadcWGIXS4pwGIjDL-1ocR_DlZgts3Rfama1w7aWKF_5y2vFWA8eORDe5SvseqGuuMVzIObjhBnZPpgAyAXI'><input type="hidden" name="continue" value="https://www.google.com/search?gbv=1&num=10&q=Liddell&safe=off">
|
||||
</form>
|
||||
<hr noshade size="1" style="color:#ccc; background-color:#ccc;">
|
||||
|
||||
<div style="font-size:13px;">
|
||||
<b>About this page</b><br><br>
|
||||
|
||||
Our systems have detected unusual traffic from your computer network. This page checks to see if it's really you sending the requests, and not a robot. <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br>
|
||||
|
||||
<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
|
||||
This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help — a different computer using the same IP address may be responsible. <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
|
||||
</div>
|
Loading…
Reference in New Issue