2023-11-20 13:00:40 +00:00
|
|
|
from __future__ import annotations
|
2024-01-26 06:54:13 +00:00
|
|
|
|
|
|
|
try:
|
|
|
|
from platformdirs import user_config_dir
|
2024-03-16 19:02:15 +00:00
|
|
|
from undetected_chromedriver import Chrome, ChromeOptions, find_chrome_executable
|
2024-01-26 06:54:13 +00:00
|
|
|
from selenium.webdriver.remote.webdriver import WebDriver
|
2024-01-26 11:49:52 +00:00
|
|
|
from selenium.webdriver.remote.webelement import WebElement
|
2024-01-26 06:54:13 +00:00
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
2024-01-26 11:49:52 +00:00
|
|
|
from selenium.webdriver.common.keys import Keys
|
2024-02-09 13:24:15 +00:00
|
|
|
from selenium.common.exceptions import NoSuchElementException
|
2024-01-26 06:54:13 +00:00
|
|
|
has_requirements = True
|
|
|
|
except ImportError:
|
2024-03-12 17:45:22 +00:00
|
|
|
from typing import Type as WebDriver
|
2024-01-26 06:54:13 +00:00
|
|
|
has_requirements = False
|
2024-01-27 01:00:44 +00:00
|
|
|
|
2024-03-12 01:06:06 +00:00
|
|
|
import time
|
2024-01-28 19:28:37 +00:00
|
|
|
from shutil import which
|
2023-12-07 06:18:05 +00:00
|
|
|
from os import path
|
2024-01-20 17:36:04 +00:00
|
|
|
from os import access, R_OK
|
2024-01-29 17:14:46 +00:00
|
|
|
from .typing import Cookies
|
2024-01-26 06:54:13 +00:00
|
|
|
from .errors import MissingRequirementsError
|
2023-12-06 08:35:36 +00:00
|
|
|
from . import debug
|
2023-12-02 04:40:07 +00:00
|
|
|
|
2023-11-20 13:00:40 +00:00
|
|
|
try:
|
|
|
|
from pyvirtualdisplay import Display
|
|
|
|
has_pyvirtualdisplay = True
|
|
|
|
except ImportError:
|
|
|
|
has_pyvirtualdisplay = False
|
|
|
|
|
2024-03-12 01:06:06 +00:00
|
|
|
try:
    from undetected_chromedriver import Chrome as _Chrome, ChromeOptions
    from seleniumwire.webdriver import InspectRequestsMixin, DriverCommonMixin

    class Chrome(InspectRequestsMixin, DriverCommonMixin, _Chrome):
        """
        undetected_chromedriver ``Chrome`` combined with the selenium-wire mixins.

        The browser is pointed at the proxy address reported by the
        selenium-wire backend so its traffic passes through that backend.
        """

        def __init__(self, *args, options=None, seleniumwire_options=None, **kwargs):
            """
            Starts the selenium-wire backend and launches Chrome behind it.

            Args:
                *args: Positional arguments forwarded to the underlying Chrome driver.
                options (ChromeOptions, optional): Browser options. A fresh
                    ChromeOptions is created when None.
                seleniumwire_options (dict, optional): Options handed to
                    ``_setup_backend``. An empty dict is used when None.
                **kwargs: Keyword arguments forwarded to the underlying Chrome driver.
            """
            if options is None:
                options = ChromeOptions()
            # A fresh dict per call: a ``{}`` default would be one object shared
            # by every instance and would leak state if the backend mutates it.
            if seleniumwire_options is None:
                seleniumwire_options = {}
            config = self._setup_backend(seleniumwire_options)
            # Route the browser through the backend's HTTP proxy address.
            options.add_argument(f"--proxy-server={config['proxy']['httpProxy']}")
            options.add_argument("--proxy-bypass-list=<-loopback>")
            options.add_argument("--ignore-certificate-errors")
            super().__init__(*args, options=options, **kwargs)

    has_seleniumwire = True
except Exception:
    # Optional dependency: any failure while importing or building the class
    # disables the feature. ``Exception`` (not a bare ``except``) keeps the
    # best-effort fallback without swallowing KeyboardInterrupt/SystemExit.
    has_seleniumwire = False
|
|
|
|
|
2023-11-20 13:00:40 +00:00
|
|
|
def get_browser(
    user_data_dir: str = None,
    headless: bool = False,
    proxy: str = None,
    options: ChromeOptions = None
) -> WebDriver:
    """
    Builds a Chrome WebDriver from the given settings and returns it.

    Args:
        user_data_dir (str, optional): Profile directory for the browser. When None,
            the platform config directory for "g4f" is used.
        headless (bool, optional): Passed through to the driver as its headless flag. Defaults to False.
        proxy (str, optional): Value for Chrome's ``--proxy-server`` argument. Defaults to None.
        options (ChromeOptions, optional): Pre-built browser options. A new
            ChromeOptions is created when this is falsy.

    Returns:
        WebDriver: The newly created Chrome driver.

    Raises:
        MissingRequirementsError: If the required Python packages are not
            installed or no Chrome executable can be found.
    """
    if not has_requirements:
        raise MissingRequirementsError('Install "undetected_chromedriver" and "platformdirs" package')

    chrome_binary = find_chrome_executable()
    if chrome_binary is None:
        raise MissingRequirementsError('Install "Google Chrome" browser')

    if user_data_dir is None:
        user_data_dir = user_config_dir("g4f")
    if user_data_dir and debug.logging:
        print("Open browser with config dir:", user_data_dir)

    options = options or ChromeOptions()
    if proxy:
        options.add_argument(f'--proxy-server={proxy}')

    # Use a system-installed chromedriver (e.g. inside docker) only when it
    # is a readable regular file; otherwise pass None as the driver path.
    driver_path = which('chromedriver') or '/usr/bin/chromedriver'
    driver_usable = path.isfile(driver_path) and access(driver_path, R_OK)
    if not driver_usable:
        driver_path = None

    return Chrome(
        options=options,
        user_data_dir=user_data_dir,
        driver_executable_path=driver_path,
        browser_executable_path=chrome_binary,
        headless=headless,
        patcher_force_close=True
    )
|
|
|
|
|
2024-01-29 17:14:46 +00:00
|
|
|
def get_driver_cookies(driver: WebDriver) -> Cookies:
    """
    Collects the cookies of a WebDriver into a plain name -> value mapping.

    Args:
        driver (WebDriver): The WebDriver whose ``get_cookies()`` result is read.

    Returns:
        dict: Cookie values keyed by cookie name. If a name occurs more than
            once, the last occurrence wins.
    """
    jar = {}
    for entry in driver.get_cookies():
        jar[entry["name"]] = entry["value"]
    return jar
|
2024-01-10 09:34:56 +00:00
|
|
|
|
2023-12-07 06:18:05 +00:00
|
|
|
def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None:
    """
    Attempts to bypass Cloudflare protection when accessing a URL using the provided WebDriver.

    The page is loaded first. If its <body> class is exactly "no-js" (treated
    here as the Cloudflare challenge page), the URL is re-opened in a new tab,
    the old tab is closed and the challenge input inside the turnstile iframe
    is clicked. In every case the function finally waits until a <body>
    without the "no-js" class is present.

    Args:
        driver (WebDriver): The WebDriver to use for accessing the URL.
        url (str): The URL to access.
        timeout (int): Time in seconds to wait for the unprotected page to appear.

    Raises:
        Exception: If there is an error while bypassing Cloudflare or loading the page.
            Errors from clicking the challenge are caught and only printed in debug
            mode; errors from the other driver calls and from the final wait propagate.
    """
    driver.get(url)
    if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js":
        if debug.logging:
            print("Cloudflare protection detected:", url)

        # Open website in a new tab: register a click listener that calls
        # window.open(url) on the challenge text element, then click it.
        element = driver.find_element(By.ID, "challenge-body-text")
        driver.execute_script(f"""
            arguments[0].addEventListener('click', () => {{
                window.open(arguments[1]);
            }});
        """, element, url)
        element.click()
        # Fixed 5 second pause - presumably to give the new tab time to open.
        time.sleep(5)

        # Switch to the new tab and close the old tab
        original_window = driver.current_window_handle
        for window_handle in driver.window_handles:
            if window_handle != original_window:
                driver.close()
                driver.switch_to.window(window_handle)
                break

        # Click on the challenge button in the iframe
        try:
            driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input"))
            ).click()
        except NoSuchElementException:
            # No turnstile iframe on the page: nothing to click.
            ...
        except Exception as e:
            # Best effort: any other failure is only reported in debug mode.
            if debug.logging:
                print(f"Error bypassing Cloudflare: {str(e).splitlines()[0]}")
        # Leave the iframe by re-selecting the window (used in place of
        # the disabled call below).
        #driver.switch_to.default_content()
        # window_handle is the loop variable from above: the new tab if one was
        # found, otherwise the last handle that was iterated.
        driver.switch_to.window(window_handle)
        # NOTE(review): `document.href` is not a standard DOM property (the URL
        # lives on `location.href`), so this is probably a no-op rather than a
        # reload - confirm the intent before changing it.
        driver.execute_script("document.href = document.href;")
    # Wait for a <body> that no longer carries the "no-js" class.
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
    )
|
2023-11-20 13:00:40 +00:00
|
|
|
|
2024-01-14 06:45:41 +00:00
|
|
|
class WebDriverSession:
    """
    Manages a Selenium WebDriver session, including handling of virtual displays and proxies.

    Used as a context manager: entering yields either the externally supplied
    WebDriver or a newly created browser; exiting closes the browser this
    session created and stops the virtual display if one is in use.
    """

    def __init__(
        self,
        webdriver: WebDriver = None,
        user_data_dir: str = None,
        headless: bool = False,
        virtual_display: bool = False,
        proxy: str = None,
        options: ChromeOptions = None
    ):
        """
        Initializes a new instance of the WebDriverSession.

        Args:
            webdriver (WebDriver, optional): A WebDriver instance for the session. Defaults to None.
            user_data_dir (str, optional): Directory for user data. Defaults to None.
            headless (bool, optional): Whether to run the browser in headless mode. Defaults to False.
            virtual_display (bool, optional): Whether to use a virtual display. Ignored
                when pyvirtualdisplay is not installed. Defaults to False.
            proxy (str, optional): Proxy settings for the browser. Defaults to None.
            options (ChromeOptions, optional): ChromeOptions for the browser. Defaults to None.
        """
        self.webdriver = webdriver
        self.user_data_dir = user_data_dir
        self.headless = headless
        # The display object is only created here; it is started in __enter__.
        self.virtual_display = Display(size=(1920, 1080)) if has_pyvirtualdisplay and virtual_display else None
        self.proxy = proxy
        self.options = options
        # Browser created by this session (as opposed to the supplied webdriver).
        self.default_driver = None

    def reopen(
        self,
        user_data_dir: str = None,
        headless: bool = False,
        virtual_display: bool = False
    ) -> WebDriver:
        """
        Quits the browser created by this session and opens a new one.

        Args:
            user_data_dir (str, optional): Directory for user data. Falls back to the
                session's user_data_dir when falsy.
            headless (bool, optional): Whether to run the new browser in headless mode.
                Defaults to False; the session's own headless setting is not reused.
            virtual_display (bool, optional): When False and the session has a virtual
                display, that display is stopped and discarded. When True an existing
                display is left as it is. Defaults to False.

        Returns:
            WebDriver: The reopened WebDriver instance.

        Note:
            The new browser uses the session's proxy, but the session's
            ChromeOptions are not passed on.
        """
        user_data_dir = user_data_dir or self.user_data_dir
        if self.default_driver:
            self.default_driver.quit()
        if not virtual_display and self.virtual_display:
            self.virtual_display.stop()
            self.virtual_display = None
        self.default_driver = get_browser(user_data_dir, headless, self.proxy)
        return self.default_driver

    def __enter__(self) -> WebDriver:
        """
        Context management method for entering a session.

        Returns:
            WebDriver: The WebDriver supplied at construction time if there is one
                (no display is started and no browser is created in that case);
                otherwise a new browser built from the session settings, after
                starting the virtual display when one is configured.
        """
        if self.webdriver:
            return self.webdriver
        if self.virtual_display:
            self.virtual_display.start()
        self.default_driver = get_browser(self.user_data_dir, self.headless, self.proxy, self.options)
        return self.default_driver

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context management method for exiting a session. Closes and quits the WebDriver.

        Args:
            exc_type: Exception type.
            exc_val: Exception value.
            exc_tb: Exception traceback.

        Note:
            Only the browser created by this session is closed; a WebDriver
            supplied at construction time is left untouched. The virtual display
            is stopped even when quitting the browser raises; that error still
            propagates to the caller.
        """
        try:
            if self.default_driver:
                try:
                    self.default_driver.close()
                except Exception as e:
                    if debug.logging:
                        # An exception with an empty message has no first line;
                        # fall back to "" instead of raising IndexError here.
                        first_line = (str(e).splitlines() or [""])[0]
                        print(f"Error closing WebDriver: {first_line}")
                finally:
                    self.default_driver.quit()
        finally:
            # Always release the display, even if quit() failed above.
            if self.virtual_display:
                self.virtual_display.stop()
|
|
|
|
|
|
|
|
def element_send_text(element: WebElement, text: str) -> None:
    """
    Writes text into an element via JavaScript and then presses ENTER on it.

    Args:
        element (WebElement): The element to fill; the script is executed
            through ``element.parent``.
        text (str): The text assigned to the element's ``innerText``.
    """
    owner = element.parent
    # Assign the text directly in the page instead of typing it key by key.
    owner.execute_script("arguments[0].innerText = arguments[1];", element, text)
    element.send_keys(Keys.ENTER)
|