2023-09-25 22:52:29 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2023-10-07 08:17:43 +00:00
|
|
|
import json
|
2023-10-02 00:04:22 +00:00
|
|
|
from functools import partialmethod
|
2023-11-19 04:36:04 +00:00
|
|
|
from typing import AsyncGenerator
|
2023-12-02 04:40:07 +00:00
|
|
|
from urllib.parse import urlparse
|
|
|
|
from curl_cffi.requests import AsyncSession, Session, Response
|
2024-01-10 09:34:56 +00:00
|
|
|
from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies
|
2023-09-25 22:52:29 +00:00
|
|
|
|
|
|
|
class StreamResponse:
    """
    Async facade around a streamed response.

    The wrapped object is awaited when the instance is entered with
    ``async with``; the awaited result then replaces ``inner`` and its
    metadata is mirrored onto this wrapper.

    Attributes:
        inner (Response): The wrapped object; replaced by the awaited
            result once the context has been entered.
        request, status_code, reason, ok, headers, cookies: Copied from
            the awaited response in ``__aenter__``.
    """

    def __init__(self, inner: Response) -> None:
        """Store the object that will be awaited on context entry."""
        self.inner: Response = inner

    async def text(self) -> str:
        """Return the result of the wrapped response's ``atext()``."""
        body = await self.inner.atext()
        return body

    def raise_for_status(self) -> None:
        """Delegate the status check to the wrapped response."""
        self.inner.raise_for_status()

    async def json(self, **kwargs) -> dict:
        """
        Decode the response body as JSON.

        Args:
            **kwargs: Forwarded unchanged to ``json.loads``.
        """
        payload = await self.inner.acontent()
        return json.loads(payload, **kwargs)

    async def iter_lines(self) -> AsyncGenerator[bytes, None]:
        """Yield every item produced by the wrapped ``aiter_lines()``."""
        async for row in self.inner.aiter_lines():
            yield row

    async def iter_content(self) -> AsyncGenerator[bytes, None]:
        """Yield every item produced by the wrapped ``aiter_content()``."""
        async for piece in self.inner.aiter_content():
            yield piece

    async def __aenter__(self):
        """Await the wrapped object, mirror its metadata and return ``self``."""
        response: Response = await self.inner
        # From here on ``inner`` refers to the awaited result.
        self.inner = response
        self.request = response.request
        self.status_code: int = response.status_code
        self.reason: str = response.reason
        self.ok: bool = response.ok
        self.headers = response.headers
        self.cookies = response.cookies
        return self

    async def __aexit__(self, *args):
        """Close the wrapped response when leaving the context."""
        await self.inner.aclose()
|
2023-10-02 00:04:22 +00:00
|
|
|
|
2024-01-14 06:45:41 +00:00
|
|
|
|
2023-10-02 00:04:22 +00:00
|
|
|
class StreamSession(AsyncSession):
    """
    AsyncSession variant whose requests are always issued in streaming mode.

    ``request`` passes ``stream=True`` to the parent implementation and
    hands the result to ``StreamResponse`` without awaiting it.
    """

    def request(
        self, method: str, url: str, **kwargs
    ) -> StreamResponse:
        """
        Start a streaming request and wrap it in a StreamResponse.

        Args:
            method (str): HTTP method name.
            url (str): Target URL.
            **kwargs: Forwarded to ``AsyncSession.request``.

        Returns:
            StreamResponse: Wrapper around the parent ``request`` result.
        """
        pending = super().request(method, url, stream=True, **kwargs)
        return StreamResponse(pending)

    # Per-verb shortcuts: each one binds the HTTP method name onto ``request``.
    head = partialmethod(request, "HEAD")
    get = partialmethod(request, "GET")
    post = partialmethod(request, "POST")
    put = partialmethod(request, "PUT")
    patch = partialmethod(request, "PATCH")
    delete = partialmethod(request, "DELETE")
|
2024-01-14 06:45:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
    """
    Build a Session seeded with cookies and a user agent taken from a browser.

    A WebDriver session visits ``url`` through ``bypass_cloudflare``; its
    cookies and ``navigator.userAgent`` value are then reused for the
    returned Session.

    Args:
        url (str): The URL to open in the WebDriver; also used for the
            ``referer``, ``origin`` and ``authority`` headers.
        webdriver (WebDriver, optional): The WebDriver instance handed to
            ``WebDriverSession``.
        proxy (str, optional): Proxy passed to the WebDriver session and
            used for both ``http`` and ``https`` in the Session.
        timeout (int, optional): Passed to ``bypass_cloudflare`` and used
            as the Session timeout.

    Returns:
        Session: A Session configured with the collected cookies and headers.
    """
    # The driver is only used to collect the cookies and the user agent.
    with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver:
        bypass_cloudflare(driver, url, timeout)
        cookies = get_driver_cookies(driver)
        user_agent = driver.execute_script("return navigator.userAgent")

    parsed = urlparse(url)
    origin = f'{parsed.scheme}://{parsed.netloc}'
    headers = {
        'accept': '*/*',
        'authority': parsed.netloc,
        'origin': origin,
        'referer': url,
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': user_agent
    }
    proxies = {"https": proxy, "http": proxy}
    return Session(
        cookies=cookies,
        headers=headers,
        proxies=proxies,
        timeout=timeout,
        impersonate="chrome110"
    )
|