mirror of https://github.com/xtekky/gpt4free
Add websearch to gui (#1314)
* Add websearch to gui * Fix version_check config * Add version badge in README.md * Show version in gui * Add docker hub build * Fix gui backend, improve style (pull/1320/head)
parent
5862d55abf
commit
484b96d850
Before Width: | Height: | Size: 152 KiB After Width: | Height: | Size: 152 KiB |
@ -0,0 +1,66 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from urllib.parse import unquote
|
||||
|
||||
from ...typing import AsyncResult, Messages
|
||||
from ..base_provider import BaseProvider
|
||||
from ...webdriver import WebDriver
|
||||
from ...requests import Session, get_session_from_browser
|
||||
|
||||
class AiChatting(BaseProvider):
    """Provider for aichatting.net.

    Creates a conversation through the site's REST API, then streams the
    completion from its server-sent-events endpoint. A browser-derived
    ``Session`` is cached on the class so cookies (notably the visitor id)
    survive across calls; ``reset()`` clears it.
    """
    url = "https://www.aichatting.net"
    supports_gpt_35_turbo = True
    # Cached HTTP session shared by all calls; populated lazily from a
    # real browser so the site's cookies/headers are valid.
    _session: Session = None

    @classmethod
    def create_completion(
        cls,
        model: str,
        messages: Messages,
        stream: bool,
        proxy: str = None,
        timeout: int = 120,
        webdriver: WebDriver = None,
        **kwargs
    ) -> AsyncResult:
        """Yield decoded response chunks for *messages*.

        NOTE(review): this is a plain (sync) generator although annotated
        ``AsyncResult`` — confirm against BaseProvider's contract; the
        annotation looks copied from an async provider.
        """
        if not cls._session:
            # Bootstrap a session (cookies, UA) by driving a browser once.
            cls._session = get_session_from_browser(cls.url, webdriver, proxy, timeout)
        # NOTE(review): assumes the visitor-id cookie is always present
        # after the browser bootstrap; unquote(None) would raise TypeError.
        visitorId = unquote(cls._session.cookies.get("aichatting.website.visitorId"))

        headers = {
            "accept": "application/json, text/plain, */*",
            "lang": "en",
            "source": "web"
        }
        data = {
            "roleId": 0,
        }
        try:
            response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/record/conversation/create", json=data, headers=headers)
            response.raise_for_status()
            conversation_id = response.json()["data"]["conversationId"]
        except Exception:
            # The cached session is likely stale; drop it so the next call
            # re-bootstraps, then re-raise preserving the traceback.
            cls.reset()
            raise
        headers = {
            "authority": "aga-api.aichatting.net",
            "accept": "text/event-stream,application/json, text/event-stream",
            "lang": "en",
            "source": "web",
            "vtoken": visitorId,
        }
        data = {
            "spaceHandle": True,
            "roleId": 0,
            "messages": messages,
            "conversationId": conversation_id,
        }
        response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/v2/stream", json=data, headers=headers, stream=True)
        response.raise_for_status()
        for chunk in response.iter_lines():
            if chunk.startswith(b"data:"):
                # Undo the site's ad-hoc escaping: "-=- --" -> space,
                # "-=-n--" -> newline; strip the end-of-stream marker.
                yield chunk[5:].decode().replace("-=- --", " ").replace("-=-n--", "\n").replace("--@DONE@--", "")

    @classmethod
    def reset(cls):
        """Forget the cached session so the next call creates a fresh one."""
        cls._session = None
|
@ -1,3 +1,4 @@
|
||||
from .MikuChat import MikuChat
|
||||
from .Komo import Komo
|
||||
from .ChatAiGpt import ChatAiGpt
|
||||
from .AiChatting import AiChatting
|
@ -1 +1,2 @@
|
||||
logging = False
|
||||
version_check = True
|
@ -1,58 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from aiohttp import ClientSession, ClientTimeout
|
||||
from duckduckgo_search import DDGS
|
||||
import asyncio
|
||||
|
||||
class SearchResults():
    """Iterable wrapper around search result entries that renders itself
    as a text blob with numbered source links."""

    def __init__(self, results: list):
        self.results = results

    def __iter__(self):
        return iter(self.results)

    def __str__(self):
        # One section per entry, separated by a blank-line gap; prefer the
        # fetched page text over the search snippet when available.
        sections = []
        for idx, result in enumerate(self.results):
            body = result.text if result.text else result.snippet
            sections.append(
                f"Title: {result.title}\n\n"
                f"{body}"
                f"\n\nSource: [[{idx}]]({result.url})"
            )
        return "\n\n\n".join(sections)
|
||||
|
||||
class SearchResultEntry():
    """One web search hit: title, url and snippet, plus the optional
    scraped full-page text (filled in later via set_text)."""

    def __init__(self, title: str, url: str, snippet: str, text: str = None):
        self.title, self.url = title, url
        self.snippet, self.text = snippet, text

    def set_text(self, text: str):
        """Attach the scraped page body for this entry."""
        self.text = text
|
||||
|
||||
def scrape_text(html: str, max_words: int = None) -> str:
    """Extract readable paragraph text from *html*.

    Drops <script>/<style>, narrows to the first matching main-content
    container, strips known site boilerplate, and joins the whitespace-
    normalized lines of every <p>. When *max_words* is truthy, stops as
    soon as the word budget is used up (a line that would overflow the
    budget is dropped entirely, matching the original behavior).
    """
    soup = BeautifulSoup(html, "html.parser")
    for exclude in soup(["script", "style"]):
        exclude.extract()
    # Prefer the page's main-content container so navigation, sidebars
    # and footers are skipped.
    for selector in [
        "main",
        ".main-content-wrapper",
        ".main-content",
        ".emt-container-inner",
        ".content-wrapper",
        "#content",
        "#mainContent",
    ]:
        select = soup.select_one(selector)
        if select:
            soup = select
            break
    # Zdnet: remove the affiliate-disclosure box.
    for remove in [".c-globalDisclosure"]:
        select = soup.select_one(remove)
        if select:
            select.extract()
    clean_text = ""
    for paragraph in soup.select("p"):
        for line in paragraph.get_text().splitlines():
            # Collapse tabs and runs of spaces to single spaces.
            words = [word for word in line.replace("\t", " ").split(" ") if word]
            if not words:
                continue
            if max_words:
                max_words -= len(words)
                if max_words <= 0:
                    # Budget exhausted: return immediately. The original
                    # only broke the inner loop, which kept scanning the
                    # remaining paragraphs and — when the budget hit
                    # exactly 0 — silently disabled the limit altogether.
                    return clean_text
            if clean_text:
                clean_text += "\n"
            clean_text += " ".join(words)
    return clean_text
|
||||
|
||||
async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = None) -> str:
    """Fetch *url* and return its scraped text, or None on any failure.

    Best-effort by design: network errors, timeouts and non-200 responses
    all yield None so that one bad page never breaks the whole gather().
    """
    try:
        async with session.get(url) as response:
            if response.status == 200:
                html = await response.text()
                return scrape_text(html, max_words)
    except Exception:
        # Narrowed from the original bare `except:`, which also swallowed
        # asyncio.CancelledError / KeyboardInterrupt; task cancellation
        # must propagate in async code.
        return
|
||||
|
||||
async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults:
    """Run a DuckDuckGo text search and build a SearchResults.

    Fetches up to *n_results* hits; when *add_text* is set, concurrently
    downloads and scrapes each hit's page. Entries are accumulated until
    the *max_words* budget (titles + bodies) is exceeded.
    """
    with DDGS() as ddgs:
        results = []
        try:
            for result in ddgs.text(
                query,
                region="wt-wt",
                safesearch="moderate",
                timelimit="y",
            ):
                results.append(SearchResultEntry(
                    result["title"],
                    result["href"],
                    result["body"]
                ))
                if len(results) >= n_results:
                    break
        except Exception as e:
            # Best-effort: log and bail out with an empty list rather than
            # failing the caller. NOTE(review): returning [] does not match
            # the SearchResults annotation — confirm callers handle both.
            print("Couldn't search DuckDuckGo:", e)
            print(e.__traceback__.tb_next)
            return []

        texts = []
        if add_text and results:
            # Per-page word budget. Guard against ZeroDivisionError when
            # n_results == 1 (the original divided by n_results - 1).
            per_page_words = int(max_words / max(n_results - 1, 1))
            requests = []
            async with ClientSession(timeout=ClientTimeout(5)) as session:
                for entry in results:
                    requests.append(fetch_and_scrape(session, entry.url, per_page_words))
                texts = await asyncio.gather(*requests)

        formatted_results = []
        left_words = max_words
        for i, entry in enumerate(results):
            if add_text:
                entry.text = texts[i]
            if left_words:
                # Rough budget: title words (+5 markup overhead) plus body.
                left_words -= entry.title.count(" ") + 5
                if entry.text:
                    left_words -= entry.text.count(" ")
                else:
                    left_words -= entry.snippet.count(" ")
                if left_words < 0:
                    break
            formatted_results.append(entry)

        return SearchResults(formatted_results)
|
||||
|
||||
def duckduckgo_search(q: str, max_results: int = 3, safesearch: str = "moderate", region: str = "us-en") -> list | None:
|
||||
if region is None:
|
||||
region = "us-en"
|
||||
|
||||
if safesearch is None:
|
||||
safesearch = "moderate"
|
||||
def get_search_message(prompt) -> str:
    """Prepend web-search context to *prompt* for the model.

    Runs the async search() synchronously and wraps the rendered results
    plus citation instructions around the user request. On any failure the
    original prompt is returned unchanged — websearch is deliberately
    best-effort and must never break the chat flow.
    """
    try:
        search_results = asyncio.run(search(prompt))
        message = f"""
{search_results}


Instruction: Using the provided web search results, to write a comprehensive reply to the user request.
Make sure to add the sources of cites using [[Number]](Url) notation after the reference. Example: [[0]](http://google.com)
If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.


User request:
{prompt}
"""
        return message
    except Exception as e:
        # Deliberate swallow: fall back to the plain prompt.
        print("Couldn't search DuckDuckGo:", e)
        return prompt
|
||||
|
Loading…
Reference in New Issue