2023-10-15 17:10:25 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
from aiohttp import ClientSession
|
|
|
|
|
|
|
|
from ..typing import AsyncResult, Messages
|
2024-04-06 22:07:58 +00:00
|
|
|
from ..requests.raise_for_status import raise_for_status
|
2024-01-23 18:44:48 +00:00
|
|
|
from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
|
2023-10-15 17:10:25 +00:00
|
|
|
|
|
|
|
|
2024-04-19 07:27:33 +00:00
|
|
|
class Llama(AsyncGeneratorProvider, ProviderModelMixin):
    """Streaming chat provider that posts prompts to the ``www.llama2.ai`` API.

    The conversation is flattened into a single prompt string with
    ``format_prompt`` and sent as JSON to ``{url}/api``; the response body is
    streamed back to the caller as decoded text fragments.
    """

    url = "https://www.llama2.ai"
    working = True
    supports_message_history = True
    # Kept in sync with ``models`` below: the default must be one of the
    # identifiers this provider actually lists.
    default_model = "meta/meta-llama-3-70b-instruct"
    models = [
        "meta/llama-2-7b-chat",
        "meta/llama-2-13b-chat",
        "meta/llama-2-70b-chat",
        "meta/meta-llama-3-8b-instruct",
        "meta/meta-llama-3-70b-instruct",
    ]
    # Alternative (Hugging Face style) names mapped onto the identifiers above.
    model_aliases = {
        "meta-llama/Meta-Llama-3-8b-instruct": "meta/meta-llama-3-8b-instruct",
        "meta-llama/Meta-Llama-3-70b-instruct": "meta/meta-llama-3-70b-instruct",
        "meta-llama/Llama-2-7b-chat-hf": "meta/llama-2-7b-chat",
        "meta-llama/Llama-2-13b-chat-hf": "meta/llama-2-13b-chat",
        "meta-llama/Llama-2-70b-chat-hf": "meta/llama-2-70b-chat",
    }

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str | None = None,
        system_message: str = "You are a helpful assistant.",
        temperature: float = 0.75,
        top_p: float = 0.9,
        max_tokens: int = 8000,
        **kwargs
    ) -> AsyncResult:
        """Stream the model's reply to ``messages`` as text fragments.

        Args:
            model: Requested model name; resolved through ``cls.get_model``
                (inherited — presumably applies ``model_aliases`` and the
                default; confirm against the mixin).
            messages: Chat history as dicts with ``"role"`` and ``"content"``.
                Messages with role ``"system"`` are removed from the prompt
                and, when present, joined with newlines to replace
                ``system_message``.
            proxy: Optional proxy URL forwarded to the HTTP request.
            system_message: System prompt used when ``messages`` contains no
                system entries.
            temperature: Sent to the API as ``temperature``.
            top_p: Sent to the API as ``topP``.
            max_tokens: Sent to the API as ``maxTokens``.
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            Non-empty decoded text fragments of the response body, with the
            leading whitespace of the response removed.
        """
        # Local import: only this method needs it.
        import codecs

        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
            "Accept": "*/*",
            "Accept-Language": "de,en-US;q=0.7,en;q=0.3",
            "Accept-Encoding": "gzip, deflate, br",
            "Referer": f"{cls.url}/",
            "Content-Type": "text/plain;charset=UTF-8",
            "Origin": cls.url,
            "Connection": "keep-alive",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "TE": "trailers"
        }

        # System messages travel in their own request field, not in the prompt.
        system_messages = [message["content"] for message in messages if message["role"] == "system"]
        if system_messages:
            system_message = "\n".join(system_messages)
            messages = [message for message in messages if message["role"] != "system"]

        data = {
            "prompt": format_prompt(messages),
            "model": cls.get_model(model),
            "systemPrompt": system_message,
            "temperature": temperature,
            "topP": top_p,
            "maxTokens": max_tokens,
            "image": None
        }

        # Chunk boundaries are arbitrary, so a multi-byte UTF-8 character may
        # be split across two chunks. An incremental decoder buffers the
        # partial sequence instead of discarding it.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="ignore")
        started = False
        async with ClientSession(headers=headers) as session:
            async with session.post(f"{cls.url}/api", json=data, proxy=proxy) as response:
                # Project helper; presumably raises on an error status.
                await raise_for_status(response)
                async for chunk in response.content.iter_any():
                    if not chunk:
                        continue
                    if not started:
                        # Keep stripping until real content shows up, so that
                        # a whitespace-only first chunk does not end the
                        # leading-whitespace trimming prematurely.
                        chunk = chunk.lstrip()
                        if not chunk:
                            continue
                        started = True
                    text = decoder.decode(chunk)
                    if text:
                        yield text
|
2023-10-15 17:10:25 +00:00
|
|
|
|
|
|
|
def format_prompt(messages: Messages):
    """Flatten a chat history into a single newline-separated prompt string.

    User messages are wrapped as ``[INST] <content> [/INST]``; messages with
    any other role contribute their content unchanged. The lines are joined
    with ``"\\n"`` and the result always ends with a trailing newline.
    """
    rendered = []
    for entry in messages:
        if entry["role"] == "user":
            rendered.append(f"[INST] {entry['content']} [/INST]")
        else:
            rendered.append(entry["content"])
    return "\n".join(rendered) + "\n"
|