Add Llama2 Providers / Models

pull/1175/head
Heiner Lohaus 8 months ago
parent 92a31d8281
commit 0d1ae405cc

@@ -0,0 +1,63 @@
from __future__ import annotations

import json

from aiohttp import ClientSession

from ..typing import AsyncResult, Messages
from .base_provider import AsyncGeneratorProvider


class DeepInfra(AsyncGeneratorProvider):
    url = "https://deepinfra.com"
    working = True

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        **kwargs
    ) -> AsyncResult:
        if not model:
            model = "meta-llama/Llama-2-70b-chat-hf"
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
            "Accept": "text/event-stream",
            "Accept-Language": "de,en-US;q=0.7,en;q=0.3",
            "Accept-Encoding": "gzip, deflate, br",
            "Referer": f"{cls.url}/",
            "Content-Type": "application/json",
            "X-Deepinfra-Source": "web-page",
            "Origin": cls.url,
            "Connection": "keep-alive",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
        }
        async with ClientSession(headers=headers) as session:
            data = {
                "model": model,
                "messages": messages,
                "stream": True,
            }
            async with session.post(
                "https://api.deepinfra.com/v1/openai/chat/completions",
                json=data,
                proxy=proxy
            ) as response:
                response.raise_for_status()
                first = True
                # The endpoint streams server-sent events: "data: {json}" lines,
                # terminated by "data: [DONE]".
                async for line in response.content:
                    if line.startswith(b"data: [DONE]"):
                        break
                    elif line.startswith(b"data: "):
                        chunk = json.loads(line[6:])["choices"][0]["delta"].get("content")
                        if chunk:
                            # Trim leading whitespace from the first non-empty chunk only.
                            if first:
                                chunk = chunk.lstrip()
                                if chunk:
                                    first = False
                            yield chunk
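
Not part of the commit, but for reference: a minimal sketch of driving the new provider directly. It assumes the usual g4f package layout and that DeepInfra is exported from g4f.Provider (the __init__.py change further down); create_async_generator is an async generator, so it can be iterated with async for.

import asyncio

from g4f.Provider import DeepInfra  # assumed import path, see the __init__.py hunk below

async def main():
    messages = [{"role": "user", "content": "Say hello in one sentence."}]
    # Chunks are yielded as they are parsed from the "data: ..." event stream.
    async for chunk in DeepInfra.create_async_generator(
        model="meta-llama/Llama-2-70b-chat-hf",
        messages=messages,
    ):
        print(chunk, end="", flush=True)

asyncio.run(main())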

@@ -6,15 +6,14 @@ from ..typing import AsyncResult, Messages
from .base_provider import AsyncGeneratorProvider

models = {
-    "7B": {"name": "Llama 2 7B", "version": "d24902e3fa9b698cc208b5e63136c4e26e828659a9f09827ca6ec5bb83014381", "shortened":"7B"},
-    "13B": {"name": "Llama 2 13B", "version": "9dff94b1bed5af738655d4a7cbcdcde2bd503aa85c94334fe1f42af7f3dd5ee3", "shortened":"13B"},
-    "70B": {"name": "Llama 2 70B", "version": "2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", "shortened":"70B"},
+    "meta-llama/Llama-2-7b-chat-hf": {"name": "Llama 2 7B", "version": "d24902e3fa9b698cc208b5e63136c4e26e828659a9f09827ca6ec5bb83014381", "shortened":"7B"},
+    "meta-llama/Llama-2-13b-chat-hf": {"name": "Llama 2 13B", "version": "9dff94b1bed5af738655d4a7cbcdcde2bd503aa85c94334fe1f42af7f3dd5ee3", "shortened":"13B"},
+    "meta-llama/Llama-2-70b-chat-hf": {"name": "Llama 2 70B", "version": "2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", "shortened":"70B"},
    "Llava": {"name": "Llava 13B", "version": "6bc1c7bb0d2a34e413301fee8f7cc728d2d4e75bfab186aa995f63292bda92fc", "shortened":"Llava"}
}

class Llama2(AsyncGeneratorProvider):
    url = "https://www.llama2.ai"
    supports_gpt_35_turbo = True
    working = True

    @classmethod
@@ -26,8 +25,8 @@ class Llama2(AsyncGeneratorProvider):
        **kwargs
    ) -> AsyncResult:
        if not model:
-            model = "70B"
-        if model not in models:
+            model = "meta-llama/Llama-2-70b-chat-hf"
+        elif model not in models:
            raise ValueError(f"Model are not supported: {model}")
        version = models[model]["version"]
        headers = {
@@ -54,7 +53,7 @@ class Llama2(AsyncGeneratorProvider):
            "systemPrompt": kwargs.get("system_message", "You are a helpful assistant."),
            "temperature": kwargs.get("temperature", 0.75),
            "topP": kwargs.get("top_p", 0.9),
-            "maxTokens": kwargs.get("max_tokens", 1024),
+            "maxTokens": kwargs.get("max_tokens", 8000),
            "image": None
        }
        started = False
@@ -68,9 +67,9 @@ class Llama2(AsyncGeneratorProvider):

def format_prompt(messages: Messages):
    messages = [
-        f"[INST]{message['content']}[/INST]"
+        f"[INST] {message['content']} [/INST]"
        if message["role"] == "user"
        else message["content"]
        for message in messages
    ]
-    return "\n".join(messages)
+    return "\n".join(messages) + "\n"

@@ -17,6 +17,7 @@ from .ChatgptFree import ChatgptFree
from .ChatgptLogin import ChatgptLogin
from .ChatgptX import ChatgptX
from .Cromicle import Cromicle
+from .DeepInfra import DeepInfra
from .FakeGpt import FakeGpt
from .FreeGpt import FreeGpt
from .GPTalk import GPTalk
@@ -70,6 +71,7 @@ class ProviderUtils:
        'ChatgptX': ChatgptX,
        'CodeLinkAva': CodeLinkAva,
        'Cromicle': Cromicle,
+        'DeepInfra': DeepInfra,
        'DfeHub': DfeHub,
        'EasyChat': EasyChat,
        'Equing': Equing,
@@ -144,6 +146,7 @@ __all__ = [
    'ChatgptLogin',
    'ChatgptX',
    'Cromicle',
+    'DeepInfra',
    'CodeLinkAva',
    'DfeHub',
    'EasyChat',
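
Not part of the commit: a quick check that the registration works, assuming the usual g4f package layout. The provider becomes importable from the package namespace and is listed in its __all__.

import g4f.Provider as Provider

print("DeepInfra" in Provider.__all__)   # True after this change
print(Provider.DeepInfra.url)            # https://deepinfra.com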

@@ -6,12 +6,14 @@ from .Provider import (
    GptForLove,
    ChatgptAi,
    GptChatly,
+    DeepInfra,
    ChatgptX,
    ChatBase,
    GeekGpt,
    FakeGpt,
    FreeGpt,
    NoowAi,
+    Llama2,
    Vercel,
    Aichat,
    GPTalk,
@@ -74,6 +76,21 @@ gpt_4 = Model(
    ])
)

+llama2_7b = Model(
+    name = "meta-llama/Llama-2-7b-chat-hf",
+    base_provider = 'huggingface',
+    best_provider = RetryProvider([Llama2, DeepInfra]))
+
+llama2_13b = Model(
+    name = "meta-llama/Llama-2-13b-chat-hf",
+    base_provider = 'huggingface',
+    best_provider = RetryProvider([Llama2, DeepInfra]))
+
+llama2_70b = Model(
+    name = "meta-llama/Llama-2-70b-chat-hf",
+    base_provider = "huggingface",
+    best_provider = RetryProvider([Llama2, DeepInfra]))
+
# Bard
palm = Model(
    name = 'palm',
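
Not part of the commit: a sketch of using one of the new model entries through the high-level API, assuming g4f.ChatCompletion.create accepts a Model object as in other g4f examples of that era; names may differ between versions. Each entry routes through RetryProvider([Llama2, DeepInfra]), so when one provider raises, the request is retried with the other.

import g4f
from g4f.models import llama2_70b  # assumed import path for the new entry

# No explicit provider is passed, so the model's best_provider
# (RetryProvider over Llama2 and DeepInfra) decides where the request goes.
response = g4f.ChatCompletion.create(
    model=llama2_70b,
    messages=[{"role": "user", "content": "Hello"}],
)
print(response)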
