add more models

mixtral 8x22b
2024-11-10 19:11:01 +00:00 · 2024-04-13 02:46:20 +01:00 · 2024-04-13 02:46:20 +01:00 · ed8afc20e8
commit ed8afc20e8
parent 8c560b87f1
6 changed files with 49 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@ -60,3 +60,6 @@ models
 projects/windows/g4f
 doc.txt
 dist.py
+x.txt
+bench.py
+to-reverse.txt
--- a/README.md
+++ b/README.md
@ -281,6 +281,15 @@ set G4F_PROXY=http://host:port
 | [beta.theb.ai](https://beta.theb.ai) | `g4f.Provider.Theb` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |
 | [you.com](https://you.com) | `g4f.Provider.You` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |

+## New Open-Source Models
+While we wait for gpt-5, here is a list of new open-source models that all outperform gpt-3.5-turbo; some rival gpt-4. Expect this list to grow.
+
+| Model | Provider | Parameters |
+| ------ | ------- | ------ |
+| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 141B / 39B active |
+| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active |
+
+
 ### GPT-3.5

 | Website | Provider | GPT-3.5 | GPT-4 | Stream | Status | Auth |
--- a/g4f/Provider/DeepInfra.py
+++ b/g4f/Provider/DeepInfra.py
@ -11,7 +11,7 @@ class DeepInfra(Openai):
    needs_auth = False
    supports_stream = True
    supports_message_history = True
-    default_model = 'meta-llama/Llama-2-70b-chat-hf'
+    default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1'

    @classmethod
    def get_models(cls):
@ -32,6 +32,14 @@ class DeepInfra(Openai):
        max_tokens: int = 1028,
        **kwargs
    ) -> AsyncResult:
+        
+        if not '/' in model:
+            models = {
+                'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1',
+                'dbrx-instruct': 'databricks/dbrx-instruct',
+            }
+            model = models.get(model, model)
+        
        headers = {
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US',
--- a/g4f/Provider/needs_auth/Openai.py
+++ b/g4f/Provider/needs_auth/Openai.py
@ -51,6 +51,7 @@ class Openai(AsyncGeneratorProvider, ProviderModelMixin):
                stream=stream,
                **extra_data
            )
+            
            async with session.post(f"{api_base.rstrip('/')}/chat/completions", json=data) as response:
                await raise_for_status(response)
                if not stream:
--- a/g4f/client/service.py
+++ b/g4f/client/service.py
@ -55,6 +55,7 @@ def get_model_and_provider(model    : Union[Model, str],
        provider = convert_to_provider(provider)

    if isinstance(model, str):
+        
        if model in ModelUtils.convert:
            model = ModelUtils.convert[model]
    
--- a/g4f/models.py
+++ b/g4f/models.py
@ -162,11 +162,11 @@ mistral_7b_v02 = Model(
    best_provider = DeepInfra
 )

-# mixtral_8x22b = Model(
-#     name          = "mistralai/Mixtral-8x22B-v0.1",
-#     base_provider = "huggingface",
-#     best_provider = DeepInfra
-# )
+mixtral_8x22b = Model(
+    name          = "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
+    base_provider = "huggingface",
+    best_provider = DeepInfra
+)

 # Misc models
 dolphin_mixtral_8x7b = Model(
@ -266,6 +266,12 @@ pi = Model(
    best_provider = Pi
 )

+dbrx_instruct = Model(
+    name = 'databricks/dbrx-instruct',
+    base_provider = 'mistral',
+    best_provider = DeepInfra
+)
+
 class ModelUtils:
    """
    Utility class for mapping string identifiers to Model instances.
@ -301,19 +307,27 @@ class ModelUtils:
        'gigachat_plus': gigachat_plus,
        'gigachat_pro' : gigachat_pro,
        
+        # Mistral Opensource
        'mixtral-8x7b': mixtral_8x7b,
        'mistral-7b': mistral_7b,
        'mistral-7b-v02': mistral_7b_v02,
-        # 'mixtral-8x22b': mixtral_8x22b,
+        'mixtral-8x22b': mixtral_8x22b,
        'dolphin-mixtral-8x7b': dolphin_mixtral_8x7b,
-        'lzlv-70b': lzlv_70b,
-        'airoboros-70b': airoboros_70b,
-        'openchat_3.5': openchat_35,
+        
+        # google gemini
        'gemini': gemini,
        'gemini-pro': gemini_pro,
+        
+        # anthropic
        'claude-v2': claude_v2,
        'claude-3-opus': claude_3_opus,
        'claude-3-sonnet': claude_3_sonnet,
+        
+        # other
+        'dbrx-instruct': dbrx_instruct,
+        'lzlv-70b': lzlv_70b,
+        'airoboros-70b': airoboros_70b,
+        'openchat_3.5': openchat_35,
        'pi': pi
    }