From 6d05afc81b53f2fbe2908058e5afdb245c6e780a Mon Sep 17 00:00:00 2001
From: sigoden <sigoden@gmail.com>
Date: Fri, 21 Jun 2024 06:51:47 +0000
Subject: [PATCH] refactor: update models.yaml

---
 models.yaml | 302 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 206 insertions(+), 96 deletions(-)

diff --git a/models.yaml b/models.yaml
index f792ab8..dc8a659 100644
--- a/models.yaml
+++ b/models.yaml
@@ -23,49 +23,26 @@
       output_price: 30
       supports_vision: true
       supports_function_calling: true
-    - name: gpt-4-turbo-preview
-      max_input_tokens: 128000
-      max_output_tokens: 4096
-      input_price: 10
-      output_price: 30
-      supports_function_calling: true
-    - name: gpt-4-1106-preview
-      max_input_tokens: 128000
-      max_output_tokens: 4096
-      input_price: 10
-      output_price: 30
-      supports_function_calling: true
-    - name: gpt-4
-      max_input_tokens: 8192
-      max_output_tokens: 4096
-      input_price: 30
-      output_price: 60
     - name: gpt-3.5-turbo
       max_input_tokens: 16385
       max_output_tokens: 4096
       input_price: 0.5
       output_price: 1.5
       supports_function_calling: true
-    - name: gpt-3.5-turbo-1106
-      max_input_tokens: 16385
-      max_output_tokens: 4096
-      input_price: 1
-      output_price: 2
-      supports_function_calling: true
     - name: text-embedding-3-large
       mode: embedding
       max_input_tokens: 8191
-      default_chunk_size: 4000
+      default_chunk_size: 3000
       max_concurrent_chunks: 100
     - name: text-embedding-3-small
       mode: embedding
       max_input_tokens: 8191
-      default_chunk_size: 4000
+      default_chunk_size: 3000
       max_concurrent_chunks: 100
     - name: text-embedding-ada-002
       mode: embedding
       max_input_tokens: 8191
-      default_chunk_size: 4000
+      default_chunk_size: 3000
       max_concurrent_chunks: 100
 
 - platform: gemini 
@@ -99,7 +76,7 @@
     - name: text-embedding-004
       mode: embedding
       max_input_tokens: 2048
-      default_chunk_size: 2000
+      default_chunk_size: 1500
 
 - platform: claude
   # docs:
@@ -176,25 +153,21 @@
     - name: mistral-embed
       mode: embedding
       max_input_tokens: 8092
-      default_chunk_size: 4000
+      default_chunk_size: 2000
 
 - platform: cohere
   # docs:
   #   - https://docs.cohere.com/docs/command-r
   #   - https://cohere.com/pricing
   #   - https://docs.cohere.com/reference/chat
-  # notes
-  #   - get max_output_tokens info from api error
   models:
     - name: command-r
       max_input_tokens: 128000
-      max_output_tokens: 4000
       input_price: 0.5
       output_price: 1.5
       supports_function_calling: true
     - name: command-r-plus
       max_input_tokens: 128000
-      max_output_tokens: 4000
       input_price: 3
       output_price: 15
       supports_function_calling: true
@@ -242,32 +215,33 @@
   #   - https://docs.perplexity.ai/docs/model-cards
   #   - https://docs.perplexity.ai/docs/pricing
   #   - https://docs.perplexity.ai/reference/post_chat_completions
-  # notes
-  #   - get max_output_tokens info from api error
   models:
     - name: llama-3-sonar-small-32k-chat
       max_input_tokens: 32768
-      max_output_tokens: 32768
+      input_price: 0.2
+      output_price: 0.2
+    - name: llama-3-sonar-small-32k-online
+      max_input_tokens: 28000
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-sonar-large-32k-chat	
       max_input_tokens: 32768
-      max_output_tokens: 32768
-      input_price: 0.6
-      output_price: 0.6
+      input_price: 1
+      output_price: 1
+    - name: llama-3-sonar-large-32k-online
+      max_input_tokens: 28000
+      input_price: 1
+      output_price: 1
     - name: llama-3-8b-instruct
       max_input_tokens: 8192
-      max_output_tokens: 8192
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-70b-instruct
       max_input_tokens: 8192
-      max_output_tokens: 8192
       input_price: 1
       output_price: 1
     - name: mixtral-8x7b-instruct
       max_input_tokens: 16384
-      max_output_tokens: 16384
       input_price: 0.6
       output_price: 0.6
 
@@ -277,24 +251,28 @@
   #   - https://wow.groq.com
   #   - https://console.groq.com/docs/text-chat
   # notes:
-  #   - get max_output_tokens info from playgourd
   #   - all models are free with rate limits
   models:
     - name: llama3-8b-8192
       max_input_tokens: 8192
-      max_output_tokens: 8192
       input_price: 0.05
-      output_price: 0.10
+      output_price: 0.08
+      supports_function_calling: true
     - name: llama3-70b-8192
       max_input_tokens: 8192
-      max_output_tokens: 8192
       input_price: 0.59
       output_price: 0.79
+      supports_function_calling: true
     - name: mixtral-8x7b-32768
       max_input_tokens: 32768
-      max_output_tokens: 32768
-      input_price: 0.27
-      output_price: 0.27
+      input_price: 0.24
+      output_price: 0.24
+      supports_function_calling: true
+    - name: gemma-7b-it
+      max_input_tokens: 8192
+      input_price: 0.07
+      output_price: 0.07
+      supports_function_calling: true
 
 - platform: vertexai
   # docs:
@@ -327,12 +305,12 @@
     - name: text-embedding-004
       mode: embedding
       max_input_tokens: 3072
-      default_chunk_size: 3000
+      default_chunk_size: 2000
       max_concurrent_chunks: 5
     - name: text-multilingual-embedding-002
       mode: embedding
       max_input_tokens: 3072
-      default_chunk_size: 3000
+      default_chunk_size: 2000
       max_concurrent_chunks: 5
 
 - platform: vertexai-claude
@@ -451,20 +429,32 @@
   #   - https://developers.cloudflare.com/workers-ai/models/
   #   - https://developers.cloudflare.com/workers-ai/platform/pricing/
   # notes:
-  #   - unable to get max_output_tokens info
+  #   - get max_output_tokens from playground
   models:
     - name: '@cf/meta/llama-3-8b-instruct'
-      max_input_tokens: 4096
-      max_output_tokens: 4096
+      max_input_tokens: 6144
+      max_output_tokens: 2048
       require_max_tokens: true
-    - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
-      max_input_tokens: 4096
-      max_output_tokens: 4096
+      input_price: 0
+      output_price: 0
+    - name: '@hf/mistral/mistral-7b-instruct-v0.2'
+      max_input_tokens: 6144
+      max_output_tokens: 2048
       require_max_tokens: true
+      input_price: 0
+      output_price: 0
     - name: '@cf/qwen/qwen1.5-14b-chat-awq'
-      max_input_tokens: 4096
-      max_output_tokens: 4096
+      max_input_tokens: 6144
+      max_output_tokens: 2048
       require_max_tokens: true
+      input_price: 0
+      output_price: 0
+    - name: '@cf/google/gemma-7b-it'
+      max_input_tokens: 6144
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0
+      output_price: 0
 
 - platform: replicate
   # docs:
@@ -576,7 +566,7 @@
     - name: text-embedding-v2
       mode: embedding
       max_input_tokens: 2048
-      default_chunk_size: 2000
+      default_chunk_size: 1500
       max_concurrent_chunks: 5
 
 - platform: moonshot
@@ -591,14 +581,17 @@
       max_input_tokens: 8000
       input_price: 1.68
       output_price: 1.68
+      supports_function_calling: true
     - name: moonshot-v1-32k
       max_input_tokens: 32000
       input_price: 3.36
       output_price: 3.36
+      supports_function_calling: true
     - name: moonshot-v1-128k
       max_input_tokens: 128000
       input_price: 8.4
       output_price: 8.4
+      supports_function_calling: true
 
 - platform: deepseek
   # docs:
@@ -647,7 +640,7 @@
     - name: embedding-2
       mode: embedding
       max_input_tokens: 2048
-      default_chunk_size: 2000
+      default_chunk_size: 1500
 
 - platform: lingyiwanwu
   # docs:
@@ -686,8 +679,8 @@
 
 - platform: anyscale
   # docs:
-  #   - https://docs.anyscale.com/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct
-  #   - https://docs.endpoints.anyscale.com/pricing
+  #   - https://docs.anyscale.com/endpoints/text-generation/query-a-model
+  #   - https://www.anyscale.com/pricing-detail
   models:
     - name: meta-llama/Meta-Llama-3-8B-Instruct
       max_input_tokens: 8192
@@ -697,10 +690,6 @@
       max_input_tokens: 8192
       input_price: 1.0
       output_price: 1.0
-    - name: codellama/CodeLlama-70b-Instruct-hf
-      max_input_tokens: 4096
-      input_price: 1.0
-      output_price: 1.0
     - name: mistralai/Mistral-7B-Instruct-v0.1
       max_input_tokens: 16384
       input_price: 0.15
@@ -713,6 +702,18 @@
       max_input_tokens: 65536
       input_price: 0.90
       output_price: 0.90
+    - name: google/gemma-7b-it
+      max_input_tokens: 8192
+      input_price: 0.15
+      output_price: 0.15
+    - name: BAAI/bge-large-en-v1.5
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: thenlper/gte-large
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
 
 - platform: deepinfra
   # docs:
@@ -723,10 +724,12 @@
       max_input_tokens: 8192
       input_price: 0.08
       output_price: 0.08
+      supports_function_calling: true
     - name: meta-llama/Meta-Llama-3-70B-Instruct
       max_input_tokens: 8192
       input_price: 0.59
       output_price: 0.79
+      supports_function_calling: true
     - name: mistralai/Mistral-7B-Instruct-v0.3
       max_input_tokens: 32768
       input_price: 0.07
@@ -735,10 +738,16 @@
       max_input_tokens: 32768
       input_price: 0.24
       output_price: 0.24
+      supports_function_calling: true
     - name: mistralai/Mixtral-8x22B-Instruct-v0.1
       max_input_tokens: 65536
       input_price: 0.65
       output_price: 0.65
+      supports_function_calling: true
+    - name: google/gemma-1.1-7b-it
+      max_input_tokens: 8192
+      input_price: 0.07
+      output_price: 0.07
     - name: Qwen/Qwen2-72B-Instruct
       max_input_tokens: 32768
       input_price: 0.59
@@ -747,6 +756,46 @@
       max_input_tokens: 4096
       input_price: 0.14
       output_price: 0.14
+    - name: BAAI/bge-large-en-v1.5
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: BAAI/bge-base-en-v1.5
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: BAAI/bge-m3
+      mode: embedding
+      max_input_tokens: 8192
+      default_chunk_size: 2000
+    - name: intfloat/e5-base-v2
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: intfloat/e5-large-v2
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: intfloat/multilingual-e5-large
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: sentence-transformers/all-MiniLM-L6-v2
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: sentence-transformers/paraphrase-MiniLM-L6-v2
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: thenlper/gte-base
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: thenlper/gte-large
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
 
 - platform: fireworks
   # docs:
@@ -758,7 +807,7 @@
       input_price: 0.2
       output_price: 0.2
       supports_vision: true
-    - name: accounts/fireworks/models/firefunction-v1
+    - name: accounts/fireworks/models/firefunction-v2
       max_input_tokens: 32768
       input_price: 0.2
       output_price: 0.2
@@ -783,6 +832,10 @@
       max_input_tokens: 65536
       input_price: 0.9
       output_price: 0.9
+    - name: accounts/fireworks/models/gemma-7b-it
+      max_input_tokens: 8192
+      input_price: 0.2
+      output_price: 0.2
     - name: accounts/fireworks/models/qwen2-72b-instruct
       max_input_tokens: 32768
       input_price: 0.9
@@ -796,6 +849,22 @@
       input_price: 0.2
       output_price: 0.2
       supports_vision: true
+    - name: nomic-ai/nomic-embed-text-v1.5
+      mode: embedding
+      max_input_tokens: 8192
+      default_chunk_size: 1500
+    - name: WhereIsAI/UAE-Large-V1
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: thenlper/gte-large
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: thenlper/gte-base
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
 
 - platform: openrouter
   # docs:
@@ -805,30 +874,22 @@
       max_input_tokens: 8192
       input_price: 0.07
       output_price: 0.07
-    - name: meta-llama/llama-3-8b-instruct:nitro
-      max_input_tokens: 8192
-      input_price: 0.2
-      output_price: 0.2
-    - name: meta-llama/llama-3-8b-instruct:extended
-      max_input_tokens: 16384
-      input_price: 0.2
-      output_price: 1.125
     - name: meta-llama/llama-3-70b-instruct
       max_input_tokens: 8192
       input_price: 0.59
       output_price: 0.79
-    - name: meta-llama/llama-3-70b-instruct:nitro
-      max_input_tokens: 8192
-      input_price: 0.9
-      output_price: 0.9
-    - name: mistralai/mistral-7b-instruct-v0.3
-      max_input_tokens: 32768
-      input_price: 0.07
-      output_price: 0.07
     - name: microsoft/phi-3-mini-128k-instruct
       max_input_tokens: 128000
       input_price: 0.1
       output_price: 0.1
+    - name: microsoft/phi-3-medium-4k-instruct
+      max_input_tokens: 4000
+      input_price: 0.14
+      output_price: 0.14
+    - name: microsoft/phi-3-medium-128k-instruct
+      max_input_tokens: 128000
+      input_price: 1
+      output_price: 1
     - name: qwen/qwen-2-72b-instruct
       max_input_tokens: 32768
       input_price: 0.9
@@ -845,15 +906,6 @@
       output_price: 30
       supports_vision: true
       supports_function_calling: true
-    - name: openai/gpt-4-turbo-preview
-      max_input_tokens: 128000
-      input_price: 10
-      output_price: 30
-      supports_function_calling: true
-    - name: openai/gpt-4
-      max_input_tokens: 8192
-      input_price: 30
-      output_price: 60
     - name: openai/gpt-3.5-turbo
       max_input_tokens: 16385
       input_price: 0.5
@@ -876,6 +928,14 @@
       input_price: 0.125
       output_price: 0.375
       supports_function_calling: true
+    - name: anthropic/claude-3.5-sonnet
+      max_input_tokens: 200000
+      max_output_tokens: 4096
+      require_max_tokens: true
+      input_price: 3
+      output_price: 15
+      supports_vision: true
+      supports_function_calling: true
     - name: anthropic/claude-3-opus
       max_input_tokens: 200000
       max_output_tokens: 4096
@@ -900,6 +960,10 @@
       output_price: 1.25
       supports_vision: true
       supports_function_calling: true
+    - name: mistralai/mistral-7b-instruct-v0.3
+      max_input_tokens: 32768
+      input_price: 0.07
+      output_price: 0.07
     - name: mistralai/mixtral-8x7b-instruct
       max_input_tokens: 32768
       input_price: 0.24
@@ -908,17 +972,14 @@
       max_input_tokens: 65536
       input_price: 0.65
       output_price: 0.65
-      supports_function_calling: true
     - name: mistralai/mistral-small
       max_input_tokens: 32000
       input_price: 2
       output_price: 6
-      supports_function_calling: true
     - name: mistralai/mistral-large
       max_input_tokens: 32000
       input_price: 8
       output_price: 24
-      supports_function_calling: true
     - name: cohere/command-r
       max_input_tokens: 128000
       input_price: 0.5
@@ -937,6 +998,22 @@
       max_input_tokens: 32768
       input_price: 0.14
       output_price: 0.28
+    - name: perplexity/llama-3-sonar-small-32k-chat
+      max_input_tokens: 32768
+      input_price: 0.2
+      output_price: 0.2
+    - name: perplexity/llama-3-sonar-small-32k-online
+      max_input_tokens: 28000
+      input_price: 0.2
+      output_price: 0.2
+    - name: perplexity/llama-3-sonar-large-32k-chat
+      max_input_tokens: 32768
+      input_price: 1
+      output_price: 1
+    - name: perplexity/llama-3-sonar-large-32k-online
+      max_input_tokens: 28000
+      input_price: 1
+      output_price: 1
 
 
 - platform: octoai
@@ -964,10 +1041,15 @@
       max_input_tokens: 65536
       input_price: 0.86
       output_price: 0.86
+    - name: thenlper/gte-large
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
 
 - platform: together
   # docs:
   #   - https://docs.together.ai/docs/inference-models
+  #   - https://docs.together.ai/docs/embedding-models
   #   - https://www.together.ai/pricing
   models:
     - name: meta-llama/Llama-3-8b-chat-hf
@@ -990,7 +1072,35 @@
       max_input_tokens: 65536
       input_price: 1.2
       output_price: 1.2
+    - name: google/gemma-7b-it
+      max_input_tokens: 8192
+      input_price: 0.2
+      output_price: 0.2
     - name: Qwen/Qwen2-72B-Instruct
       max_input_tokens: 32768
       input_price: 0.9
-      output_price: 0.9
\ No newline at end of file
+      output_price: 0.9
+    - name: togethercomputer/m2-bert-80M-2k-retrieval
+      mode: embedding
+      max_input_tokens: 2048
+      default_chunk_size: 1500
+    - name: togethercomputer/m2-bert-80M-8k-retrieval
+      mode: embedding
+      max_input_tokens: 8192
+      default_chunk_size: 1500
+    - name: togethercomputer/m2-bert-80M-32k-retrieval
+      mode: embedding
+      max_input_tokens: 32768
+      default_chunk_size: 1500
+    - name: WhereIsAI/UAE-Large-V1
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: BAAI/bge-large-en-v1.5
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000
+    - name: BAAI/bge-base-en-v1.5
+      mode: embedding
+      max_input_tokens: 512
+      default_chunk_size: 1000