From 419c626485ca045adfe6d99e5ed34ebb51524ced Mon Sep 17 00:00:00 2001
From: sigoden <sigoden@gmail.com>
Date: Thu, 3 Oct 2024 12:00:23 +0800
Subject: [PATCH] chore: update models.yaml

---
 models.yaml | 261 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 166 insertions(+), 95 deletions(-)

diff --git a/models.yaml b/models.yaml
index 7056817..2ce5626 100644
--- a/models.yaml
+++ b/models.yaml
@@ -8,19 +8,19 @@
 - platform: openai
   models:
     - name: gpt-4o
-      max_input_tokens: 128000
-      max_output_tokens: 4096
-      input_price: 5
-      output_price: 15
-      supports_vision: true
-      supports_function_calling: true
-    - name: gpt-4o-2024-08-06
       max_input_tokens: 128000
       max_output_tokens: 16384
       input_price: 2.5
       output_price: 10
       supports_vision: true
       supports_function_calling: true
+    - name: gpt-4o-2024-05-13
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      input_price: 5
+      output_price: 15
+      supports_vision: true
+      supports_function_calling: true
     - name: chatgpt-4o-latest
       max_input_tokens: 128000
       max_output_tokens: 16384
@@ -311,36 +311,34 @@
 #  - https://console.groq.com/docs/api-reference#chat
 - platform: groq
   models:
-    - name: llama3-70b-8192
-      max_input_tokens: 8192
+    - name: llama-3.1-70b-versatile
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
       supports_function_calling: true
-    - name: llama3-8b-8192
-      max_input_tokens: 8192
+    - name: llama-3.1-8b-instant
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
       supports_function_calling: true
-    - name: llama3-groq-70b-8192-tool-use-preview
-      max_input_tokens: 8192
+    - name: llama-3.2-90b-vision-preview
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
-      supports_function_calling: true
-    - name: llama3-groq-8b-8192-tool-use-preview
-      max_input_tokens: 8192
+      supports_vision: true
+    - name: llama-3.2-11b-vision-preview
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
-      supports_function_calling: true
-    - name: llama-3.1-70b-versatile
-      max_input_tokens: 8192
+      supports_vision: true
+    - name: llama-3.2-3b-preview
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
-      supports_function_calling: true
-    - name: llama-3.1-8b-instant
-      max_input_tokens: 8192
+    - name: llama-3.2-1b-preview
+      max_input_tokens: 128000
       input_price: 0
       output_price: 0
-      supports_function_calling: true
     - name: gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0
@@ -355,6 +353,9 @@
     - name: llama3.1
       max_input_tokens: 128000
       supports_function_calling: true
+    - name: llama3.2
+      max_input_tokens: 128000
+      supports_function_calling: true
     - name: gemma2
       max_input_tokens: 8192
     - name: qwen2.5
@@ -362,10 +363,10 @@
       supports_function_calling: true
     - name: phi3.5
       max_input_tokens: 128000
-    - name: mistral-small
+    - name: nemotron-mini
       max_input_tokens: 128000
       supports_function_calling: true
-    - name: mistral-nemo
+    - name: mistral-small
       max_input_tokens: 128000
       supports_function_calling: true
     - name: deepseek-coder-v2
@@ -478,7 +479,7 @@
       output_price: 15
       supports_vision: true
       supports_function_calling: true
-    - name: anthropic.claude-3-opus-20240229-v1:0
+    - name: us.anthropic.claude-3-opus-20240229-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
       require_max_tokens: true
@@ -517,14 +518,26 @@
       input_price: 0.22
       output_price: 0.22
       supports_function_calling: true
-    - name: meta.llama3-70b-instruct-v1:0
-      max_input_tokens: 8192
-      input_price: 2.65
-      output_price: 3.5
-    - name: meta.llama3-8b-instruct-v1:0
-      max_input_tokens: 8192
-      input_price: 0.3
-      output_price: 0.6
+    - name: us.meta.llama3-2-90b-instruct-v1:0
+      max_input_tokens: 128000
+      input_price: 2
+      output_price: 2
+      supports_function_calling: true
+      supports_vision: true
+    - name: us.meta.llama3-2-11b-instruct-v1:0
+      max_input_tokens: 128000
+      input_price: 0.35
+      output_price: 0.35
+      supports_function_calling: true
+      supports_vision: true
+    - name: us.meta.llama3-2-3b-instruct-v1:0
+      max_input_tokens: 128000
+      input_price: 0.15
+      output_price: 0.15
+    - name: us.meta.llama3-2-1b-instruct-v1:0
+      max_input_tokens: 128000
+      input_price: 0.1
+      output_price: 0.1
     - name: mistral.mistral-large-2407-v1:0
       max_input_tokens: 128000
       input_price: 2
@@ -568,19 +581,31 @@
 #  - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
 - platform: cloudflare
   models:
+    - name: '@cf/meta/llama-3.1-70b-instruct'
+      max_input_tokens: 6144
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0
+      output_price: 0
     - name: '@cf/meta/llama-3.1-8b-instruct'
       max_input_tokens: 6144
       max_output_tokens: 2048
       require_max_tokens: true
       input_price: 0
       output_price: 0
-    - name: '@cf/meta/llama-3-8b-instruct'
+    - name: '@cf/meta/llama-3.2-11b-vision-instruct'
       max_input_tokens: 6144
       max_output_tokens: 2048
       require_max_tokens: true
       input_price: 0
       output_price: 0
-    - name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
+    - name: '@cf/meta/llama-3.2-3b-instruct'
+      max_input_tokens: 6144
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0
+      output_price: 0
+    - name: '@cf/meta/llama-3.2-1b-instruct'
       max_input_tokens: 6144
       max_output_tokens: 2048
       require_max_tokens: true
@@ -598,12 +623,18 @@
 #  - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
 - platform: huggingface
   models:
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
+    - name: NousResearch/Hermes-3-Llama-3.1-8B
       max_input_tokens: 8192
       max_output_tokens: 4096
       require_max_tokens: true
       input_price: 0
       output_price: 0
+    - name: mistralai/Mistral-Small-Instruct-2409
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      require_max_tokens: true
+      input_price: 0
+      output_price: 0
     - name: mistralai/Mistral-Nemo-Instruct-2407
       max_input_tokens: 128000
       max_output_tokens: 4096
@@ -878,10 +909,12 @@
       max_input_tokens: 128000
     - name: meta-llama-3.1-8b-instruct
       max_input_tokens: 128000
-    - name: meta-llama-3-70b-instruct
+    - name: meta-llama-3.2-90b-vision-instruct
       max_input_tokens: 8192
-    - name: meta-llama-3-8b-instruct
+      supports_vision: true
+    - name: meta-llama-3.2-11b-vision-instruct
       max_input_tokens: 8192
+      supports_vision: true
     - name: mistral-large-2407
       max_input_tokens: 128000
       supports_function_calling: true
@@ -910,10 +943,13 @@
     - name: ai21-jamba-1.5-mini
       max_input_tokens: 256000
       supports_function_calling: true
+    - name: phi-3.5-moe-instruct
+      max_input_tokens: 128000
     - name: phi-3.5-mini-instruct
       max_input_tokens: 128000
-    - name: phi-3-medium-128k-instruct
+    - name: phi-3.5-vision-instruct
       max_input_tokens: 128000
+      supports_vision: true
 
 # Links:
 #  - https://deepinfra.com/models
@@ -935,14 +971,22 @@
       input_price: 0.055
       output_price: 0.055
       supports_function_calling: true
-    - name: meta-llama/Meta-Llama-3-70B-Instruct
-      max_input_tokens: 8192
+    - name: meta-llama/Llama-3.2-90B-Vision-Instruct
+      max_input_tokens: 128000
       input_price: 0.35
       output_price: 0.4
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
-      max_input_tokens: 8192
+    - name: meta-llama/Llama-3.2-11B-Vision-Instruct
+      max_input_tokens: 128000
       input_price: 0.055
       output_price: 0.055
+    - name: meta-llama/Llama-3.2-3B-Instruct
+      max_input_tokens: 128000
+      input_price: 0.03
+      output_price: 0.05
+    - name: meta-llama/Llama-3.2-1B-Instruct
+      max_input_tokens: 128000
+      input_price: 0.01
+      output_price: 0.02
     - name: mistralai/Mistral-Nemo-Instruct-2407
       max_input_tokens: 128000
       input_price: 0.13
@@ -1008,18 +1052,32 @@
       max_input_tokens: 131072
       input_price: 0.2
       output_price: 0.2
-    - name: accounts/fireworks/models/llama-v3-70b-instruct
-      max_input_tokens: 8192
+    - name: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+      max_input_tokens: 131072
       input_price: 0.9
       output_price: 0.9
-    - name: accounts/fireworks/models/llama-v3-8b-instruct
-      max_input_tokens: 8192
+      supports_vision: true
+    - name: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+      max_input_tokens: 131072
       input_price: 0.2
       output_price: 0.2
+      supports_vision: true
+    - name: accounts/fireworks/models/llama-v3p2-3b-instruct
+      max_input_tokens: 131072
+      input_price: 0.1
+      output_price: 0.1
+    - name: accounts/fireworks/models/llama-v3p2-1b-instruct
+      max_input_tokens: 131072
+      input_price: 0.1
+      output_price: 0.1
     - name: accounts/fireworks/models/gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0.2
       output_price: 0.2
+    - name: accounts/fireworks/models/qwen2p5-72b-instruct
+      max_input_tokens: 32768
+      input_price: 0.9
+      output_price: 0.9
     - name: accounts/fireworks/models/phi-3-vision-128k-instruct
       max_input_tokens: 131072
       input_price: 0.2
@@ -1060,14 +1118,14 @@
   models:
     - name: openai/gpt-4o
       max_input_tokens: 128000
-      input_price: 5
-      output_price: 15
+      input_price: 2.5
+      output_price: 10
       supports_vision: true
       supports_function_calling: true
-    - name: openai/gpt-4o-2024-08-06
+    - name: openai/gpt-4o-2024-05-13
       max_input_tokens: 128000
-      input_price: 2.5
-      output_price: 10
+      input_price: 5
+      output_price: 15
       supports_vision: true
       supports_function_calling: true
     - name: openai/chatgpt-4o-latest
@@ -1190,17 +1248,24 @@
       max_input_tokens: 131072
       input_price: 0.09
       output_price: 0.09
-      supports_function_calling: true
-    - name: meta-llama/llama-3-70b-instruct
-      max_input_tokens: 8192
-      input_price: 0.59
-      output_price: 0.79
-      supports_function_calling: true
-    - name: meta-llama/llama-3-8b-instruct
-      max_input_tokens: 8192
-      input_price: 0.07
-      output_price: 0.07
-      supports_function_calling: true
+    - name: meta-llama/llama-3.2-90b-vision-instruct
+      max_input_tokens: 131072
+      input_price: 0.35
+      output_price: 0.4
+      supports_vision: true
+    - name: meta-llama/llama-3.2-11b-vision-instruct
+      max_input_tokens: 131072
+      input_price: 0.055
+      output_price: 0.055
+      supports_vision: true
+    - name: meta-llama/llama-3.2-3b-instruct
+      max_input_tokens: 131072
+      input_price: 0.03
+      output_price: 0.05
+    - name: meta-llama/llama-3.2-1b-instruct
+      max_input_tokens: 131072
+      input_price: 0.01
+      output_price: 0.02
     - name: mistralai/mistral-large
       max_input_tokens: 128000
       input_price: 2
@@ -1346,8 +1411,8 @@
       max_batch_size: 100
 
 # Links
-#  - https://siliconflow.cn/zh-cn/maaspricing
-#  - https://docs.siliconflow.cn/reference/chat-completions-3
+#  - https://siliconflow.cn/zh-cn/pricing#siliconcloud-1417
+#  - https://docs.siliconflow.cn/api-reference/chat-completions/chat-completions
 - platform: siliconflow
   models:
     - name: meta-llama/Meta-Llama-3.1-405B-Instruct
@@ -1366,10 +1431,12 @@
       max_input_tokens: 32768
       input_price: 0.578
       output_price: 0.578
+      supports_function_calling: true
     - name: Qwen/Qwen2.5-7B-Instruct
       max_input_tokens: 32768
       input_price: 0
       output_price: 0
+      supports_function_calling: true
     - name: Qwen/Qwen2.5-Coder-7B-Instruct
       max_input_tokens: 32768
       input_price: 0
@@ -1386,6 +1453,7 @@
       max_input_tokens: 32768
       input_price: 0.186
       output_price: 0.186
+      supports_function_calling: true
     - name: BAAI/bge-large-en-v1.5
       type: embedding
       input_price: 0
@@ -1417,28 +1485,33 @@
   models:
     - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
       max_input_tokens: 32768
-      input_price: 5
-      output_price: 5
+      input_price: 3.5
+      output_price: 3.5
+      supports_function_calling: true
     - name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
-      max_input_tokens: 8192
+      max_input_tokens: 32768
       input_price: 0.88
       output_price: 0.88
+      supports_function_calling: true
     - name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
-      max_input_tokens: 8192
+      max_input_tokens: 32768
       input_price: 0.18
       output_price: 0.18
-    - name: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
-      max_input_tokens: 8192
+      supports_function_calling: true
+    - name: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+      max_input_tokens: 131072
       input_price: 0.88
       output_price: 0.88
-    - name: meta-llama/Meta-Llama-3-8B-Instruct-Turbo
-      max_input_tokens: 8192
+      supports_vision: true
+    - name: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+      max_input_tokens: 131072
       input_price: 0.18
       output_price: 0.18
-    - name: Qwen/Qwen2-72B-Instruct
-      max_input_tokens: 32768
-      input_price: 0.9
-      output_price: 0.9
+      supports_vision: true
+    - name: meta-llama/Llama-3.2-3B-Instruct-Turbo
+      max_input_tokens: 131072
+      input_price: 0.06
+      output_price: 0.06
     - name: WhereIsAI/UAE-Large-V1
       type: embedding
       input_price: 0.016
@@ -1451,45 +1524,39 @@
       max_tokens_per_chunk: 512
       default_chunk_size: 1000
       max_batch_size: 100
+    - name: Salesforce/Llama-Rank-V1
+      type: reranker
+      max_input_tokens: 8192
+      input_price: 0.1
 
 # Links:
 #  - https://jina.ai/
 #  - https://api.jina.ai/redoc
 - platform: jina
   models:
-    - name: jina-clip-v1
+    - name: jina-embeddings-v3
       type: embedding
       input_price: 0
       max_tokens_per_chunk: 8192
-      default_chunk_size: 1500
+      default_chunk_size: 2000
       max_batch_size: 100
-    - name: jina-embeddings-v2-base-en
+    - name: jina-colbert-v2
       type: embedding
       input_price: 0
       max_tokens_per_chunk: 8192
       default_chunk_size: 1500
       max_batch_size: 100
-    - name: jina-embeddings-v2-base-zh
+    - name: jina-clip-v1
       type: embedding
       input_price: 0
       max_tokens_per_chunk: 8192
       default_chunk_size: 1500
       max_batch_size: 100
     - name: jina-colbert-v2
-      type: embedding
-      input_price: 0
-      max_tokens_per_chunk: 8192
-      default_chunk_size: 1500
-      max_batch_size: 100
-    - name: jina-reranker-v2-base-multilingual
-      type: reranker
-      max_input_tokens: 1024
-      input_price: 0
-    - name: jina-reranker-v1-base-en
       type: reranker
       max_input_tokens: 8192
       input_price: 0
-    - name: jina-colbert-v2
+    - name: jina-reranker-v2-base-multilingual
       type: reranker
       max_input_tokens: 8192
       input_price: 0
@@ -1528,7 +1595,11 @@
       max_tokens_per_chunk: 16000
       default_chunk_size: 3000
       max_batch_size: 128
-    - name: rerank-1
+    - name: rerank-2
       type: reranker
-      max_input_tokens: 8000
+      max_input_tokens: 16000
       input_price: 0.05
+    - name: rerank-2-lite
+      type: reranker
+      max_input_tokens: 8000
+      input_price: 0.02