From 419c626485ca045adfe6d99e5ed34ebb51524ced Mon Sep 17 00:00:00 2001 From: sigoden Date: Thu, 3 Oct 2024 12:00:23 +0800 Subject: [PATCH] chore: update models.yaml --- models.yaml | 261 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 166 insertions(+), 95 deletions(-) diff --git a/models.yaml b/models.yaml index 7056817..2ce5626 100644 --- a/models.yaml +++ b/models.yaml @@ -8,19 +8,19 @@ - platform: openai models: - name: gpt-4o - max_input_tokens: 128000 - max_output_tokens: 4096 - input_price: 5 - output_price: 15 - supports_vision: true - supports_function_calling: true - - name: gpt-4o-2024-08-06 max_input_tokens: 128000 max_output_tokens: 16384 input_price: 2.5 output_price: 10 supports_vision: true supports_function_calling: true + - name: gpt-4o-2024-05-13 + max_input_tokens: 128000 + max_output_tokens: 4096 + input_price: 5 + output_price: 15 + supports_vision: true + supports_function_calling: true - name: chatgpt-4o-latest max_input_tokens: 128000 max_output_tokens: 16384 @@ -311,36 +311,34 @@ # - https://console.groq.com/docs/api-reference#chat - platform: groq models: - - name: llama3-70b-8192 - max_input_tokens: 8192 + - name: llama-3.1-70b-versatile + max_input_tokens: 128000 input_price: 0 output_price: 0 supports_function_calling: true - - name: llama3-8b-8192 - max_input_tokens: 8192 + - name: llama-3.1-8b-instant + max_input_tokens: 128000 input_price: 0 output_price: 0 supports_function_calling: true - - name: llama3-groq-70b-8192-tool-use-preview - max_input_tokens: 8192 + - name: llama-3.2-90b-vision-preview + max_input_tokens: 128000 input_price: 0 output_price: 0 - supports_function_calling: true - - name: llama3-groq-8b-8192-tool-use-preview - max_input_tokens: 8192 + supports_vision: true + - name: llama-3.2-11b-vision-preview + max_input_tokens: 128000 input_price: 0 output_price: 0 - supports_function_calling: true - - name: llama-3.1-70b-versatile - max_input_tokens: 8192 + supports_vision: true + - name: llama-3.2-3b-preview + max_input_tokens: 128000 input_price: 0 output_price: 0 - supports_function_calling: true - - name: llama-3.1-8b-instant - max_input_tokens: 8192 + - name: llama-3.2-1b-preview + max_input_tokens: 128000 input_price: 0 output_price: 0 - supports_function_calling: true - name: gemma2-9b-it max_input_tokens: 8192 input_price: 0 @@ -355,6 +353,9 @@ - name: llama3.1 max_input_tokens: 128000 supports_function_calling: true + - name: llama3.2 + max_input_tokens: 128000 + supports_function_calling: true - name: gemma2 max_input_tokens: 8192 - name: qwen2.5 @@ -362,10 +363,10 @@ supports_function_calling: true - name: phi3.5 max_input_tokens: 128000 - - name: mistral-small + - name: nemotron-mini max_input_tokens: 128000 supports_function_calling: true - - name: mistral-nemo + - name: mistral-small max_input_tokens: 128000 supports_function_calling: true - name: deepseek-coder-v2 @@ -478,7 +479,7 @@ output_price: 15 supports_vision: true supports_function_calling: true - - name: anthropic.claude-3-opus-20240229-v1:0 + - name: us.anthropic.claude-3-opus-20240229-v1:0 max_input_tokens: 200000 max_output_tokens: 4096 require_max_tokens: true @@ -517,14 +518,26 @@ input_price: 0.22 output_price: 0.22 supports_function_calling: true - - name: meta.llama3-70b-instruct-v1:0 - max_input_tokens: 8192 - input_price: 2.65 - output_price: 3.5 - - name: meta.llama3-8b-instruct-v1:0 - max_input_tokens: 8192 - input_price: 0.3 - output_price: 0.6 + - name: us.meta.llama3-2-90b-instruct-v1:0 + max_input_tokens: 128000 + input_price: 2 + output_price: 2 + supports_function_calling: true + supports_vision: true + - name: us.meta.llama3-2-11b-instruct-v1:0 + max_input_tokens: 128000 + input_price: 0.35 + output_price: 0.35 + supports_function_calling: true + supports_vision: true + - name: us.meta.llama3-2-3b-instruct-v1:0 + max_input_tokens: 128000 + input_price: 0.15 + output_price: 0.15 + - name: us.meta.llama3-2-1b-instruct-v1:0 + max_input_tokens: 128000 + input_price: 0.1 + output_price: 0.1 - name: mistral.mistral-large-2407-v1:0 max_input_tokens: 128000 input_price: 2 @@ -568,19 +581,31 @@ # - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/ - platform: cloudflare models: + - name: '@cf/meta/llama-3.1-70b-instruct' + max_input_tokens: 6144 + max_output_tokens: 2048 + require_max_tokens: true + input_price: 0 + output_price: 0 - name: '@cf/meta/llama-3.1-8b-instruct' max_input_tokens: 6144 max_output_tokens: 2048 require_max_tokens: true input_price: 0 output_price: 0 - - name: '@cf/meta/llama-3-8b-instruct' + - name: '@cf/meta/llama-3.2-11b-vision-instruct' max_input_tokens: 6144 max_output_tokens: 2048 require_max_tokens: true input_price: 0 output_price: 0 - - name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq' + - name: '@cf/meta/llama-3.2-3b-instruct' + max_input_tokens: 6144 + max_output_tokens: 2048 + require_max_tokens: true + input_price: 0 + output_price: 0 + - name: '@cf/meta/llama-3.2-1b-instruct' max_input_tokens: 6144 max_output_tokens: 2048 require_max_tokens: true @@ -598,12 +623,18 @@ # - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference - platform: huggingface models: - - name: meta-llama/Meta-Llama-3-8B-Instruct + - name: NousResearch/Hermes-3-Llama-3.1-8B max_input_tokens: 8192 max_output_tokens: 4096 require_max_tokens: true input_price: 0 output_price: 0 + - name: mistralai/Mistral-Small-Instruct-2409 + max_input_tokens: 128000 + max_output_tokens: 4096 + require_max_tokens: true + input_price: 0 + output_price: 0 - name: mistralai/Mistral-Nemo-Instruct-2407 max_input_tokens: 128000 max_output_tokens: 4096 @@ -878,10 +909,12 @@ max_input_tokens: 128000 - name: meta-llama-3.1-8b-instruct max_input_tokens: 128000 - - name: meta-llama-3-70b-instruct + - name: meta-llama-3.2-90b-vision-instruct max_input_tokens: 8192 - - name: meta-llama-3-8b-instruct + supports_vision: true + - name: meta-llama-3.2-11b-vision-instruct max_input_tokens: 8192 + supports_vision: true - name: mistral-large-2407 max_input_tokens: 128000 supports_function_calling: true @@ -910,10 +943,13 @@ - name: ai21-jamba-1.5-mini max_input_tokens: 256000 supports_function_calling: true + - name: phi-3.5-moe-instruct + max_input_tokens: 128000 - name: phi-3.5-mini-instruct max_input_tokens: 128000 - - name: phi-3-medium-128k-instruct + - name: phi-3.5-vision-instruct max_input_tokens: 128000 + supports_vision: true # Links: # - https://deepinfra.com/models @@ -935,14 +971,22 @@ input_price: 0.055 output_price: 0.055 supports_function_calling: true - - name: meta-llama/Meta-Llama-3-70B-Instruct - max_input_tokens: 8192 + - name: meta-llama/Llama-3.2-90B-Vision-Instruct + max_input_tokens: 128000 input_price: 0.35 output_price: 0.4 - - name: meta-llama/Meta-Llama-3-8B-Instruct - max_input_tokens: 8192 + - name: meta-llama/Llama-3.2-11B-Vision-Instruct + max_input_tokens: 128000 input_price: 0.055 output_price: 0.055 + - name: meta-llama/Llama-3.2-3B-Instruct + max_input_tokens: 128000 + input_price: 0.03 + output_price: 0.05 + - name: meta-llama/Llama-3.2-1B-Instruct + max_input_tokens: 128000 + input_price: 0.01 + output_price: 0.02 - name: mistralai/Mistral-Nemo-Instruct-2407 max_input_tokens: 128000 input_price: 0.13 @@ -1008,18 +1052,32 @@ max_input_tokens: 131072 input_price: 0.2 output_price: 0.2 - - name: accounts/fireworks/models/llama-v3-70b-instruct - max_input_tokens: 8192 + - name: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + max_input_tokens: 131072 input_price: 0.9 output_price: 0.9 - - name: accounts/fireworks/models/llama-v3-8b-instruct - max_input_tokens: 8192 + supports_vision: true + - name: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + max_input_tokens: 131072 input_price: 0.2 output_price: 0.2 + supports_vision: true + - name: accounts/fireworks/models/llama-v3p2-3b-instruct + max_input_tokens: 131072 + input_price: 0.1 + output_price: 0.1 + - name: accounts/fireworks/models/llama-v3p2-1b-instruct + max_input_tokens: 131072 + input_price: 0.1 + output_price: 0.1 - name: accounts/fireworks/models/gemma2-9b-it max_input_tokens: 8192 input_price: 0.2 output_price: 0.2 + - name: accounts/fireworks/models/qwen2p5-72b-instruct + max_input_tokens: 32768 + input_price: 0.9 + output_price: 0.9 - name: accounts/fireworks/models/phi-3-vision-128k-instruct max_input_tokens: 131072 input_price: 0.2 @@ -1060,14 +1118,14 @@ models: - name: openai/gpt-4o max_input_tokens: 128000 - input_price: 5 - output_price: 15 + input_price: 2.5 + output_price: 10 supports_vision: true supports_function_calling: true - - name: openai/gpt-4o-2024-08-06 + - name: openai/gpt-4o-2024-05-13 max_input_tokens: 128000 - input_price: 2.5 - output_price: 10 + input_price: 5 + output_price: 15 supports_vision: true supports_function_calling: true - name: openai/chatgpt-4o-latest @@ -1190,17 +1248,24 @@ max_input_tokens: 131072 input_price: 0.09 output_price: 0.09 - supports_function_calling: true - - name: meta-llama/llama-3-70b-instruct - max_input_tokens: 8192 - input_price: 0.59 - output_price: 0.79 - supports_function_calling: true - - name: meta-llama/llama-3-8b-instruct - max_input_tokens: 8192 - input_price: 0.07 - output_price: 0.07 - supports_function_calling: true + - name: meta-llama/llama-3.2-90b-vision-instruct + max_input_tokens: 131072 + input_price: 0.35 + output_price: 0.4 + supports_vision: true + - name: meta-llama/llama-3.2-11b-vision-instruct + max_input_tokens: 131072 + input_price: 0.055 + output_price: 0.055 + supports_vision: true + - name: meta-llama/llama-3.2-3b-instruct + max_input_tokens: 131072 + input_price: 0.03 + output_price: 0.05 + - name: meta-llama/llama-3.2-1b-instruct + max_input_tokens: 131072 + input_price: 0.01 + output_price: 0.02 - name: mistralai/mistral-large max_input_tokens: 128000 input_price: 2 @@ -1346,8 +1411,8 @@ max_batch_size: 100 # Links -# - https://siliconflow.cn/zh-cn/maaspricing -# - https://docs.siliconflow.cn/reference/chat-completions-3 +# - https://siliconflow.cn/zh-cn/pricing#siliconcloud-1417 +# - https://docs.siliconflow.cn/api-reference/chat-completions/chat-completions - platform: siliconflow models: - name: meta-llama/Meta-Llama-3.1-405B-Instruct @@ -1366,10 +1431,12 @@ max_input_tokens: 32768 input_price: 0.578 output_price: 0.578 + supports_function_calling: true - name: Qwen/Qwen2.5-7B-Instruct max_input_tokens: 32768 input_price: 0 output_price: 0 + supports_function_calling: true - name: Qwen/Qwen2.5-Coder-7B-Instruct max_input_tokens: 32768 input_price: 0 @@ -1386,6 +1453,7 @@ max_input_tokens: 32768 input_price: 0.186 output_price: 0.186 + supports_function_calling: true - name: BAAI/bge-large-en-v1.5 type: embedding input_price: 0 @@ -1417,28 +1485,33 @@ models: - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo max_input_tokens: 32768 - input_price: 5 - output_price: 5 + input_price: 3.5 + output_price: 3.5 + supports_function_calling: true - name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - max_input_tokens: 8192 + max_input_tokens: 32768 input_price: 0.88 output_price: 0.88 + supports_function_calling: true - name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - max_input_tokens: 8192 + max_input_tokens: 32768 input_price: 0.18 output_price: 0.18 - - name: meta-llama/Meta-Llama-3-70B-Instruct-Turbo - max_input_tokens: 8192 + supports_function_calling: true + - name: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + max_input_tokens: 131072 input_price: 0.88 output_price: 0.88 - - name: meta-llama/Meta-Llama-3-8B-Instruct-Turbo - max_input_tokens: 8192 + supports_vision: true + - name: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + max_input_tokens: 131072 input_price: 0.18 output_price: 0.18 - - name: Qwen/Qwen2-72B-Instruct - max_input_tokens: 32768 - input_price: 0.9 - output_price: 0.9 + supports_vision: true + - name: meta-llama/Llama-3.2-3B-Instruct-Turbo + max_input_tokens: 131072 + input_price: 0.06 + output_price: 0.06 - name: WhereIsAI/UAE-Large-V1 type: embedding input_price: 0.016 @@ -1451,45 +1524,39 @@ max_tokens_per_chunk: 512 default_chunk_size: 1000 max_batch_size: 100 + - name: Salesforce/Llama-Rank-V1 + type: reranker + max_input_tokens: 8192 + input_price: 0.1 # Links: # - https://jina.ai/ # - https://api.jina.ai/redoc - platform: jina models: - - name: jina-clip-v1 + - name: jina-embeddings-v3 type: embedding input_price: 0 max_tokens_per_chunk: 8192 - default_chunk_size: 1500 + default_chunk_size: 2000 max_batch_size: 100 - - name: jina-embeddings-v2-base-en + - name: jina-colbert-v2 type: embedding input_price: 0 max_tokens_per_chunk: 8192 default_chunk_size: 1500 max_batch_size: 100 - - name: jina-embeddings-v2-base-zh + - name: jina-clip-v1 type: embedding input_price: 0 max_tokens_per_chunk: 8192 default_chunk_size: 1500 max_batch_size: 100 - name: jina-colbert-v2 - type: embedding - input_price: 0 - max_tokens_per_chunk: 8192 - default_chunk_size: 1500 - max_batch_size: 100 - - name: jina-reranker-v2-base-multilingual - type: reranker - max_input_tokens: 1024 - input_price: 0 - - name: jina-reranker-v1-base-en type: reranker max_input_tokens: 8192 input_price: 0 - - name: jina-colbert-v2 + - name: jina-reranker-v2-base-multilingual type: reranker max_input_tokens: 8192 input_price: 0 @@ -1528,7 +1595,11 @@ max_tokens_per_chunk: 16000 default_chunk_size: 3000 max_batch_size: 128 - - name: rerank-1 + - name: rerank-2 type: reranker - max_input_tokens: 8000 + max_input_tokens: 16000 input_price: 0.05 + - name: rerank-2-lite + type: reranker + max_input_tokens: 8000 + input_price: 0.02