From 6462a587428c38c17cbb6bce440cbf10c966eecc Mon Sep 17 00:00:00 2001 From: sigoden Date: Mon, 2 Sep 2024 07:04:29 +0800 Subject: [PATCH] chore: update models.yaml --- models.yaml | 203 ++++++++++++++++++++++++---------------------------- 1 file changed, 95 insertions(+), 108 deletions(-) diff --git a/models.yaml b/models.yaml index 106adbd..8ef8e7b 100644 --- a/models.yaml +++ b/models.yaml @@ -1,13 +1,11 @@ -# notes: -# - do not submit pull requests to add new models; this list will be updated in batches with new releases. +# Notes: +# - do not submit pull requests to add new models; this list will be updated in batches with new releases. +# Links: +# - https://platform.openai.com/docs/models +# - https://openai.com/pricing +# - https://platform.openai.com/docs/api-reference/chat - platform: openai - # docs: - # - https://platform.openai.com/docs/models - # - https://openai.com/pricing - # - https://platform.openai.com/docs/api-reference/chat - # notes - # - get max_output_tokens info from api error models: - name: gpt-4o max_input_tokens: 128000 @@ -63,13 +61,11 @@ default_chunk_size: 3000 max_batch_size: 100 +# Links: +# - https://ai.google.dev/models/gemini +# - https://ai.google.dev/pricing +# - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent - platform: gemini - # docs: - # - https://ai.google.dev/models/gemini - # - https://ai.google.dev/pricing - # - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent - # notes: - # - get max_output_tokens info from list models api models: - name: gemini-1.5-pro-latest max_input_tokens: 2097152 @@ -118,12 +114,10 @@ default_chunk_size: 1500 max_batch_size: 5 +# Links: +# - https://docs.anthropic.com/claude/docs/models-overview +# - https://docs.anthropic.com/claude/reference/messages-streaming - platform: claude - # docs: - # - https://docs.anthropic.com/claude/docs/models-overview - # - https://docs.anthropic.com/claude/reference/messages-streaming - # notes: - # - get max_output_tokens info from models doc models: - name: claude-3-5-sonnet-20240620 max_input_tokens: 200000 @@ -158,13 +152,11 @@ supports_vision: true supports_function_calling: true +# Links: +# - https://docs.mistral.ai/getting-started/models/ +# - https://mistral.ai/technology/#pricing +# - https://docs.mistral.ai/api/ - platform: mistral - # docs: - # - https://docs.mistral.ai/getting-started/models/ - # - https://mistral.ai/technology/#pricing - # - https://docs.mistral.ai/api/ - # notes: - # - unable to get max_output_tokens info models: - name: mistral-large-latest max_input_tokens: 128000 @@ -191,10 +183,11 @@ default_chunk_size: 2000 max_batch_size: 3 +# Links: +# - https://docs.ai21.com/docs/jamba-15-models +# - https://www.ai21.com/pricing +# - https://docs.ai21.com/reference/jamba-15-api-ref - platform: ai21 - # docs: - # - https://docs.ai21.com/reference/jamba-15-api-ref - # - https://www.ai21.com/pricing models: - name: jamba-1.5-large max_input_tokens: 256000 @@ -207,11 +200,11 @@ output_price: 0.4 supports_function_calling: true +# Links: +# - https://docs.cohere.com/docs/command-r-plus +# - https://cohere.com/pricing +# - https://docs.cohere.com/reference/chat - platform: cohere - # docs: - # - https://docs.cohere.com/docs/command-r-plus - # - https://cohere.com/pricing - # - https://docs.cohere.com/reference/chat models: - name: command-r-plus max_input_tokens: 128000 @@ -252,11 +245,11 @@ type: reranker max_input_tokens: 4096 +# Links: +# - https://docs.perplexity.ai/guides/model-cards +# - https://docs.perplexity.ai/guides/pricing +# - https://docs.perplexity.ai/api-reference/chat-completions - platform: perplexity - # docs: - # - https://docs.perplexity.ai/guides/model-cards - # - https://docs.perplexity.ai/guides/pricing - # - https://docs.perplexity.ai/api-reference/chat-completions models: - name: llama-3.1-sonar-huge-128k-online max_input_tokens: 127072 @@ -287,13 +280,10 @@ input_price: 0.2 output_price: 0.2 +# Links: +# - https://console.groq.com/docs/models +# - https://console.groq.com/docs/api-reference#chat - platform: groq - # docs: - # - https://console.groq.com/docs/models - # - https://wow.groq.com - # - https://console.groq.com/docs/text-chat - # notes: - # - all models are free with rate limits models: - name: llama3-70b-8192 max_input_tokens: 8192 @@ -329,10 +319,10 @@ output_price: 0 supports_function_calling: true +# Links: +# - https://ollama.com/library +# - https://github.com/ollama/ollama/blob/main/docs/openai.md - platform: ollama - # docs: - # - https://ollama.com/library - # - https://github.com/ollama/ollama/blob/main/docs/openai.md models: - name: llama3.1 max_input_tokens: 128000 @@ -353,14 +343,12 @@ default_chunk_size: 1000 max_batch_size: 50 +# Links: +# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models +# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models +# - https://cloud.google.com/vertex-ai/generative-ai/pricing +# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini - platform: vertexai - # docs: - # - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models - # - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models - # - https://cloud.google.com/vertex-ai/generative-ai/pricing - # - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini - # notes: - # - get max_output_tokens info from models doc models: - name: gemini-1.5-pro-001 max_input_tokens: 2097152 @@ -441,13 +429,11 @@ default_chunk_size: 1500 max_batch_size: 5 +# Links: +# - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns +# - https://aws.amazon.com/bedrock/pricing/ +# - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html - platform: bedrock - # docs: - # - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns - # - https://aws.amazon.com/bedrock/pricing/ - # - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html - # notes: - # - except for Claude, other models do not support streaming function calling models: - name: anthropic.claude-3-5-sonnet-20240620-v1:0 max_input_tokens: 200000 @@ -532,9 +518,9 @@ default_chunk_size: 1000 max_batch_size: 96 +# Links: +# - https://developers.cloudflare.com/workers-ai/models/ - platform: cloudflare - # docs: - # - https://developers.cloudflare.com/workers-ai/models/ models: - name: '@cf/meta/llama-3.1-8b-instruct' max_input_tokens: 6144 @@ -555,11 +541,11 @@ default_chunk_size: 1000 max_batch_size: 100 +# Links: +# - https://replicate.com/explore +# - https://replicate.com/pricing +# - https://replicate.com/docs/reference/http#create-a-prediction-using-an-official-model - platform: replicate - # docs: - # - https://replicate.com/explore - # - https://replicate.com/pricing - # - https://replicate.com/docs/reference/http models: - name: meta/meta-llama-3.1-405b-instruct max_input_tokens: 128000 @@ -579,10 +565,10 @@ input_price: 0.05 output_price: 0.25 +# Links: +# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu +# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 - platform: ernie - # docs: - # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu - # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 models: - name: ernie-4.0-turbo-8k-preview max_input_tokens: 8192 @@ -620,11 +606,11 @@ max_input_tokens: 1024 input_price: 0.28 +# Links: +# - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction +# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing +# - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api - platform: qianwen - # docs: - # - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction - # - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing - # - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api models: - name: qwen-max max_input_tokens: 8000 @@ -671,11 +657,11 @@ default_chunk_size: 1500 max_batch_size: 25 +# Links: +# - https://platform.moonshot.cn/docs/intro +# - https://platform.moonshot.cn/docs/pricing/chat +# - https://platform.moonshot.cn/docs/api/chat - platform: moonshot - # docs: - # - https://platform.moonshot.cn/docs/intro - # - https://platform.moonshot.cn/docs/pricing - # - https://platform.moonshot.cn/docs/api-reference models: - name: moonshot-v1-8k max_input_tokens: 8000 @@ -693,10 +679,10 @@ output_price: 8.4 supports_function_calling: true +# Links: +# - https://platform.deepseek.com/api-docs/quick_start/pricing +# - https://platform.deepseek.com/api-docs/api/create-chat-completion - platform: deepseek - # docs: - # - https://platform.deepseek.com/api-docs/ - # - https://platform.deepseek.com/api-docs/pricing models: - name: deepseek-chat max_input_tokens: 32768 @@ -709,10 +695,11 @@ output_price: 0.28 supports_function_calling: true +# Links: +# - https://open.bigmodel.cn/dev/howuse/model +# - https://open.bigmodel.cn/pricing +# - https://open.bigmodel.cn/dev/api#glm-4 - platform: zhipuai - # docs: - # - https://open.bigmodel.cn/dev/howuse/model - # - https://open.bigmodel.cn/pricing models: - name: glm-4-plus max_input_tokens: 128000 @@ -756,10 +743,10 @@ default_chunk_size: 2000 max_batch_size: 3 +# Links: +# - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9 +# - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion - platform: lingyiwanwu - # docs: - # - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B - # - https://platform.lingyiwanwu.com/docs#%E8%AE%A1%E8%B4%B9%E5%8D%95%E5%85%83 models: - name: yi-large max_input_tokens: 32768 @@ -796,9 +783,9 @@ output_price: 0.84 supports_vision: true +# Links: +# - https://github.com/marketplace/models - platform: github - # docs: - # - https://github.com/marketplace/models models: - name: gpt-4o max_input_tokens: 128000 @@ -853,10 +840,10 @@ default_chunk_size: 1000 max_batch_size: 96 +# Links: +# - https://deepinfra.com/models +# - https://deepinfra.com/pricing - platform: deepinfra - # docs: - # - https://deepinfra.com/models - # - https://deepinfra.com/pricing models: - name: meta-llama/Meta-Llama-3.1-405B-Instruct max_input_tokens: 32000 @@ -929,10 +916,10 @@ default_chunk_size: 1000 max_batch_size: 100 +# Links: +# - https://fireworks.ai/models +# - https://fireworks.ai/pricing - platform: fireworks - # docs: - # - https://fireworks.ai/models - # - https://fireworks.ai/pricing models: - name: accounts/fireworks/models/llama-v3p1-405b-instruct max_input_tokens: 131072 @@ -992,9 +979,9 @@ default_chunk_size: 1000 max_batch_size: 100 +# Links: +# - https://openrouter.ai/docs#models - platform: openrouter - # docs: - # - https://openrouter.ai/docs#models models: - name: openai/gpt-4o max_input_tokens: 128000 @@ -1229,10 +1216,10 @@ input_price: 0.9 output_price: 0.9 +# Links: +# - https://octo.ai/docs/getting-started/inference-models +# - https://octo.ai/docs/getting-started/pricing-and-billing - platform: octoai - # docs: - # - https://octo.ai/docs/getting-started/inference-models - # - https://octo.ai/docs/getting-started/pricing-and-billing models: - name: meta-llama-3.1-405b-instruct max_input_tokens: 131072 @@ -1257,11 +1244,11 @@ default_chunk_size: 1000 max_batch_size: 100 +# Links: +# - https://docs.together.ai/docs/inference-models +# - https://docs.together.ai/docs/embedding-models +# - https://www.together.ai/pricing - platform: together - # docs: - # - https://docs.together.ai/docs/inference-models - # - https://docs.together.ai/docs/embedding-models - # - https://www.together.ai/pricing models: - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo max_input_tokens: 32768 @@ -1300,10 +1287,10 @@ default_chunk_size: 1000 max_batch_size: 100 +# Links: +# - https://jina.ai/ +# - https://api.jina.ai/redoc - platform: jina - # docs: - # - https://jina.ai/ - # - https://api.jina.ai/redoc models: - name: jina-clip-v1 type: embedding @@ -1346,11 +1333,11 @@ max_input_tokens: 8192 input_price: 0.02 +# Links: +# - https://docs.voyageai.com/docs/embeddings +# - https://docs.voyageai.com/docs/pricing +# - https://docs.voyageai.com/reference/ - platform: voyageai - # docs: - # - https://docs.voyageai.com/docs/embeddings - # - https://docs.voyageai.com/docs/pricing - # - https://docs.voyageai.com/reference/embeddings-api models: - name: voyage-large-2-instruct type: embedding