# notes:
# - do not submit pull requests to add new models; this list will be updated in batches with new releases.

- platform: openai
  # docs:
  # - https://platform.openai.com/docs/models
  # - https://openai.com/pricing
  # - https://platform.openai.com/docs/api-reference/chat
  # notes:
  # - get max_output_tokens info from api error
  models:
    - name: gpt-4o
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: gpt-4-turbo-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_function_calling: true
    - name: gpt-4-1106-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_function_calling: true
    - name: gpt-4
      max_input_tokens: 8192
      max_output_tokens: 4096
      input_price: 30
      output_price: 60
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: gpt-3.5-turbo-1106
      max_input_tokens: 16385
      max_output_tokens: 4096
      input_price: 1
      output_price: 2
      supports_function_calling: true
    - name: text-embedding-3-large
      mode: embedding
      max_input_tokens: 8191
      default_chunk_size: 4000
      max_concurrent_chunks: 100
    - name: text-embedding-3-small
      mode: embedding
      max_input_tokens: 8191
      default_chunk_size: 4000
      max_concurrent_chunks: 100

- platform: gemini
  # docs:
  # - https://ai.google.dev/models/gemini
  # - https://ai.google.dev/pricing
  # - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
  # notes:
  # - get max_output_tokens info from list models api
  models:
    - name: gemini-1.5-pro-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 3.5
      output_price: 10.5
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-latest
      max_input_tokens: 1048576
      max_output_tokens: 8192
      input_price: 0.35
      output_price: 1.05
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.0-pro-latest
      max_input_tokens: 30720
      max_output_tokens: 2048
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: text-embedding-004
      mode: embedding
      max_input_tokens: 2048
      default_chunk_size: 2000

- platform: claude
  # docs:
  # - https://docs.anthropic.com/claude/docs/models-overview
  # - https://docs.anthropic.com/claude/reference/messages-streaming
  # notes:
  # - get max_output_tokens info from models doc
  models:
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true

- platform: mistral
  # docs:
  # - https://docs.mistral.ai/getting-started/models/
  # - https://mistral.ai/technology/#pricing
  # - https://docs.mistral.ai/api/
  # notes:
  # - unable to get max_output_tokens info
  models:
    - name: open-mistral-7b
      max_input_tokens: 32000
      input_price: 0.25
      output_price: 0.25
    - name: open-mixtral-8x7b
      max_input_tokens: 32000
      input_price: 0.7
      output_price: 0.7
    - name: open-mixtral-8x22b
      max_input_tokens: 64000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 1
      output_price: 3
      supports_function_calling: true
    - name: mistral-large-latest
      max_input_tokens: 32000
      input_price: 4
      output_price: 12
      supports_function_calling: true
    - name: codestral-latest
      max_input_tokens: 32000
      input_price: 1
      output_price: 3
    - name: mistral-embed
      mode: embedding
      max_input_tokens: 8092
      default_chunk_size: 4000

- platform: cohere
  # docs:
  # - https://docs.cohere.com/docs/command-r
  # - https://cohere.com/pricing
  # - https://docs.cohere.com/reference/chat
  # notes:
  # - get max_output_tokens info from api error
  models:
    - name: command-r
      max_input_tokens: 128000
      max_output_tokens: 4000
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: command-r-plus
      max_input_tokens: 128000
      max_output_tokens: 4000
      input_price: 3
      output_price: 15
      supports_function_calling: true
    - name: embed-english-v3.0
      mode: embedding
      max_input_tokens: 512
      default_chunk_size: 1000
      max_concurrent_chunks: 96
    - name: embed-multilingual-v3.0
      mode: embedding
      max_input_tokens: 512
      default_chunk_size: 1000
      max_concurrent_chunks: 96

- platform: perplexity
  # docs:
  # - https://docs.perplexity.ai/docs/model-cards
  # - https://docs.perplexity.ai/docs/pricing
  # - https://docs.perplexity.ai/reference/post_chat_completions
  # notes:
  # - get max_output_tokens info from api error
  models:
    - name: llama-3-sonar-small-32k-chat
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-sonar-large-32k-chat
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.6
      output_price: 0.6
    - name: llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 1
      output_price: 1
    - name: mixtral-8x7b-instruct
      max_input_tokens: 16384
      max_output_tokens: 16384
      input_price: 0.6
      output_price: 0.6

- platform: groq
  # docs:
  # - https://console.groq.com/docs/models
  # - https://wow.groq.com
  # - https://console.groq.com/docs/text-chat
  # notes:
  # - get max_output_tokens info from playground
  # - all models are free with rate limits
  models:
    - name: llama3-8b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.05
      output_price: 0.10
    - name: llama3-70b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
      input_price: 0.59
      output_price: 0.79
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
      max_output_tokens: 32768
      input_price: 0.27
      output_price: 0.27

- platform: vertexai
  # docs:
  # - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
  # - https://cloud.google.com/vertex-ai/generative-ai/pricing
  # - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
  # notes:
  # - get max_output_tokens info from models doc
  models:
    - name: gemini-1.5-pro-001
      max_input_tokens: 1000000
      max_output_tokens: 8192
      input_price: 1.25
      output_price: 3.75
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.5-flash-001
      max_input_tokens: 1000000
      max_output_tokens: 8192
      input_price: 0.125
      output_price: 0.375
      supports_vision: true
      supports_function_calling: true
    - name: gemini-1.0-pro-002
      max_input_tokens: 24568
      max_output_tokens: 8192
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
    - name: text-embedding-004
      mode: embedding
      max_input_tokens: 3072
      default_chunk_size: 3000
      max_concurrent_chunks: 5
    - name: text-multilingual-embedding-002
      mode: embedding
      max_input_tokens: 3072
      default_chunk_size: 3000
      max_concurrent_chunks: 5

- platform: vertexai-claude
  # docs:
  # - https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
  # notes:
  # - get max_output_tokens info from models doc
  # - claude models have not been tested
  models:
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true

- platform: bedrock
  # docs:
  # - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
  # - https://aws.amazon.com/bedrock/pricing/
  # notes:
  # - get max_output_tokens info from playground
  # - claude/llama models have not been tested
  models:
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: meta.llama3-8b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0.4
      output_price: 0.6
    - name: meta.llama3-70b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 2.65
      output_price: 3.5
    - name: mistral.mistral-7b-instruct-v0:2
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.15
      output_price: 0.2
    - name: mistral.mixtral-8x7b-instruct-v0:1
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.45
      output_price: 0.7
    - name: mistral.mistral-large-2402-v1:0
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 8
      output_price: 2.4

- platform: cloudflare
  # docs:
  # - https://developers.cloudflare.com/workers-ai/models/
  # - https://developers.cloudflare.com/workers-ai/platform/pricing/
  # notes:
  # - unable to get max_output_tokens info
  models:
    - name: '@cf/meta/llama-3-8b-instruct'
      max_input_tokens: 4096
      max_output_tokens: 4096
      require_max_tokens: true
    - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
      max_input_tokens: 4096
      max_output_tokens: 4096
      require_max_tokens: true
    - name: '@cf/qwen/qwen1.5-14b-chat-awq'
      max_input_tokens: 4096
      max_output_tokens: 4096
      require_max_tokens: true

- platform: replicate
  # docs:
  # - https://replicate.com/explore
  # - https://replicate.com/pricing
  # - https://replicate.com/docs/reference/http
  # notes:
  # - max_output_tokens is required but unknown
  models:
    - name: meta/meta-llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.65
      output_price: 2.75
    - name: meta/meta-llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.05
      output_price: 0.25
    - name: mistralai/mistral-7b-instruct-v0.2
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.05
      output_price: 0.25
    - name: mistralai/mixtral-8x7b-instruct-v0.1
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.3
      output_price: 1

- platform: ernie
  # docs:
  # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
  # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
  # notes:
  # - get max_output_tokens info from models doc
  models:
    - name: ernie-4.0-8k-0613
      max_input_tokens: 5120
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 16.8
      output_price: 16.8
    - name: ernie-3.5-8k-0613
      max_input_tokens: 5120
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 1.68
      output_price: 1.68
    - name: ernie-speed-128k
      max_input_tokens: 124000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0
      output_price: 0
    - name: ernie-lite-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0

- platform: qianwen
  # docs:
  # - https://help.aliyun.com/zh/dashscope/developer-reference/tongyiqianwen-large-language-models/
  # - https://help.aliyun.com/zh/dashscope/developer-reference/qwen-vl-plus/
  # notes:
  # - get max_output_tokens info from models doc
  models:
    - name: qwen-long
      max_input_tokens: 1000000
      input_price: 0.07
      output_price: 0.28
    - name: qwen-turbo
      max_input_tokens: 6000
      max_output_tokens: 1500
      input_price: 0.28
      output_price: 0.84
    - name: qwen-plus
      max_input_tokens: 30000
      max_output_tokens: 2000
      input_price: 0.56
      output_price: 1.68
    - name: qwen-max
      max_input_tokens: 6000
      max_output_tokens: 2000
      input_price: 5.6
      output_price: 16.8
    - name: qwen-max-longcontext
      max_input_tokens: 28000
      max_output_tokens: 2000
      input_price: 5.6
      output_price: 16.8
    - name: qwen-vl-plus
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: qwen-vl-max
      input_price: 2.8
      output_price: 2.8
      supports_vision: true
    - name: text-embedding-v2
      mode: embedding
      max_input_tokens: 2048
      default_chunk_size: 2000
      max_concurrent_chunks: 5

- platform: moonshot
  # docs:
  # - https://platform.moonshot.cn/docs/intro
  # - https://platform.moonshot.cn/docs/pricing
  # - https://platform.moonshot.cn/docs/api-reference
  # notes:
  # - unable to get max_output_tokens info
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68
    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36
    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4

- platform: deepseek
  # docs:
  # - https://platform.deepseek.com/api-docs/
  # - https://platform.deepseek.com/api-docs/pricing
  models:
    - name: deepseek-chat
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
    - name: deepseek-coder
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28

- platform: zhipuai
  # docs:
  # - https://open.bigmodel.cn/dev/howuse/model
  # - https://open.bigmodel.cn/pricing
  models:
    - name: glm-4-0520
      max_input_tokens: 128000
      input_price: 14
      output_price: 14
      supports_function_calling: true
    - name: glm-4-airx
      max_input_tokens: 8092
      input_price: 1.4
      output_price: 1.4
      supports_function_calling: true
    - name: glm-4-air
      max_input_tokens: 128000
      input_price: 0.14
      output_price: 0.14
      supports_function_calling: true
    - name: glm-4-flash
      max_input_tokens: 128000
      input_price: 0.014
      output_price: 0.014
      supports_function_calling: true
    - name: glm-4v
      max_input_tokens: 2048
      input_price: 7
      output_price: 7
      supports_vision: true

- platform: anyscale
  # docs:
  # - https://docs.anyscale.com/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct
  # - https://docs.endpoints.anyscale.com/pricing
  models:
    - name: meta-llama/Meta-Llama-3-8B-Instruct
      max_input_tokens: 8192
      input_price: 0.15
      output_price: 0.15
    - name: meta-llama/Meta-Llama-3-70B-Instruct
      max_input_tokens: 8192
      input_price: 1.0
      output_price: 1.0
    - name: codellama/CodeLlama-70b-Instruct-hf
      max_input_tokens: 4096
      input_price: 1.0
      output_price: 1.0
    - name: mistralai/Mistral-7B-Instruct-v0.1
      max_input_tokens: 16384
      input_price: 0.15
      output_price: 0.15
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.50
      output_price: 0.50
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 0.90
      output_price: 0.90

- platform: deepinfra
  # docs:
  # - https://deepinfra.com/models
  # - https://deepinfra.com/pricing
  models:
    - name: meta-llama/Meta-Llama-3-8B-Instruct
      max_input_tokens: 8192
      input_price: 0.08
      output_price: 0.08
    - name: meta-llama/Meta-Llama-3-70B-Instruct
      max_input_tokens: 8192
      input_price: 0.59
      output_price: 0.79
    - name: mistralai/Mistral-7B-Instruct-v0.3
      max_input_tokens: 32768
      input_price: 0.07
      output_price: 0.07
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.24
      output_price: 0.24
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 0.65
      output_price: 0.65
    - name: Qwen/Qwen2-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.59
      output_price: 0.79
    - name: microsoft/Phi-3-medium-4k-instruct
      max_input_tokens: 4096
      input_price: 0.14
      output_price: 0.14

- platform: fireworks
  # docs:
  # - https://fireworks.ai/models
  # - https://fireworks.ai/pricing
  models:
    - name: accounts/fireworks/models/firellava-13b
      max_input_tokens: 4096
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
    - name: accounts/fireworks/models/firefunction-v1
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
      supports_function_calling: true
    - name: accounts/fireworks/models/llama-v3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/llama-v3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/mistral-7b-instruct-v3
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.5
      output_price: 0.5
    - name: accounts/fireworks/models/mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/qwen2-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/phi-3-mini-128k-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/phi-3-vision-128k-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
      supports_vision: true

- platform: openrouter
  # docs:
  # - https://openrouter.ai/docs#models
  models:
    - name: meta-llama/llama-3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.07
      output_price: 0.07
    - name: meta-llama/llama-3-8b-instruct:nitro
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
    - name: meta-llama/llama-3-8b-instruct:extended
      max_input_tokens: 16384
      input_price: 0.2
      output_price: 1.125
    - name: meta-llama/llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.59
      output_price: 0.79
    - name: meta-llama/llama-3-70b-instruct:nitro
      max_input_tokens: 8192
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/mistral-7b-instruct-v0.3
      max_input_tokens: 32768
      input_price: 0.07
      output_price: 0.07
    - name: microsoft/phi-3-mini-128k-instruct
      max_input_tokens: 128000
      input_price: 0.1
      output_price: 0.1
    - name: qwen/qwen-2-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: openai/gpt-4o
      max_input_tokens: 128000
      input_price: 5
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4-turbo
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
      supports_vision: true
      supports_function_calling: true
    - name: openai/gpt-4-turbo-preview
      max_input_tokens: 128000
      input_price: 10
      output_price: 30
      supports_function_calling: true
    - name: openai/gpt-4
      max_input_tokens: 8192
      input_price: 30
      output_price: 60
    - name: openai/gpt-3.5-turbo
      max_input_tokens: 16385
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: google/gemini-pro-1.5
      max_input_tokens: 2800000
      input_price: 2.5
      output_price: 7.5
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-flash-1.5
      max_input_tokens: 2800000
      input_price: 0.25
      output_price: 0.75
      supports_vision: true
      supports_function_calling: true
    - name: google/gemini-pro
      max_input_tokens: 91728
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
    - name: anthropic/claude-3-opus
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 15
      output_price: 75
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-sonnet
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 3
      output_price: 15
      supports_vision: true
      supports_function_calling: true
    - name: anthropic/claude-3-haiku
      max_input_tokens: 200000
      max_output_tokens: 4096
      require_max_tokens: true
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
    - name: mistralai/mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.24
      output_price: 0.24
    - name: mistralai/mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.65
      output_price: 0.65
      supports_function_calling: true
    - name: mistralai/mistral-small
      max_input_tokens: 32000
      input_price: 2
      output_price: 6
      supports_function_calling: true
    - name: mistralai/mistral-large
      max_input_tokens: 32000
      input_price: 8
      output_price: 24
      supports_function_calling: true
    - name: cohere/command-r
      max_input_tokens: 128000
      input_price: 0.5
      output_price: 1.5
      supports_function_calling: true
    - name: cohere/command-r-plus
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
    - name: deepseek/deepseek-chat
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
    - name: deepseek/deepseek-coder
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28

- platform: octoai
  # docs:
  # - https://octo.ai/docs/getting-started/inference-models
  # - https://octo.ai/pricing/text-gen-solution/
  models:
    - name: meta-llama-3-8b-instruct
      max_input_tokens: 8192
      input_price: 0.13
      output_price: 0.13
    - name: meta-llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.86
      output_price: 0.86
    - name: mistral-7b-instruct
      max_input_tokens: 32768
      input_price: 0.13
      output_price: 0.13
    - name: mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.34
      output_price: 0.34
    - name: mixtral-8x22b-instruct
      max_input_tokens: 65536
      input_price: 0.86
      output_price: 0.86

- platform: together
  # docs:
  # - https://docs.together.ai/docs/inference-models
  # - https://www.together.ai/pricing
  models:
    - name: meta-llama/Llama-3-8b-chat-hf
      max_input_tokens: 8000
      input_price: 0.2
      output_price: 0.2
    - name: meta-llama/Llama-3-70b-chat-hf
      max_input_tokens: 8000
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/Mistral-7B-Instruct-v0.3
      max_input_tokens: 32768
      input_price: 0.2
      output_price: 0.2
    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 1.2
      output_price: 1.2
    - name: Qwen/Qwen2-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9