# Notes:
# - Do not submit pull requests to add new models; this list is updated in batches with new releases.
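# Field guide (informal, based on how the entries below read; not an authoritative schema):
# - max_input_tokens / max_output_tokens: context-window and completion limits, in tokens.
# - input_price / output_price: assumed to be USD per 1M tokens.
# - require_max_tokens: the request is expected to set max_tokens explicitly.
# - supports_vision / supports_function_calling: capability flags for chat models.
# - type: embedding entries carry max_tokens_per_chunk, default_chunk_size, and max_batch_size;
#   type: reranker entries carry max_input_tokens.
# In aichat, models are typically referenced as <platform>:<name>, e.g. openai:gpt-4o.
# Hypothetical example entry (illustration only, not a real platform):
# - platform: example
#   models:
#     - name: example-chat-model
#       max_input_tokens: 128000
#       max_output_tokens: 4096
#       input_price: 1    # assumed USD per 1M input tokens
#       output_price: 2   # assumed USD per 1M output tokens
#       supports_function_calling: true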
# Links:
# - https://platform.openai.com/docs/models
# - https://openai.com/pricing
# - https://platform.openai.com/docs/api-reference/chat
- platform: openai
models:
- name: gpt-4o
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 5
output_price: 15
supports_vision: true
supports_function_calling: true
- name: gpt-4o-2024-08-06
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 2.5
output_price: 10
supports_vision: true
supports_function_calling: true
- name: chatgpt-4o-latest
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 5
output_price: 15
supports_vision: true
supports_function_calling: true
- name: gpt-4o-mini
max_input_tokens: 128000
max_output_tokens: 16384
input_price: 0.15
output_price: 0.6
supports_vision: true
supports_function_calling: true
- name: gpt-4-turbo
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_vision: true
supports_function_calling: true
- name: o1-preview
max_input_tokens: 128000
max_output_tokens: 32768
input_price: 15
output_price: 60
- name: o1-mini
max_input_tokens: 128000
max_output_tokens: 65536
input_price: 3
output_price: 12
- name: gpt-3.5-turbo
max_input_tokens: 16385
max_output_tokens: 4096
input_price: 0.5
output_price: 1.5
supports_function_calling: true
- name: text-embedding-3-large
type: embedding
input_price: 0.13
max_tokens_per_chunk: 8191
default_chunk_size: 3000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
input_price: 0.02
max_tokens_per_chunk: 8191
default_chunk_size: 3000
max_batch_size: 100
# Links:
# - https://ai.google.dev/models/gemini
# - https://ai.google.dev/pricing
# - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
- platform: gemini
models:
- name: gemini-1.5-pro-latest
max_input_tokens: 2097152
max_output_tokens: 8192
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-1.5-pro-exp-0827
max_input_tokens: 2097152
max_output_tokens: 8192
supports_vision: true
supports_function_calling: true
- name: gemini-1.5-flash-latest
max_input_tokens: 1048576
max_output_tokens: 8192
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-1.5-flash-exp-0827
max_input_tokens: 1048576
max_output_tokens: 8192
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-1.5-flash-8b-exp-0827
max_input_tokens: 1048576
max_output_tokens: 8192
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: gemini-1.0-pro-latest
max_input_tokens: 30720
max_output_tokens: 2048
input_price: 0
output_price: 0
supports_function_calling: true
- name: text-embedding-004
type: embedding
input_price: 0
output_price: 0
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 100
# Links:
# - https://docs.anthropic.com/claude/docs/models-overview
# - https://docs.anthropic.com/claude/reference/messages-streaming
- platform: claude
models:
- name: claude-3-5-sonnet-20240620
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: claude-3-opus-20240229
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
supports_function_calling: true
- name: claude-3-sonnet-20240229
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: claude-3-haiku-20240307
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
supports_function_calling: true
# Links:
# - https://docs.mistral.ai/getting-started/models/
# - https://mistral.ai/technology/#pricing
# - https://docs.mistral.ai/api/
- platform: mistral
models:
- name: mistral-large-latest
max_input_tokens: 128000
input_price: 3
output_price: 9
supports_function_calling: true
- name: open-mistral-nemo
max_input_tokens: 128000
input_price: 0.3
output_price: 0.3
supports_function_calling: true
- name: codestral-latest
max_input_tokens: 32000
input_price: 1
output_price: 3
- name: open-codestral-mamba
max_input_tokens: 256000
input_price: 0.25
output_price: 0.25
- name: pixtral-12b-2409
max_input_tokens: 128000
input_price: 0.15
output_price: 0.15
supports_vision: true
- name: mistral-embed
type: embedding
max_input_tokens: 8092
input_price: 0.1
max_tokens_per_chunk: 8092
default_chunk_size: 2000
# Links:
# - https://docs.ai21.com/docs/jamba-15-models
# - https://www.ai21.com/pricing
# - https://docs.ai21.com/reference/jamba-15-api-ref
- platform: ai21
models:
- name: jamba-1.5-large
max_input_tokens: 256000
input_price: 2
output_price: 8
supports_function_calling: true
- name: jamba-1.5-mini
max_input_tokens: 256000
input_price: 0.2
output_price: 0.4
supports_function_calling: true
# Links:
# - https://docs.cohere.com/docs/command-r-plus
# - https://cohere.com/pricing
# - https://docs.cohere.com/reference/chat
- platform: cohere
models:
- name: command-r-plus
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: command-r-plus-08-2024
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: command-r
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_function_calling: true
- name: command-r-08-2024
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_function_calling: true
- name: embed-english-v3.0
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: embed-multilingual-v3.0
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: rerank-english-v3.0
type: reranker
max_input_tokens: 4096
- name: rerank-multilingual-v3.0
type: reranker
max_input_tokens: 4096
# Links:
# - https://docs.perplexity.ai/guides/model-cards
# - https://docs.perplexity.ai/guides/pricing
# - https://docs.perplexity.ai/api-reference/chat-completions
- platform: perplexity
models:
- name: llama-3.1-sonar-huge-128k-online
max_input_tokens: 127072
input_price: 5
output_price: 5
- name: llama-3.1-sonar-large-128k-online
max_input_tokens: 127072
input_price: 1
output_price: 1
- name: llama-3.1-sonar-large-128k-chat
max_input_tokens: 131072
input_price: 1
output_price: 1
- name: llama-3.1-sonar-small-128k-online
max_input_tokens: 127072
input_price: 0.2
output_price: 0.2
- name: llama-3.1-sonar-small-128k-chat
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
- name: llama-3.1-70b-instruct
max_input_tokens: 131072
input_price: 1
output_price: 1
- name: llama-3.1-8b-instruct
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
# Links:
# - https://console.groq.com/docs/models
# - https://console.groq.com/docs/api-reference#chat
- platform: groq
models:
- name: llama3-70b-8192
max_input_tokens: 8192
input_price: 0
output_price: 0
supports_function_calling: true
- name: llama3-8b-8192
max_input_tokens: 8192
input_price: 0
output_price: 0
supports_function_calling: true
- name: llama3-groq-70b-8192-tool-use-preview
max_input_tokens: 8192
input_price: 0
output_price: 0
supports_function_calling: true
- name: llama3-groq-8b-8192-tool-use-preview
max_input_tokens: 8192
input_price: 0
output_price: 0
supports_function_calling: true
- name: llama-3.1-70b-versatile
max_input_tokens: 8192
input_price: 0
output_price: 0
- name: llama-3.1-8b-instant
max_input_tokens: 8192
input_price: 0
output_price: 0
- name: gemma2-9b-it
max_input_tokens: 8192
input_price: 0
output_price: 0
supports_function_calling: true
# Links:
# - https://ollama.com/library
# - https://github.com/ollama/ollama/blob/main/docs/openai.md
- platform: ollama
models:
- name: llama3.1
max_input_tokens: 128000
supports_function_calling: true
- name: gemma2
max_input_tokens: 8192
- name: mistral-nemo
max_input_tokens: 128000
supports_function_calling: true
- name: mistral-large
max_input_tokens: 128000
supports_function_calling: true
- name: deepseek-coder-v2
max_input_tokens: 32768
- name: phi3
max_input_tokens: 128000
supports_function_calling: true
- name: nomic-embed-text
type: embedding
max_tokens_per_chunk: 8192
default_chunk_size: 1000
max_batch_size: 50
# Links:
# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models
# - https://cloud.google.com/vertex-ai/generative-ai/pricing
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
- platform: vertexai
models:
- name: gemini-1.5-pro-001
max_input_tokens: 2097152
max_output_tokens: 8192
input_price: 1.25
output_price: 3.75
supports_vision: true
supports_function_calling: true
- name: gemini-1.5-flash-001
max_input_tokens: 1000000
max_output_tokens: 8192
input_price: 0.01875
output_price: 0.0375
supports_vision: true
supports_function_calling: true
- name: gemini-1.0-pro-002
max_input_tokens: 24568
max_output_tokens: 8192
input_price: 0.125
output_price: 0.375
supports_function_calling: true
- name: claude-3-5-sonnet@20240620
max_input_tokens: 200000
max_output_tokens: 8192
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: claude-3-opus@20240229
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
supports_function_calling: true
- name: claude-3-sonnet@20240229
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: claude-3-haiku@20240307
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
supports_function_calling: true
- name: mistral-large@2407
max_input_tokens: 128000
input_price: 3
output_price: 9
supports_function_calling: true
- name: mistral-nemo@2407
max_input_tokens: 128000
input_price: 0.3
output_price: 0.3
supports_function_calling: true
- name: codestral@2405
max_input_tokens: 32000
input_price: 1
output_price: 3
- name: text-embedding-004
type: embedding
max_input_tokens: 20000
input_price: 0.025
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 5
- name: text-multilingual-embedding-002
type: embedding
max_input_tokens: 20000
input_price: 0.2
max_tokens_per_chunk: 2048
default_chunk_size: 1500
max_batch_size: 5
# Links:
# - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
# - https://aws.amazon.com/bedrock/pricing/
# - https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html
- platform: bedrock
models:
- name: anthropic.claude-3-5-sonnet-20240620-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic.claude-3-opus-20240229-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
supports_function_calling: true
- name: anthropic.claude-3-sonnet-20240229-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic.claude-3-haiku-20240307-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
supports_function_calling: true
- name: meta.llama3-1-405b-instruct-v1:0
max_input_tokens: 128000
input_price: 5.32
output_price: 16
supports_function_calling: true
- name: meta.llama3-1-70b-instruct-v1:0
max_input_tokens: 128000
input_price: 2.65
output_price: 3.5
supports_function_calling: true
- name: meta.llama3-1-8b-instruct-v1:0
max_input_tokens: 128000
input_price: 0.3
output_price: 0.6
supports_function_calling: true
- name: meta.llama3-70b-instruct-v1:0
max_input_tokens: 8192
input_price: 2.65
output_price: 3.5
- name: meta.llama3-8b-instruct-v1:0
max_input_tokens: 8192
input_price: 0.3
output_price: 0.6
- name: mistral.mistral-large-2407-v1:0
max_input_tokens: 128000
input_price: 3
output_price: 9
supports_function_calling: true
- name: cohere.command-r-plus-v1:0
max_input_tokens: 128000
input_price: 3
output_price: 15
supports_function_calling: true
- name: cohere.command-r-v1:0
max_input_tokens: 128000
input_price: 0.5
output_price: 1.5
supports_function_calling: true
- name: cohere.embed-english-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: cohere.embed-multilingual-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
# Links:
# - https://developers.cloudflare.com/workers-ai/models/
# - https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
- platform: cloudflare
models:
- name: '@cf/meta/llama-3.1-8b-instruct'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/meta/llama-3-8b-instruct'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/baai/bge-large-en-v1.5'
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Links:
# - https://huggingface.co/models?other=text-generation-inference
# - https://huggingface.co/docs/text-generation-inference/en/reference/api_reference
- platform: huggingface
models:
- name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192
max_output_tokens: 4096
require_max_tokens: true
input_price: 0
output_price: 0
- name: mistralai/Mistral-Nemo-Instruct-2407
max_input_tokens: 128000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0
output_price: 0
# Links:
# - https://replicate.com/explore
# - https://replicate.com/pricing
# - https://replicate.com/docs/reference/http#create-a-prediction-using-an-official-model
- platform: replicate
models:
- name: meta/meta-llama-3.1-405b-instruct
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 9.5
output_price: 9.5
- name: meta/meta-llama-3-70b-instruct
max_input_tokens: 8192
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.65
output_price: 2.75
- name: meta/meta-llama-3-8b-instruct
max_input_tokens: 8192
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.05
output_price: 0.25
# Links:
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
- platform: ernie
models:
- name: ernie-4.0-turbo-8k-preview
max_input_tokens: 8192
input_price: 4.2
output_price: 8.4
supports_function_calling: true
- name: ernie-4.0-8k-preview
max_input_tokens: 8192
input_price: 5.6
output_price: 11.2
supports_function_calling: true
- name: ernie-3.5-8k-preview
max_input_tokens: 8192
input_price: 1.68
output_price: 1.68
supports_function_calling: true
- name: ernie-speed-128k
max_input_tokens: 128000
input_price: 0
output_price: 0
- name: bge_large_zh
type: embedding
input_price: 0.28
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 16
- name: bge_large_en
type: embedding
input_price: 0.28
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 16
- name: bce_reranker_base
type: reranker
max_input_tokens: 1024
input_price: 0.28
# Links:
# - https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
# - https://help.aliyun.com/zh/dashscope/developer-reference/use-qwen-by-api
- platform: qianwen
models:
- name: qwen-max
max_input_tokens: 8000
input_price: 5.6
output_price: 16.8
supports_function_calling: true
- name: qwen-max-longcontext
input_price: 5.6
output_price: 16.8
max_input_tokens: 30000
supports_function_calling: true
- name: qwen-plus
max_input_tokens: 32000
input_price: 0.56
output_price: 1.68
supports_function_calling: true
- name: qwen-turbo
max_input_tokens: 8000
input_price: 0.28
output_price: 0.84
supports_function_calling: true
- name: qwen-long
max_input_tokens: 1000000
input_price: 0.07
output_price: 0.28
- name: qwen-vl-max
input_price: 2.8
output_price: 2.8
supports_vision: true
- name: qwen-vl-plus
input_price: 1.12
output_price: 1.12
supports_vision: true
- name: text-embedding-v3
type: embedding
input_price: 0.1
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 6
- name: text-embedding-v2
type: embedding
input_price: 0.1
max_tokens_per_chunk: 2048
default_chunk_size: 2000
max_batch_size: 25
# Links:
# - https://platform.moonshot.cn/docs/intro
# - https://platform.moonshot.cn/docs/pricing/chat
# - https://platform.moonshot.cn/docs/api/chat
- platform: moonshot
models:
- name: moonshot-v1-8k
max_input_tokens: 8000
input_price: 1.68
output_price: 1.68
supports_function_calling: true
- name: moonshot-v1-32k
max_input_tokens: 32000
input_price: 3.36
output_price: 3.36
supports_function_calling: true
- name: moonshot-v1-128k
max_input_tokens: 128000
input_price: 8.4
output_price: 8.4
supports_function_calling: true
# Links:
# - https://platform.deepseek.com/api-docs/quick_start/pricing
# - https://platform.deepseek.com/api-docs/api/create-chat-completion
- platform: deepseek
models:
- name: deepseek-chat
max_input_tokens: 32768
input_price: 0.14
output_price: 0.28
supports_function_calling: true
- name: deepseek-coder
max_input_tokens: 32768
input_price: 0.14
output_price: 0.28
supports_function_calling: true
# Links:
# - https://open.bigmodel.cn/dev/howuse/model
# - https://open.bigmodel.cn/pricing
# - https://open.bigmodel.cn/dev/api#glm-4
- platform: zhipuai
models:
- name: glm-4-plus
max_input_tokens: 128000
input_price: 7
output_price: 7
supports_function_calling: true
- name: glm-4-alltools
max_input_tokens: 2048
input_price: 14
output_price: 14
supports_function_calling: true
- name: glm-4-0520
max_input_tokens: 128000
input_price: 14
output_price: 14
supports_function_calling: true
- name: glm-4-long
max_input_tokens: 1000000
input_price: 0.14
output_price: 0.14
supports_function_calling: true
- name: glm-4-flash
max_input_tokens: 128000
input_price: 0
output_price: 0
supports_function_calling: true
- name: glm-4v-plus
max_input_tokens: 8192
input_price: 1.4
output_price: 1.4
supports_vision: true
- name: embedding-3
type: embedding
max_input_tokens: 8192
input_price: 0.07
max_tokens_per_chunk: 8192
default_chunk_size: 2000
# Links:
# - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B%E4%B8%8E%E8%AE%A1%E8%B4%B9
# - https://platform.lingyiwanwu.com/docs/api-reference#create-chat-completion
- platform: lingyiwanwu
models:
- name: yi-large
max_input_tokens: 32768
input_price: 2.8
output_price: 2.8
- name: yi-large-fc
max_input_tokens: 32768
input_price: 2.8
output_price: 2.8
supports_function_calling: true
- name: yi-large-rag
max_input_tokens: 16384
input_price: 3.5
output_price: 3.5
- name: yi-large-turbo
max_input_tokens: 16384
input_price: 1.68
output_price: 1.68
- name: yi-medium-200k
max_input_tokens: 200000
input_price: 1.68
output_price: 1.68
- name: yi-medium
max_input_tokens: 16384
input_price: 0.35
output_price: 0.35
- name: yi-spark
max_input_tokens: 16384
input_price: 0.14
output_price: 0.14
- name: yi-vision
max_input_tokens: 16384
input_price: 0.84
output_price: 0.84
supports_vision: true
# Links:
# - https://github.com/marketplace/models
- platform: github
models:
- name: gpt-4o
max_input_tokens: 128000
supports_function_calling: true
- name: gpt-4o-mini
max_input_tokens: 128000
supports_function_calling: true
- name: text-embedding-3-large
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_tokens_per_chunk: 8191
default_chunk_size: 3000
max_batch_size: 100
- name: meta-llama-3.1-405b-instruct
max_input_tokens: 128000
- name: meta-llama-3.1-70b-instruct
max_input_tokens: 128000
- name: meta-llama-3.1-8b-instruct
max_input_tokens: 128000
- name: meta-llama-3-70b-instruct
max_input_tokens: 8192
- name: meta-llama-3-8b-instruct
max_input_tokens: 8192
- name: mistral-large-2407
max_input_tokens: 128000
supports_function_calling: true
- name: mistral-nemo
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r-plus
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-command-r
max_input_tokens: 128000
supports_function_calling: true
- name: cohere-embed-v3-english
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: cohere-embed-v3-multilingual
type: embedding
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 96
- name: ai21-jamba-1.5-large
max_input_tokens: 256000
supports_function_calling: true
- name: ai21-jamba-1.5-mini
max_input_tokens: 256000
supports_function_calling: true
- name: phi-3.5-mini-instruct
max_input_tokens: 128000
- name: phi-3-medium-128k-instruct
max_input_tokens: 128000
# Links:
# - https://deepinfra.com/models
# - https://deepinfra.com/pricing
- platform: deepinfra
models:
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32000
input_price: 2.7
output_price: 2.7
supports_function_calling: true
- name: meta-llama/Meta-Llama-3.1-70B-Instruct
max_input_tokens: 128000
input_price: 0.52
output_price: 0.75
supports_function_calling: true
- name: meta-llama/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 128000
input_price: 0.09
output_price: 0.09
supports_function_calling: true
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 0.59
output_price: 0.79
- name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192
input_price: 0.08
output_price: 0.08
- name: mistralai/Mistral-Nemo-Instruct-2407
max_input_tokens: 128000
input_price: 0.13
output_price: 0.13
- name: google/gemma-2-27b-it
max_input_tokens: 8192
input_price: 0.27
output_price: 0.27
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0.09
output_price: 0.09
- name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768
input_price: 0.59
output_price: 0.79
supports_function_calling: true
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0.01
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: intfloat/e5-large-v2
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: intfloat/multilingual-e5-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
input_price: 0.01
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Links:
# - https://fireworks.ai/models
# - https://fireworks.ai/pricing
- platform: fireworks
models:
- name: accounts/fireworks/models/llama-v3p1-405b-instruct
max_input_tokens: 131072
input_price: 3
output_price: 3
- name: accounts/fireworks/models/llama-v3p1-70b-instruct
max_input_tokens: 131072
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/llama-v3p1-8b-instruct
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/llama-v3-70b-instruct
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
- name: accounts/fireworks/models/llama-v3-8b-instruct
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/gemma2-9b-it
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/phi-3-vision-128k-instruct
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
supports_vision: true
- name: accounts/fireworks/models/firellava-13b
max_input_tokens: 4096
input_price: 0.2
output_price: 0.2
supports_vision: true
- name: accounts/fireworks/models/firefunction-v2
max_input_tokens: 32768
input_price: 0.2
output_price: 0.2
supports_function_calling: true
- name: nomic-ai/nomic-embed-text-v1.5
type: embedding
input_price: 0.008
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: WhereIsAI/UAE-Large-V1
type: embedding
input_price: 0.016
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
input_price: 0.016
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Links:
# - https://openrouter.ai/docs#models
- platform: openrouter
models:
- name: openai/gpt-4o
max_input_tokens: 128000
input_price: 5
output_price: 15
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o-2024-08-06
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_vision: true
supports_function_calling: true
- name: openai/chatgpt-4o-latest
max_input_tokens: 128000
input_price: 5
output_price: 15
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4o-mini
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_vision: true
supports_function_calling: true
- name: openai/gpt-4-turbo
max_input_tokens: 128000
input_price: 10
output_price: 30
supports_vision: true
supports_function_calling: true
- name: openai/o1-preview
max_input_tokens: 128000
input_price: 15
output_price: 60
- name: openai/o1-mini
max_input_tokens: 128000
input_price: 3
output_price: 12
- name: openai/gpt-3.5-turbo
max_input_tokens: 16385
input_price: 0.5
output_price: 1.5
supports_function_calling: true
- name: google/gemini-pro-1.5
max_input_tokens: 4000000
input_price: 2.5
output_price: 7.5
supports_vision: true
supports_function_calling: true
- name: google/gemini-pro-1.5-exp
max_input_tokens: 4000000
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: google/gemini-flash-1.5
max_input_tokens: 4000000
input_price: 0.0375
output_price: 0.15
supports_vision: true
supports_function_calling: true
- name: google/gemini-flash-1.5-exp
max_input_tokens: 4000000
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: google/gemini-flash-8b-1.5-exp
max_input_tokens: 4000000
input_price: 0
output_price: 0
supports_vision: true
supports_function_calling: true
- name: google/gemini-pro
max_input_tokens: 131040
input_price: 0.125
output_price: 0.375
supports_function_calling: true
- name: google/gemma-2-27b-it
max_input_tokens: 2800000
input_price: 0.27
output_price: 0.27
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0.06
output_price: 0.06
- name: anthropic/claude-3.5-sonnet
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-opus
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-sonnet
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-haiku
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
supports_function_calling: true
- name: meta-llama/llama-3.1-405b-instruct
max_input_tokens: 131072
input_price: 3
output_price: 3
supports_function_calling: true
- name: meta-llama/llama-3.1-70b-instruct
max_input_tokens: 131072
input_price: 0.75
output_price: 0.75
supports_function_calling: true
- name: meta-llama/llama-3.1-8b-instruct
max_input_tokens: 131072
input_price: 0.09
output_price: 0.09
supports_function_calling: true
- name: meta-llama/llama-3-70b-instruct
max_input_tokens: 8192
input_price: 0.59
output_price: 0.79
supports_function_calling: true
- name: meta-llama/llama-3-8b-instruct
max_input_tokens: 8192
input_price: 0.07
output_price: 0.07
supports_function_calling: true
- name: mistralai/mistral-large
max_input_tokens: 128000
input_price: 3
output_price: 9
supports_function_calling: true
- name: mistralai/mistral-nemo
max_input_tokens: 128000
input_price: 0.18
output_price: 0.18
supports_function_calling: true
- name: mistralai/codestral-mamba
max_input_tokens: 256000
input_price: 0.25
output_price: 0.25
- name: ai21/jamba-1-5-large
max_input_tokens: 256000
input_price: 2
output_price: 8
supports_function_calling: true
- name: ai21/jamba-1-5-mini
max_input_tokens: 256000
input_price: 0.2
output_price: 0.4
supports_function_calling: true
- name: cohere/command-r-plus
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: cohere/command-r-plus-08-2024
max_input_tokens: 128000
input_price: 2.5
output_price: 10
supports_function_calling: true
- name: cohere/command-r
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_function_calling: true
- name: cohere/command-r-08-2024
max_input_tokens: 128000
input_price: 0.15
output_price: 0.6
supports_function_calling: true
- name: deepseek/deepseek-chat
max_input_tokens: 32768
input_price: 0.14
output_price: 0.28
supports_function_calling: true
- name: deepseek/deepseek-coder
max_input_tokens: 32768
input_price: 0.14
output_price: 0.28
supports_function_calling: true
- name: perplexity/llama-3.1-sonar-huge-128k-online
max_input_tokens: 127072
input_price: 5
output_price: 5
- name: perplexity/llama-3.1-sonar-large-128k-online
max_input_tokens: 127072
input_price: 1
output_price: 1
- name: perplexity/llama-3.1-sonar-large-128k-chat
max_input_tokens: 131072
input_price: 1
output_price: 1
- name: perplexity/llama-3.1-sonar-small-128k-online
max_input_tokens: 127072
input_price: 0.2
output_price: 0.2
- name: perplexity/llama-3.1-sonar-small-128k-chat
max_input_tokens: 131072
input_price: 0.2
output_price: 0.2
- name: 01-ai/yi-large
max_input_tokens: 32768
input_price: 3
output_price: 3
- name: 01-ai/yi-large-fc
max_input_tokens: 16384
input_price: 3
output_price: 3
supports_function_calling: true
- name: 01-ai/yi-vision
max_input_tokens: 4096
input_price: 0.84
output_price: 0.84
supports_vision: true
- name: microsoft/phi-3.5-mini-128k-instruct
max_input_tokens: 128000
input_price: 0.1
output_price: 0.1
- name: microsoft/phi-3-medium-128k-instruct
max_input_tokens: 128000
input_price: 1
output_price: 1
- name: microsoft/phi-3-mini-128k-instruct
max_input_tokens: 128000
input_price: 0.1
output_price: 0.1
- name: qwen/qwen-2-72b-instruct
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
# Links:
# - https://octo.ai/docs/getting-started/inference-models
# - https://octo.ai/docs/getting-started/pricing-and-billing
- platform: octoai
models:
- name: meta-llama-3.1-405b-instruct
max_input_tokens: 131072
input_price: 3
output_price: 9
supports_function_calling: true
- name: meta-llama-3.1-70b-instruct
max_input_tokens: 131072
input_price: 0.9
output_price: 0.9
supports_function_calling: true
- name: meta-llama-3.1-8b-instruct
max_input_tokens: 131072
input_price: 0.15
output_price: 0.15
supports_function_calling: true
- name: mistral-nemo-instruct
max_input_tokens: 65536
input_price: 0.2
output_price: 0.2
- name: thenlper/gte-large
type: embedding
input_price: 0.05
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Links:
# - https://siliconflow.cn/zh-cn/maaspricing
# - https://docs.siliconflow.cn/reference/chat-completions-3
- platform: siliconflow
models:
- name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: meta-llama/Meta-Llama-3.1-405B-Instruct
max_input_tokens: 32768
input_price: 2.94
output_price: 2.94
- name: meta-llama/Meta-Llama-3.1-70B-Instruct
max_input_tokens: 32768
input_price: 0.578
output_price: 0.578
- name: meta-llama/Meta-Llama-3.1-8B-Instruct
max_input_tokens: 32768
input_price: 0
output_price: 0
- name: google/gemma-2-27b-it
max_input_tokens: 8192
input_price: 0.176
output_price: 0.176
- name: google/gemma-2-9b-it
max_input_tokens: 8192
input_price: 0
output_price: 0
- name: deepseek-ai/DeepSeek-V2.5
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
- name: deepseek-ai/DeepSeek-Coder-V2-Instruct
max_input_tokens: 32768
input_price: 0.186
output_price: 0.186
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-large-zh-v1.5
type: embedding
input_price: 0
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 2000
max_batch_size: 100
- name: BAAI/bge-reranker-v2-m3
type: reranker
max_input_tokens: 8192
input_price: 0
# Links:
# - https://docs.together.ai/docs/inference-models
# - https://docs.together.ai/docs/embedding-models
# - https://www.together.ai/pricing
- platform: together
models:
- name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
max_input_tokens: 32768
input_price: 5
output_price: 5
- name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
max_input_tokens: 8192
input_price: 0.88
output_price: 0.88
- name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
max_input_tokens: 8192
input_price: 0.18
output_price: 0.18
- name: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
max_input_tokens: 8192
input_price: 0.88
output_price: 0.88
- name: meta-llama/Meta-Llama-3-8B-Instruct-Turbo
max_input_tokens: 8192
input_price: 0.18
output_price: 0.18
- name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768
input_price: 0.9
output_price: 0.9
- name: WhereIsAI/UAE-Large-V1
type: embedding
input_price: 0.016
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-large-en-v1.5
type: embedding
input_price: 0.016
max_tokens_per_chunk: 512
default_chunk_size: 1000
max_batch_size: 100
# Links:
# - https://jina.ai/
# - https://api.jina.ai/redoc
- platform: jina
models:
- name: jina-clip-v1
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-embeddings-v2-base-en
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-embeddings-v2-base-zh
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-colbert-v2
type: embedding
input_price: 0
max_tokens_per_chunk: 8192
default_chunk_size: 1500
max_batch_size: 100
- name: jina-reranker-v2-base-multilingual
type: reranker
max_input_tokens: 1024
input_price: 0
- name: jina-reranker-v1-base-en
type: reranker
max_input_tokens: 8192
input_price: 0
- name: jina-colbert-v2
type: reranker
max_input_tokens: 8192
input_price: 0
# Links:
# - https://docs.voyageai.com/docs/embeddings
# - https://docs.voyageai.com/docs/pricing
# - https://docs.voyageai.com/reference/
- platform: voyageai
models:
- name: voyage-3
type: embedding
max_input_tokens: 320000
input_price: 0.06
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-3-lite
type: embedding
max_input_tokens: 1000000
input_price: 0.02
max_tokens_per_chunk: 32000
default_chunk_size: 1000
max_batch_size: 128
- name: voyage-multilingual-2
type: embedding
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 32000
default_chunk_size: 2000
max_batch_size: 128
- name: voyage-code-2
type: embedding
max_input_tokens: 120000
input_price: 0.12
max_tokens_per_chunk: 16000
default_chunk_size: 3000
max_batch_size: 128
- name: rerank-1
type: reranker
max_input_tokens: 8000
input_price: 0.05