# aichat/models.yaml
# 2024-04-28 11:27:06 +08:00
#
# 402 lines
# 11 KiB
# YAML

- type: openai
  # OpenAI chat-completion models.
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://platform.openai.com/docs/models
  #   - https://openai.com/pricing
  #   - https://platform.openai.com/docs/api-reference/chat
  models:
    # --- GPT-3.5 Turbo ---
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5
    - name: gpt-3.5-turbo-1106
      max_input_tokens: 16385
      max_output_tokens?: 4096
      input_price: 1
      output_price: 2

    # --- GPT-4 Turbo (128k input window) ---
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: gpt-4-turbo-preview
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
    - name: gpt-4-1106-preview
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
    # The only entry in this list whose max_output_tokens key has no "?"
    # suffix; kept exactly as written.
    - name: gpt-4-vision-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true

    # --- Original GPT-4 ---
    - name: gpt-4
      max_input_tokens: 8192
      max_output_tokens?: 4096
      input_price: 30
      output_price: 60
    - name: gpt-4-32k
      max_input_tokens: 32768
      max_output_tokens?: 4096
      input_price: 60
      output_price: 120
- type: gemini
  # Google Gemini models.
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  #
  # The reference links stay fully commented out, matching every other
  # provider entry; a live `docs:` key with only commented items would load
  # as a stray null-valued field.
  # docs:
  #   - https://ai.google.dev/models/gemini
  #   - https://ai.google.dev/pricing
  #   - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
  models:
    - name: gemini-1.0-pro-latest
      max_input_tokens: 30720
      max_output_tokens?: 2048
      input_price: 0.5
      output_price: 1.5
    - name: gemini-1.0-pro-vision-latest
      max_input_tokens: 12288
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5
      supports_vision: true
    - name: gemini-1.5-pro-latest
      max_input_tokens: 1048576
      max_output_tokens?: 8192
      input_price: 7
      output_price: 21
      supports_vision: true
- type: claude
  # Anthropic Claude 3 models. All three share a 200000-token input window,
  # set max_output_tokens without a "?" suffix, and are flagged as
  # vision-capable.
  # NOTE(review): prices look like USD per 1M tokens -- confirm.
  # docs:
  #   - https://docs.anthropic.com/claude/docs/models-overview
  #   - https://docs.anthropic.com/claude/reference/messages-streaming
  models:
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 15
      output_price: 75
      supports_vision: true

    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 3
      output_price: 15
      supports_vision: true

    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
- type: mistral
  # Mistral AI models (open-weight and hosted "latest" aliases).
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://docs.mistral.ai/getting-started/models/
  #   - https://mistral.ai/technology/#pricing
  #   - https://docs.mistral.ai/api/
  models:
    # --- open-* models ---
    - name: open-mistral-7b
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 0.25
      output_price: 0.25
    - name: open-mixtral-8x7b
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 0.7
      output_price: 0.7
    - name: open-mixtral-8x22b
      max_input_tokens: 64000
      max_output_tokens?: 8191
      input_price: 2
      output_price: 6

    # --- mistral-*-latest aliases ---
    - name: mistral-small-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 2
      output_price: 6
    - name: mistral-medium-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 2.7
      output_price: 8.1
    - name: mistral-large-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 8
      output_price: 24
- type: cohere
  # Cohere Command R models; both entries list a 128000-token input window.
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://docs.cohere.com/docs/command-r
  #   - https://docs.cohere.com/docs/command-r-plus
  #   - https://cohere.com/pricing
  #   - https://docs.cohere.com/reference/chat
  models:
    - name: command-r
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5

    - name: command-r-plus
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 3
      output_price: 15
- type: perplexity
  # Perplexity-hosted models.
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://docs.perplexity.ai/docs/model-cards
  #   - https://docs.perplexity.ai/docs/pricing
  #   - https://docs.perplexity.ai/reference/post_chat_completions
  models:
    # --- sonar-* models: only an input limit is listed (no output limit,
    #     no prices) ---
    - name: sonar-small-chat
      max_input_tokens: 16384
    - name: sonar-small-online
      max_input_tokens: 12000
    - name: sonar-medium-chat
      max_input_tokens: 16384
    - name: sonar-medium-online
      max_input_tokens: 12000

    # --- Llama family ---
    - name: llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens?: 8192
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens?: 8192
      input_price: 1
      output_price: 1
    - name: codellama-70b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 4096
      input_price: 1
      output_price: 1

    # --- Mistral / Mixtral family ---
    - name: mistral-7b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 0.2
      output_price: 0.2
    - name: mixtral-8x7b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 0.6
      output_price: 0.6
    - name: mixtral-8x22b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 1
      output_price: 1
- type: groq
  # Groq-hosted models. No prices are listed for this provider, and every
  # entry sets max_output_tokens (no "?" suffix) equal to max_input_tokens.
  # docs:
  #   - https://console.groq.com/docs/models
  #   - https://console.groq.com/docs/text-chat
  models:
    # --- Llama ---
    - name: llama3-8b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
    - name: llama3-70b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
    - name: llama2-70b-4096
      max_input_tokens: 4096
      max_output_tokens: 4096

    # --- Other ---
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
      max_output_tokens: 32768
    - name: gemma-7b-it
      max_input_tokens: 8192
      max_output_tokens: 8192
- type: vertexai
  # Google Vertex AI: Gemini models plus the Claude 3 models, whose names
  # use an "@<date>" suffix here.
  # NOTE(review): prices look like USD per 1M tokens, and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
  #   - https://cloud.google.com/vertex-ai/generative-ai/pricing
  #   - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
  models:
    # --- Gemini ---
    - name: gemini-1.0-pro
      max_input_tokens: 24568
      max_output_tokens?: 8193
      input_price: 0.125
      output_price: 0.375
    - name: gemini-1.0-pro-vision
      max_input_tokens: 14336
      max_output_tokens?: 2049
      input_price: 0.125
      output_price: 0.375
      supports_vision: true
    - name: gemini-1.5-pro-preview-0409
      max_input_tokens: 1000000
      max_output_tokens?: 8193
      input_price: 2.5
      output_price: 7.5
      supports_vision: true

    # --- Claude 3 (max_output_tokens has no "?" suffix on these) ---
    - name: claude-3-opus@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: claude-3-sonnet@20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: claude-3-haiku@20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 0.25
      output_price: 1.25
      supports_vision: true
- type: bedrock
  # AWS Bedrock model IDs (Anthropic, Meta and Mistral families). Every
  # entry sets max_output_tokens without a "?" suffix.
  # NOTE(review): prices look like USD per 1M tokens -- confirm against the
  # Bedrock pricing page.
  # docs:
  #   - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
  #   - https://aws.amazon.com/bedrock/pricing/
  models:
    # --- Anthropic Claude 3 ---
    - name: anthropic.claude-3-opus-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: anthropic.claude-3-sonnet-20240229-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: anthropic.claude-3-haiku-20240307-v1:0
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 0.25
      output_price: 1.25
      supports_vision: true

    # --- Meta Llama ---
    - name: meta.llama2-13b-chat-v1
      max_input_tokens: 4096
      max_output_tokens: 2048
      input_price: 0.75
      output_price: 1
    - name: meta.llama2-70b-chat-v1
      max_input_tokens: 4096
      max_output_tokens: 2048
      input_price: 1.95
      output_price: 2.56
    - name: meta.llama3-8b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 4096
      input_price: 0.4
      output_price: 0.6
    - name: meta.llama3-70b-instruct-v1:0
      max_input_tokens: 8192
      max_output_tokens: 4096
      input_price: 2.65
      output_price: 3.5

    # --- Mistral ---
    - name: mistral.mistral-7b-instruct-v0:2
      max_input_tokens: 32000
      max_output_tokens: 8192
      input_price: 0.15
      output_price: 0.2
    - name: mistral.mixtral-8x7b-instruct-v0:1
      max_input_tokens: 32000
      max_output_tokens: 4096
      input_price: 0.45
      output_price: 0.7
    - name: mistral.mistral-large-2402-v1:0
      max_input_tokens: 32000
      max_output_tokens: 8192
      input_price: 8
      # Was 2.4, which made output cheaper than input (a dropped digit).
      # 24 matches the 8 / 24 listed for mistral-large-latest under the
      # mistral provider.
      output_price: 24
- type: ernie
  # Baidu ERNIE models. Every entry sets max_output_tokens without a "?"
  # suffix.
  # NOTE(review): prices are presumably converted to the same unit as the
  # other providers (USD per 1M tokens?) -- confirm.
  # docs:
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
  models:
    # --- preview models ---
    - name: ernie-4.0-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      input_price: 16.8
      output_price: 16.8
    - name: ernie-3.5-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      input_price: 1.68
      output_price: 1.68

    # --- speed / lite / tiny ---
    - name: ernie-speed-128k
      max_input_tokens: 124000
      max_output_tokens: 4096
      input_price: 0.56
      output_price: 1.12
    - name: ernie-lite-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      input_price: 0.42
      output_price: 0.84
    - name: ernie-tiny-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      input_price: 0.14
      output_price: 0.14
- type: qianwen
  # Alibaba Qwen (Tongyi Qianwen) text and vision-language models.
  # NOTE(review): prices are presumably converted to the same unit as the
  # other providers (USD per 1M tokens?), and a trailing "?" on
  # max_output_tokens presumably marks the limit as optional -- confirm
  # against the code that loads this file.
  # docs:
  #   - https://help.aliyun.com/zh/dashscope/developer-reference/tongyiqianwen-large-language-models/
  #   - https://help.aliyun.com/zh/dashscope/developer-reference/qwen-vl-plus/
  models:
    # --- text models ---
    - name: qwen-turbo
      max_input_tokens: 6000
      max_output_tokens?: 1500
      input_price: 1.12
      output_price: 1.12
    - name: qwen-plus
      max_input_tokens: 30000
      max_output_tokens?: 2000
      input_price: 2.8
      output_price: 2.8
    - name: qwen-max
      max_input_tokens: 6000
      max_output_tokens?: 2000
      input_price: 16.8
      output_price: 16.8
    # No prices are listed for this entry.
    - name: qwen-max-longcontext
      max_input_tokens: 28000
      max_output_tokens?: 2000

    # --- vision-language models: no token limits are listed ---
    - name: qwen-vl-plus
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: qwen-vl-max
      input_price: 2.8
      output_price: 2.8
      supports_vision: true
- type: moonshot
  # Moonshot models. Only an input limit and prices are listed; no entry
  # sets max_output_tokens, and input and output prices are equal for each.
  # NOTE(review): prices are presumably converted to the same unit as the
  # other providers (USD per 1M tokens?) -- confirm.
  # docs:
  #   - https://platform.moonshot.cn/docs/intro
  #   - https://platform.moonshot.cn/docs/pricing
  #   - https://platform.moonshot.cn/docs/api-reference
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68

    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36

    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4