2024-05-13 22:11:34 +00:00
|
|
|
# notes:
|
|
|
|
# - do not submit pull requests to add new models; this list will be updated in batches with new releases.
|
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: openai
|
2024-04-29 03:57:02 +00:00
|
|
|
# docs:
|
2024-04-28 02:14:12 +00:00
|
|
|
# - https://platform.openai.com/docs/models
|
|
|
|
# - https://openai.com/pricing
|
|
|
|
# - https://platform.openai.com/docs/api-reference/chat
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from api error
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-05-13 22:11:34 +00:00
|
|
|
- name: gpt-4o
|
|
|
|
max_input_tokens: 128000
|
|
|
|
max_output_tokens: 4096
|
|
|
|
input_price: 5
|
|
|
|
output_price: 15
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: gpt-4-turbo
|
|
|
|
max_input_tokens: 128000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 10
|
|
|
|
output_price: 30
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: gpt-4-turbo-preview
|
|
|
|
max_input_tokens: 128000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 10
|
|
|
|
output_price: 30
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: gpt-4-1106-preview
|
|
|
|
max_input_tokens: 128000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 10
|
|
|
|
output_price: 30
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: gpt-4
|
|
|
|
max_input_tokens: 8192
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 30
|
|
|
|
output_price: 60
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: gpt-3.5-turbo
|
|
|
|
max_input_tokens: 16385
|
|
|
|
max_output_tokens: 4096
|
|
|
|
input_price: 0.5
|
|
|
|
output_price: 1.5
|
|
|
|
supports_function_calling: true
|
|
|
|
- name: gpt-3.5-turbo-1106
|
|
|
|
max_input_tokens: 16385
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 1
|
|
|
|
output_price: 2
|
|
|
|
supports_function_calling: true
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: text-embedding-3-large
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 8191
|
2024-06-12 00:31:22 +00:00
|
|
|
default_chunk_size: 4000
|
2024-06-05 01:02:23 +00:00
|
|
|
max_concurrent_chunks: 100
|
|
|
|
- name: text-embedding-3-small
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 8191
|
2024-06-12 00:31:22 +00:00
|
|
|
default_chunk_size: 4000
|
2024-06-05 01:02:23 +00:00
|
|
|
max_concurrent_chunks: 100
|
2024-06-19 04:15:54 +00:00
|
|
|
- name: text-embedding-ada-002
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 8191
|
|
|
|
default_chunk_size: 4000
|
|
|
|
max_concurrent_chunks: 100
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: gemini
|
2024-04-29 03:57:02 +00:00
|
|
|
# docs:
|
|
|
|
# - https://ai.google.dev/models/gemini
|
|
|
|
# - https://ai.google.dev/pricing
|
|
|
|
# - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
|
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from list models api
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: gemini-1.5-pro-latest
|
2024-05-15 01:36:31 +00:00
|
|
|
max_input_tokens: 1048576
|
|
|
|
max_output_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 3.5
|
|
|
|
output_price: 10.5
|
2024-05-15 01:36:31 +00:00
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: gemini-1.5-flash-latest
|
2024-04-28 02:14:12 +00:00
|
|
|
max_input_tokens: 1048576
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.35
|
|
|
|
output_price: 1.05
|
2024-04-28 02:14:12 +00:00
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: gemini-1.0-pro-latest
|
|
|
|
max_input_tokens: 30720
|
|
|
|
max_output_tokens: 2048
|
|
|
|
input_price: 0.5
|
|
|
|
output_price: 1.5
|
|
|
|
supports_function_calling: true
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: text-embedding-004
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 2048
|
|
|
|
default_chunk_size: 2000
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: claude
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://docs.anthropic.com/claude/docs/models-overview
|
|
|
|
# - https://docs.anthropic.com/claude/reference/messages-streaming
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from models doc
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
|
|
|
- name: claude-3-opus-20240229
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 15
|
|
|
|
output_price: 75
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: claude-3-sonnet-20240229
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: claude-3-haiku-20240307
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 0.25
|
|
|
|
output_price: 1.25
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: mistral
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
2024-04-28 03:27:06 +00:00
|
|
|
# - https://docs.mistral.ai/getting-started/models/
|
2024-04-28 02:14:12 +00:00
|
|
|
# - https://mistral.ai/technology/#pricing
|
|
|
|
# - https://docs.mistral.ai/api/
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - unable to get max_output_tokens info
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
|
|
|
- name: open-mistral-7b
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 0.25
|
|
|
|
output_price: 0.25
|
|
|
|
- name: open-mixtral-8x7b
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 0.7
|
|
|
|
output_price: 0.7
|
|
|
|
- name: open-mixtral-8x22b
|
|
|
|
max_input_tokens: 64000
|
|
|
|
input_price: 2
|
|
|
|
output_price: 6
|
|
|
|
- name: mistral-small-latest
|
|
|
|
max_input_tokens: 32000
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 1
|
|
|
|
output_price: 3
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: mistral-large-latest
|
|
|
|
max_input_tokens: 32000
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 4
|
|
|
|
output_price: 12
|
|
|
|
- name: codestral-latest
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 1
|
|
|
|
output_price: 3
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: mistral-embed
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 8092
|
2024-06-12 00:31:22 +00:00
|
|
|
default_chunk_size: 4000
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: cohere
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://docs.cohere.com/docs/command-r
|
|
|
|
# - https://cohere.com/pricing
|
|
|
|
# - https://docs.cohere.com/reference/chat
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from api error
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
|
|
|
- name: command-r
|
|
|
|
max_input_tokens: 128000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4000
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 0.5
|
|
|
|
output_price: 1.5
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: command-r-plus
|
|
|
|
max_input_tokens: 128000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4000
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: embed-english-v3.0
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 512
|
2024-06-14 11:12:18 +00:00
|
|
|
default_chunk_size: 1000
|
2024-06-05 01:02:23 +00:00
|
|
|
max_concurrent_chunks: 96
|
|
|
|
- name: embed-multilingual-v3.0
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 512
|
2024-06-14 11:12:18 +00:00
|
|
|
default_chunk_size: 1000
|
2024-06-05 01:02:23 +00:00
|
|
|
max_concurrent_chunks: 96
|
2024-06-20 22:00:26 +00:00
|
|
|
- name: rerank-english-v3.0
|
|
|
|
mode: rerank
|
|
|
|
max_input_tokens: 4096
|
|
|
|
- name: rerank-multilingual-v3.0
|
|
|
|
mode: rerank
|
|
|
|
max_input_tokens: 4096
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-06-18 04:37:25 +00:00
|
|
|
- platform: reka
|
|
|
|
# docs:
|
|
|
|
# - https://www.reka.ai/ourmodels
|
|
|
|
# - https://www.reka.ai/reka-deploy
|
|
|
|
# - https://docs.reka.ai/api-reference/chat/create
|
|
|
|
models:
|
|
|
|
- name: reka-core
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 10
|
|
|
|
output_price: 25
|
|
|
|
supports_vision: true
|
|
|
|
- name: reka-flash
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.8
|
|
|
|
output_price: 2
|
|
|
|
supports_vision: true
|
|
|
|
- name: reka-edge
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.4
|
|
|
|
output_price: 1
|
|
|
|
supports_vision: true
|
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: perplexity
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://docs.perplexity.ai/docs/model-cards
|
|
|
|
# - https://docs.perplexity.ai/docs/pricing
|
|
|
|
# - https://docs.perplexity.ai/reference/post_chat_completions
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from api error
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-05-07 08:16:18 +00:00
|
|
|
- name: llama-3-sonar-small-32k-chat
|
|
|
|
max_input_tokens: 32768
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 32768
|
2024-05-07 08:16:18 +00:00
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: llama-3-sonar-large-32k-chat
|
|
|
|
max_input_tokens: 32768
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 32768
|
2024-05-07 08:16:18 +00:00
|
|
|
input_price: 0.6
|
|
|
|
output_price: 0.6
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: llama-3-8b-instruct
|
|
|
|
max_input_tokens: 8192
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: llama-3-70b-instruct
|
|
|
|
max_input_tokens: 8192
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 1
|
|
|
|
output_price: 1
|
|
|
|
- name: mixtral-8x7b-instruct
|
|
|
|
max_input_tokens: 16384
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 16384
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 0.6
|
|
|
|
output_price: 0.6
|
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: groq
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://console.groq.com/docs/models
|
2024-04-29 09:05:02 +00:00
|
|
|
# - https://wow.groq.com
|
2024-04-28 02:14:12 +00:00
|
|
|
# - https://console.groq.com/docs/text-chat
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
2024-04-29 09:05:02 +00:00
|
|
|
# - get max_output_tokens info from playground
|
2024-04-29 03:57:02 +00:00
|
|
|
# - all models are free with rate limits
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
|
|
|
- name: llama3-8b-8192
|
|
|
|
max_input_tokens: 8192
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-04-29 09:05:02 +00:00
|
|
|
input_price: 0.05
|
|
|
|
output_price: 0.10
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: llama3-70b-8192
|
|
|
|
max_input_tokens: 8192
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-04-29 09:05:02 +00:00
|
|
|
input_price: 0.59
|
|
|
|
output_price: 0.79
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: mixtral-8x7b-32768
|
|
|
|
max_input_tokens: 32768
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 32768
|
2024-04-29 09:05:02 +00:00
|
|
|
input_price: 0.27
|
|
|
|
output_price: 0.27
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: vertexai
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
|
|
|
|
# - https://cloud.google.com/vertex-ai/generative-ai/pricing
|
|
|
|
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from models doc
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: gemini-1.5-pro-001
|
|
|
|
max_input_tokens: 1000000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 1.25
|
|
|
|
output_price: 3.75
|
2024-04-28 02:14:12 +00:00
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
|
|
|
- name: gemini-1.5-flash-001
|
2024-05-15 01:36:31 +00:00
|
|
|
max_input_tokens: 1000000
|
|
|
|
max_output_tokens: 8192
|
|
|
|
input_price: 0.125
|
|
|
|
output_price: 0.375
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
|
|
|
- name: gemini-1.0-pro-002
|
|
|
|
max_input_tokens: 24568
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.125
|
|
|
|
output_price: 0.375
|
|
|
|
supports_function_calling: true
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: text-embedding-004
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 3072
|
|
|
|
default_chunk_size: 3000
|
|
|
|
max_concurrent_chunks: 5
|
|
|
|
- name: text-multilingual-embedding-002
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 3072
|
|
|
|
default_chunk_size: 3000
|
|
|
|
max_concurrent_chunks: 5
|
2024-05-06 00:19:42 +00:00
|
|
|
|
|
|
|
- platform: vertexai-claude
|
|
|
|
# docs:
|
|
|
|
# - https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
|
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from models doc
|
|
|
|
# - claude models have not been tested
|
|
|
|
models:
|
2024-04-28 02:55:41 +00:00
|
|
|
- name: claude-3-opus@20240229
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 15
|
|
|
|
output_price: 75
|
2024-04-28 02:55:41 +00:00
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:55:41 +00:00
|
|
|
- name: claude-3-sonnet@20240229
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
2024-04-28 02:55:41 +00:00
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:55:41 +00:00
|
|
|
- name: claude-3-haiku@20240307
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 0.25
|
|
|
|
output_price: 1.25
|
2024-04-28 02:55:41 +00:00
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: bedrock
|
2024-04-28 03:27:06 +00:00
|
|
|
# docs:
|
|
|
|
# - https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns
|
|
|
|
# - https://aws.amazon.com/bedrock/pricing/
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from playground
|
|
|
|
# - claude/llama models have not been tested
|
2024-04-28 03:27:06 +00:00
|
|
|
models:
|
|
|
|
- name: anthropic.claude-3-opus-20240229-v1:0
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 15
|
|
|
|
output_price: 75
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 03:27:06 +00:00
|
|
|
- name: anthropic.claude-3-sonnet-20240229-v1:0
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 03:27:06 +00:00
|
|
|
- name: anthropic.claude-3-haiku-20240307-v1:0
|
|
|
|
max_input_tokens: 200000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 0.25
|
|
|
|
output_price: 1.25
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 03:27:06 +00:00
|
|
|
- name: meta.llama3-8b-instruct-v1:0
|
|
|
|
max_input_tokens: 8192
|
2024-05-25 21:52:24 +00:00
|
|
|
max_output_tokens: 2048
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 0.4
|
|
|
|
output_price: 0.6
|
|
|
|
- name: meta.llama3-70b-instruct-v1:0
|
|
|
|
max_input_tokens: 8192
|
2024-05-25 21:52:24 +00:00
|
|
|
max_output_tokens: 2048
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 2.65
|
|
|
|
output_price: 3.5
|
|
|
|
- name: mistral.mistral-7b-instruct-v0:2
|
|
|
|
max_input_tokens: 32000
|
|
|
|
max_output_tokens: 8192
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 0.15
|
|
|
|
output_price: 0.2
|
|
|
|
- name: mistral.mixtral-8x7b-instruct-v0:1
|
|
|
|
max_input_tokens: 32000
|
2024-04-29 03:57:02 +00:00
|
|
|
max_output_tokens: 8192
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 0.45
|
|
|
|
output_price: 0.7
|
|
|
|
- name: mistral.mistral-large-2402-v1:0
|
|
|
|
max_input_tokens: 32000
|
|
|
|
max_output_tokens: 8192
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 03:27:06 +00:00
|
|
|
input_price: 8
|
|
|
|
output_price: 24
|
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: cloudflare
|
2024-04-29 01:27:11 +00:00
|
|
|
# docs:
|
|
|
|
# - https://developers.cloudflare.com/workers-ai/models/
|
|
|
|
# - https://developers.cloudflare.com/workers-ai/platform/pricing/
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
2024-04-30 22:01:10 +00:00
|
|
|
# - unable to get max_output_tokens info
|
2024-04-29 01:27:11 +00:00
|
|
|
models:
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: '@cf/meta/llama-3-8b-instruct'
|
|
|
|
max_input_tokens: 4096
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
|
|
|
|
max_input_tokens: 4096
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: '@cf/qwen/qwen1.5-14b-chat-awq'
|
|
|
|
max_input_tokens: 4096
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-29 01:27:11 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: replicate
|
2024-04-29 23:07:09 +00:00
|
|
|
# docs:
|
2024-06-14 22:39:55 +00:00
|
|
|
# - https://replicate.com/explore
|
2024-04-29 23:07:09 +00:00
|
|
|
# - https://replicate.com/pricing
|
2024-05-18 11:06:21 +00:00
|
|
|
# - https://replicate.com/docs/reference/http
|
2024-04-29 23:07:09 +00:00
|
|
|
# notes:
|
|
|
|
# - max_output_tokens is required but unknown
|
|
|
|
models:
|
|
|
|
- name: meta/meta-llama-3-70b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-29 23:07:09 +00:00
|
|
|
input_price: 0.65
|
|
|
|
output_price: 2.75
|
|
|
|
- name: meta/meta-llama-3-8b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-29 23:07:09 +00:00
|
|
|
input_price: 0.05
|
|
|
|
output_price: 0.25
|
|
|
|
- name: mistralai/mistral-7b-instruct-v0.2
|
|
|
|
max_input_tokens: 32000
|
|
|
|
max_output_tokens: 8192
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-29 23:07:09 +00:00
|
|
|
input_price: 0.05
|
|
|
|
output_price: 0.25
|
|
|
|
- name: mistralai/mixtral-8x7b-instruct-v0.1
|
|
|
|
max_input_tokens: 32000
|
|
|
|
max_output_tokens: 8192
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-29 23:07:09 +00:00
|
|
|
input_price: 0.3
|
|
|
|
output_price: 1
|
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: ernie
|
2024-04-29 01:27:11 +00:00
|
|
|
# docs:
|
2024-04-28 02:14:12 +00:00
|
|
|
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
|
|
|
|
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from models doc
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: ernie-4.0-8k-0613
|
2024-04-28 02:14:12 +00:00
|
|
|
max_input_tokens: 5120
|
|
|
|
max_output_tokens: 2048
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 16.8
|
|
|
|
output_price: 16.8
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: ernie-3.5-8k-0613
|
2024-04-28 02:14:12 +00:00
|
|
|
max_input_tokens: 5120
|
|
|
|
max_output_tokens: 2048
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-28 02:14:12 +00:00
|
|
|
input_price: 1.68
|
|
|
|
output_price: 1.68
|
|
|
|
- name: ernie-speed-128k
|
|
|
|
max_input_tokens: 124000
|
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 0
|
|
|
|
output_price: 0
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: ernie-lite-8k
|
|
|
|
max_input_tokens: 7168
|
|
|
|
max_output_tokens: 2048
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 0
|
|
|
|
output_price: 0
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: qianwen
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://help.aliyun.com/zh/dashscope/developer-reference/tongyiqianwen-large-language-models/
|
|
|
|
# - https://help.aliyun.com/zh/dashscope/developer-reference/qwen-vl-plus/
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - get max_output_tokens info from models doc
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
2024-05-23 04:26:24 +00:00
|
|
|
- name: qwen-long
|
|
|
|
max_input_tokens: 1000000
|
|
|
|
input_price: 0.07
|
|
|
|
output_price: 0.28
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: qwen-turbo
|
|
|
|
max_input_tokens: 6000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 1500
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 0.28
|
|
|
|
output_price: 0.84
|
2024-06-18 22:17:29 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: qwen-plus
|
|
|
|
max_input_tokens: 30000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 2000
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 0.56
|
|
|
|
output_price: 1.68
|
2024-06-18 22:17:29 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: qwen-max
|
|
|
|
max_input_tokens: 6000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 2000
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 5.6
|
2024-04-28 02:14:12 +00:00
|
|
|
output_price: 16.8
|
2024-06-18 22:17:29 +00:00
|
|
|
supports_function_calling: true
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: qwen-max-longcontext
|
2024-05-23 04:26:24 +00:00
|
|
|
input_price: 5.6
|
|
|
|
output_price: 16.8
|
2024-04-28 02:14:12 +00:00
|
|
|
max_input_tokens: 28000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 2000
|
2024-04-28 02:14:12 +00:00
|
|
|
- name: qwen-vl-plus
|
|
|
|
input_price: 1.12
|
|
|
|
output_price: 1.12
|
|
|
|
supports_vision: true
|
|
|
|
- name: qwen-vl-max
|
|
|
|
input_price: 2.8
|
|
|
|
output_price: 2.8
|
|
|
|
supports_vision: true
|
2024-06-05 01:02:23 +00:00
|
|
|
- name: text-embedding-v2
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 2048
|
|
|
|
default_chunk_size: 2000
|
|
|
|
max_concurrent_chunks: 5
|
2024-04-28 02:14:12 +00:00
|
|
|
|
2024-04-30 04:52:58 +00:00
|
|
|
- platform: moonshot
|
2024-04-28 02:14:12 +00:00
|
|
|
# docs:
|
|
|
|
# - https://platform.moonshot.cn/docs/intro
|
|
|
|
# - https://platform.moonshot.cn/docs/pricing
|
|
|
|
# - https://platform.moonshot.cn/docs/api-reference
|
2024-04-29 03:57:02 +00:00
|
|
|
# notes:
|
|
|
|
# - unable to get max_output_tokens info
|
2024-04-28 02:14:12 +00:00
|
|
|
models:
|
|
|
|
- name: moonshot-v1-8k
|
|
|
|
max_input_tokens: 8000
|
|
|
|
input_price: 1.68
|
|
|
|
output_price: 1.68
|
|
|
|
- name: moonshot-v1-32k
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 3.36
|
|
|
|
output_price: 3.36
|
|
|
|
- name: moonshot-v1-128k
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 8.4
|
|
|
|
output_price: 8.4
|
2024-04-30 22:01:10 +00:00
|
|
|
|
2024-05-07 08:16:18 +00:00
|
|
|
- platform: deepseek
|
2024-05-07 08:40:18 +00:00
|
|
|
# docs:
|
|
|
|
# - https://platform.deepseek.com/api-docs/
|
|
|
|
# - https://platform.deepseek.com/api-docs/pricing
|
2024-05-07 08:16:18 +00:00
|
|
|
models:
|
|
|
|
- name: deepseek-chat
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.28
|
|
|
|
- name: deepseek-coder
|
2024-06-14 22:39:55 +00:00
|
|
|
max_input_tokens: 32768
|
2024-05-07 08:16:18 +00:00
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.28
|
|
|
|
|
2024-05-07 08:40:18 +00:00
|
|
|
- platform: zhipuai
|
|
|
|
# docs:
|
|
|
|
# - https://open.bigmodel.cn/dev/howuse/model
|
|
|
|
# - https://open.bigmodel.cn/pricing
|
|
|
|
models:
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: glm-4-0520
|
2024-05-07 08:40:18 +00:00
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 14
|
|
|
|
output_price: 14
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
|
|
|
- name: glm-4-airx
|
|
|
|
max_input_tokens: 8092
|
|
|
|
input_price: 1.4
|
|
|
|
output_price: 1.4
|
|
|
|
supports_function_calling: true
|
|
|
|
- name: glm-4-air
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.14
|
|
|
|
supports_function_calling: true
|
|
|
|
- name: glm-4-flash
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.014
|
|
|
|
output_price: 0.014
|
|
|
|
supports_function_calling: true
|
2024-05-07 08:40:18 +00:00
|
|
|
- name: glm-4v
|
|
|
|
max_input_tokens: 2048
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 7
|
|
|
|
output_price: 7
|
2024-05-07 09:17:15 +00:00
|
|
|
supports_vision: true
|
2024-06-19 04:15:54 +00:00
|
|
|
- name: embedding-2
|
|
|
|
mode: embedding
|
|
|
|
max_input_tokens: 2048
|
|
|
|
default_chunk_size: 2000
|
2024-05-07 08:40:18 +00:00
|
|
|
|
2024-06-18 03:27:51 +00:00
|
|
|
- platform: lingyiwanwu
|
|
|
|
# docs:
|
|
|
|
# - https://platform.lingyiwanwu.com/docs#%E6%A8%A1%E5%9E%8B
|
|
|
|
# - https://platform.lingyiwanwu.com/docs#%E8%AE%A1%E8%B4%B9%E5%8D%95%E5%85%83
|
|
|
|
models:
|
|
|
|
- name: yi-large
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 2.8
|
|
|
|
output_price: 2.8
|
|
|
|
- name: yi-medium
|
|
|
|
max_input_tokens: 16384
|
|
|
|
input_price: 0.35
|
|
|
|
output_price: 0.35
|
|
|
|
- name: yi-vision
|
|
|
|
max_input_tokens: 4096
|
|
|
|
input_price: 0.84
|
|
|
|
output_price: 0.84
|
|
|
|
supports_vision: true
|
|
|
|
- name: yi-medium-200k
|
|
|
|
max_input_tokens: 200000
|
|
|
|
input_price: 1.68
|
|
|
|
output_price: 1.68
|
|
|
|
- name: yi-spark
|
|
|
|
max_input_tokens: 16384
|
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.14
|
|
|
|
- name: yi-large-rag
|
|
|
|
max_input_tokens: 16384
|
|
|
|
input_price: 3.5
|
|
|
|
output_price: 3.5
|
|
|
|
- name: yi-large-turbo
|
|
|
|
max_input_tokens: 16384
|
|
|
|
input_price: 1.68
|
|
|
|
output_price: 1.68
|
|
|
|
|
2024-04-30 22:01:10 +00:00
|
|
|
- platform: anyscale
|
|
|
|
# docs:
|
2024-06-14 22:39:55 +00:00
|
|
|
# - https://docs.anyscale.com/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct
|
2024-04-30 22:01:10 +00:00
|
|
|
# - https://docs.endpoints.anyscale.com/pricing
|
|
|
|
models:
|
|
|
|
- name: meta-llama/Meta-Llama-3-8B-Instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.15
|
|
|
|
output_price: 0.15
|
|
|
|
- name: meta-llama/Meta-Llama-3-70B-Instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 1.0
|
|
|
|
output_price: 1.0
|
|
|
|
- name: codellama/CodeLlama-70b-Instruct-hf
|
|
|
|
max_input_tokens: 4096
|
|
|
|
input_price: 1.0
|
|
|
|
output_price: 1.0
|
|
|
|
- name: mistralai/Mistral-7B-Instruct-v0.1
|
|
|
|
max_input_tokens: 16384
|
|
|
|
input_price: 0.15
|
|
|
|
output_price: 0.15
|
|
|
|
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.50
|
|
|
|
output_price: 0.50
|
|
|
|
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 0.90
|
|
|
|
output_price: 0.90
|
|
|
|
|
|
|
|
- platform: deepinfra
|
|
|
|
# docs:
|
|
|
|
# - https://deepinfra.com/models
|
|
|
|
# - https://deepinfra.com/pricing
|
|
|
|
models:
|
|
|
|
- name: meta-llama/Meta-Llama-3-8B-Instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.08
|
|
|
|
output_price: 0.08
|
|
|
|
- name: meta-llama/Meta-Llama-3-70B-Instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.59
|
|
|
|
output_price: 0.79
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: mistralai/Mistral-7B-Instruct-v0.3
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.07
|
|
|
|
output_price: 0.07
|
|
|
|
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.24
|
|
|
|
output_price: 0.24
|
|
|
|
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 0.65
|
|
|
|
output_price: 0.65
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: Qwen/Qwen2-72B-Instruct
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.59
|
|
|
|
output_price: 0.79
|
|
|
|
- name: microsoft/Phi-3-medium-4k-instruct
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 4096
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.14
|
2024-04-30 22:01:10 +00:00
|
|
|
|
|
|
|
- platform: fireworks
|
|
|
|
# docs:
|
|
|
|
# - https://fireworks.ai/models
|
|
|
|
# - https://fireworks.ai/pricing
|
|
|
|
models:
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: accounts/fireworks/models/firellava-13b
|
|
|
|
max_input_tokens: 4096
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
supports_vision: true
|
|
|
|
- name: accounts/fireworks/models/firefunction-v1
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: accounts/fireworks/models/llama-v3-8b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: accounts/fireworks/models/llama-v3-70b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: accounts/fireworks/models/mistral-7b-instruct-v3
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: accounts/fireworks/models/mixtral-8x7b-instruct
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.5
|
|
|
|
output_price: 0.5
|
|
|
|
- name: accounts/fireworks/models/mixtral-8x22b-instruct
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: accounts/fireworks/models/qwen2-72b-instruct
|
|
|
|
max_input_tokens: 32768
|
2024-04-30 22:01:10 +00:00
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: accounts/fireworks/models/phi-3-mini-128k-instruct
|
|
|
|
max_input_tokens: 131072
|
2024-04-30 22:01:10 +00:00
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: accounts/fireworks/models/phi-3-vision-128k-instruct
|
|
|
|
max_input_tokens: 131072
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
supports_vision: true
|
2024-04-30 22:01:10 +00:00
|
|
|
|
|
|
|
- platform: openrouter
|
|
|
|
# docs:
|
|
|
|
# - https://openrouter.ai/docs#models
|
|
|
|
models:
|
|
|
|
- name: meta-llama/llama-3-8b-instruct
|
|
|
|
max_input_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.07
|
|
|
|
output_price: 0.07
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: meta-llama/llama-3-8b-instruct:nitro
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: meta-llama/llama-3-8b-instruct:extended
|
|
|
|
max_input_tokens: 16384
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.2
|
|
|
|
output_price: 1.125
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: meta-llama/llama-3-70b-instruct
|
|
|
|
max_input_tokens: 8192
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.59
|
|
|
|
output_price: 0.79
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: meta-llama/llama-3-70b-instruct:nitro
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: mistralai/mistral-7b-instruct-v0.3
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
2024-06-14 22:39:55 +00:00
|
|
|
input_price: 0.07
|
|
|
|
output_price: 0.07
|
|
|
|
- name: microsoft/phi-3-mini-128k-instruct
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.1
|
|
|
|
output_price: 0.1
|
|
|
|
- name: qwen/qwen-2-72b-instruct
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-05-13 22:11:34 +00:00
|
|
|
- name: openai/gpt-4o
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 5
|
|
|
|
output_price: 15
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: openai/gpt-4-turbo
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 10
|
|
|
|
output_price: 30
|
|
|
|
supports_vision: true
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: openai/gpt-4-turbo-preview
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 10
|
|
|
|
output_price: 30
|
2024-05-18 11:06:21 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: openai/gpt-4
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 30
|
|
|
|
output_price: 60
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: openai/gpt-3.5-turbo
|
|
|
|
max_input_tokens: 16385
|
|
|
|
input_price: 0.5
|
|
|
|
output_price: 1.5
|
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: google/gemini-pro-1.5
|
|
|
|
max_input_tokens: 2800000
|
|
|
|
input_price: 2.5
|
|
|
|
output_price: 7.5
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
|
|
|
- name: google/gemini-flash-1.5
|
|
|
|
max_input_tokens: 2800000
|
|
|
|
input_price: 0.25
|
|
|
|
output_price: 0.75
|
|
|
|
supports_vision: true
|
|
|
|
supports_function_calling: true
|
|
|
|
- name: google/gemini-pro
|
|
|
|
max_input_tokens: 91728
|
|
|
|
input_price: 0.125
|
|
|
|
output_price: 0.375
|
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: anthropic/claude-3-opus
|
|
|
|
max_input_tokens: 200000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-30 22:01:10 +00:00
|
|
|
input_price: 15
|
|
|
|
output_price: 75
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: anthropic/claude-3-sonnet
|
|
|
|
max_input_tokens: 200000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-30 22:01:10 +00:00
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: anthropic/claude-3-haiku
|
|
|
|
max_input_tokens: 200000
|
2024-05-08 05:46:26 +00:00
|
|
|
max_output_tokens: 4096
|
2024-06-05 01:10:12 +00:00
|
|
|
require_max_tokens: true
|
2024-04-30 22:01:10 +00:00
|
|
|
input_price: 0.25
|
|
|
|
output_price: 1.25
|
|
|
|
supports_vision: true
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: mistralai/mixtral-8x7b-instruct
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.24
|
|
|
|
output_price: 0.24
|
|
|
|
- name: mistralai/mixtral-8x22b-instruct
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 0.65
|
|
|
|
output_price: 0.65
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: mistralai/mistral-small
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 2
|
|
|
|
output_price: 6
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: mistralai/mistral-large
|
|
|
|
max_input_tokens: 32000
|
|
|
|
input_price: 8
|
|
|
|
output_price: 24
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: cohere/command-r
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 0.5
|
|
|
|
output_price: 1.5
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
2024-04-30 22:01:10 +00:00
|
|
|
- name: cohere/command-r-plus
|
|
|
|
max_input_tokens: 128000
|
|
|
|
input_price: 3
|
|
|
|
output_price: 15
|
2024-06-14 22:39:55 +00:00
|
|
|
supports_function_calling: true
|
|
|
|
- name: deepseek/deepseek-chat
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.28
|
|
|
|
- name: deepseek/deepseek-coder
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.14
|
|
|
|
output_price: 0.28
|
|
|
|
|
2024-04-30 22:01:10 +00:00
|
|
|
|
|
|
|
- platform: octoai
|
|
|
|
# docs:
|
|
|
|
# - https://octo.ai/docs/getting-started/inference-models
|
|
|
|
# - https://octo.ai/pricing/text-gen-solution/
|
|
|
|
models:
|
|
|
|
- name: meta-llama-3-8b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.13
|
|
|
|
output_price: 0.13
|
|
|
|
- name: meta-llama-3-70b-instruct
|
|
|
|
max_input_tokens: 8192
|
|
|
|
input_price: 0.86
|
|
|
|
output_price: 0.86
|
|
|
|
- name: mistral-7b-instruct
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.13
|
|
|
|
output_price: 0.13
|
|
|
|
- name: mixtral-8x7b-instruct
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.34
|
|
|
|
output_price: 0.34
|
|
|
|
- name: mixtral-8x22b-instruct
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 0.86
|
|
|
|
output_price: 0.86
|
|
|
|
|
|
|
|
- platform: together
|
|
|
|
# docs:
|
|
|
|
# - https://docs.together.ai/docs/inference-models
|
|
|
|
# - https://www.together.ai/pricing
|
|
|
|
models:
|
|
|
|
- name: meta-llama/Llama-3-8b-chat-hf
|
|
|
|
max_input_tokens: 8000
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: meta-llama/Llama-3-70b-chat-hf
|
|
|
|
max_input_tokens: 8000
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: mistralai/Mistral-7B-Instruct-v0.3
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.2
|
|
|
|
output_price: 0.2
|
|
|
|
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
|
|
max_input_tokens: 32768
|
|
|
|
input_price: 0.9
|
|
|
|
output_price: 0.9
|
|
|
|
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
|
|
max_input_tokens: 65536
|
|
|
|
input_price: 1.2
|
|
|
|
output_price: 1.2
|
2024-06-14 22:39:55 +00:00
|
|
|
- name: Qwen/Qwen2-72B-Instruct
|
2024-04-30 22:01:10 +00:00
|
|
|
max_input_tokens: 32768
|
2024-05-07 09:17:15 +00:00
|
|
|
input_price: 0.9
|
2024-06-14 22:39:55 +00:00
|
|
|
output_price: 0.9
|