refactor: update models.yaml

This commit is contained in:
sigoden 2024-06-21 06:51:47 +00:00
parent 34d568d5e2
commit 6d05afc81b

View File

@ -23,49 +23,26 @@
output_price: 30 output_price: 30
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: gpt-4-turbo-preview
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_function_calling: true
- name: gpt-4-1106-preview
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_function_calling: true
- name: gpt-4
max_input_tokens: 8192
max_output_tokens: 4096
input_price: 30
output_price: 60
- name: gpt-3.5-turbo - name: gpt-3.5-turbo
max_input_tokens: 16385 max_input_tokens: 16385
max_output_tokens: 4096 max_output_tokens: 4096
input_price: 0.5 input_price: 0.5
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: gpt-3.5-turbo-1106
max_input_tokens: 16385
max_output_tokens: 4096
input_price: 1
output_price: 2
supports_function_calling: true
- name: text-embedding-3-large - name: text-embedding-3-large
mode: embedding mode: embedding
max_input_tokens: 8191 max_input_tokens: 8191
default_chunk_size: 4000 default_chunk_size: 3000
max_concurrent_chunks: 100 max_concurrent_chunks: 100
- name: text-embedding-3-small - name: text-embedding-3-small
mode: embedding mode: embedding
max_input_tokens: 8191 max_input_tokens: 8191
default_chunk_size: 4000 default_chunk_size: 3000
max_concurrent_chunks: 100 max_concurrent_chunks: 100
- name: text-embedding-ada-002 - name: text-embedding-ada-002
mode: embedding mode: embedding
max_input_tokens: 8191 max_input_tokens: 8191
default_chunk_size: 4000 default_chunk_size: 3000
max_concurrent_chunks: 100 max_concurrent_chunks: 100
- platform: gemini - platform: gemini
@ -99,7 +76,7 @@
- name: text-embedding-004 - name: text-embedding-004
mode: embedding mode: embedding
max_input_tokens: 2048 max_input_tokens: 2048
default_chunk_size: 2000 default_chunk_size: 1500
- platform: claude - platform: claude
# docs: # docs:
@ -176,25 +153,21 @@
- name: mistral-embed - name: mistral-embed
mode: embedding mode: embedding
max_input_tokens: 8092 max_input_tokens: 8092
default_chunk_size: 4000 default_chunk_size: 2000
- platform: cohere - platform: cohere
# docs: # docs:
# - https://docs.cohere.com/docs/command-r # - https://docs.cohere.com/docs/command-r
# - https://cohere.com/pricing # - https://cohere.com/pricing
# - https://docs.cohere.com/reference/chat # - https://docs.cohere.com/reference/chat
# notes
# - get max_output_tokens info from api error
models: models:
- name: command-r - name: command-r
max_input_tokens: 128000 max_input_tokens: 128000
max_output_tokens: 4000
input_price: 0.5 input_price: 0.5
output_price: 1.5 output_price: 1.5
supports_function_calling: true supports_function_calling: true
- name: command-r-plus - name: command-r-plus
max_input_tokens: 128000 max_input_tokens: 128000
max_output_tokens: 4000
input_price: 3 input_price: 3
output_price: 15 output_price: 15
supports_function_calling: true supports_function_calling: true
@ -242,32 +215,33 @@
# - https://docs.perplexity.ai/docs/model-cards # - https://docs.perplexity.ai/docs/model-cards
# - https://docs.perplexity.ai/docs/pricing # - https://docs.perplexity.ai/docs/pricing
# - https://docs.perplexity.ai/reference/post_chat_completions # - https://docs.perplexity.ai/reference/post_chat_completions
# notes
# - get max_output_tokens info from api error
models: models:
- name: llama-3-sonar-small-32k-chat - name: llama-3-sonar-small-32k-chat
max_input_tokens: 32768 max_input_tokens: 32768
max_output_tokens: 32768 input_price: 0.2
output_price: 0.2
- name: llama-3-sonar-small-32k-online
max_input_tokens: 28000
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
- name: llama-3-sonar-large-32k-chat - name: llama-3-sonar-large-32k-chat
max_input_tokens: 32768 max_input_tokens: 32768
max_output_tokens: 32768 input_price: 1
input_price: 0.6 output_price: 1
output_price: 0.6 - name: llama-3-sonar-large-32k-online
max_input_tokens: 28000
input_price: 1
output_price: 1
- name: llama-3-8b-instruct - name: llama-3-8b-instruct
max_input_tokens: 8192 max_input_tokens: 8192
max_output_tokens: 8192
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
- name: llama-3-70b-instruct - name: llama-3-70b-instruct
max_input_tokens: 8192 max_input_tokens: 8192
max_output_tokens: 8192
input_price: 1 input_price: 1
output_price: 1 output_price: 1
- name: mixtral-8x7b-instruct - name: mixtral-8x7b-instruct
max_input_tokens: 16384 max_input_tokens: 16384
max_output_tokens: 16384
input_price: 0.6 input_price: 0.6
output_price: 0.6 output_price: 0.6
@ -277,24 +251,28 @@
# - https://wow.groq.com # - https://wow.groq.com
# - https://console.groq.com/docs/text-chat # - https://console.groq.com/docs/text-chat
# notes: # notes:
# - get max_output_tokens info from playground
# - all models are free with rate limits # - all models are free with rate limits
models: models:
- name: llama3-8b-8192 - name: llama3-8b-8192
max_input_tokens: 8192 max_input_tokens: 8192
max_output_tokens: 8192
input_price: 0.05 input_price: 0.05
output_price: 0.10 output_price: 0.08
supports_function_calling: true
- name: llama3-70b-8192 - name: llama3-70b-8192
max_input_tokens: 8192 max_input_tokens: 8192
max_output_tokens: 8192
input_price: 0.59 input_price: 0.59
output_price: 0.79 output_price: 0.79
supports_function_calling: true
- name: mixtral-8x7b-32768 - name: mixtral-8x7b-32768
max_input_tokens: 32768 max_input_tokens: 32768
max_output_tokens: 32768 input_price: 0.24
input_price: 0.27 output_price: 0.24
output_price: 0.27 supports_function_calling: true
- name: gemma-7b-it
max_input_tokens: 8192
input_price: 0.07
output_price: 0.07
supports_function_calling: true
- platform: vertexai - platform: vertexai
# docs: # docs:
@ -327,12 +305,12 @@
- name: text-embedding-004 - name: text-embedding-004
mode: embedding mode: embedding
max_input_tokens: 3072 max_input_tokens: 3072
default_chunk_size: 3000 default_chunk_size: 2000
max_concurrent_chunks: 5 max_concurrent_chunks: 5
- name: text-multilingual-embedding-002 - name: text-multilingual-embedding-002
mode: embedding mode: embedding
max_input_tokens: 3072 max_input_tokens: 3072
default_chunk_size: 3000 default_chunk_size: 2000
max_concurrent_chunks: 5 max_concurrent_chunks: 5
- platform: vertexai-claude - platform: vertexai-claude
@ -451,20 +429,32 @@
# - https://developers.cloudflare.com/workers-ai/models/ # - https://developers.cloudflare.com/workers-ai/models/
# - https://developers.cloudflare.com/workers-ai/platform/pricing/ # - https://developers.cloudflare.com/workers-ai/platform/pricing/
# notes: # notes:
# - unable to get max_output_tokens info # - get max_output_tokens from playground
models: models:
- name: '@cf/meta/llama-3-8b-instruct' - name: '@cf/meta/llama-3-8b-instruct'
max_input_tokens: 4096 max_input_tokens: 6144
max_output_tokens: 4096 max_output_tokens: 2048
require_max_tokens: true require_max_tokens: true
- name: '@cf/mistral/mistral-7b-instruct-v0.2-lora' input_price: 0
max_input_tokens: 4096 output_price: 0
max_output_tokens: 4096 - name: '@hf/mistral/mistral-7b-instruct-v0.2'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/qwen/qwen1.5-14b-chat-awq' - name: '@cf/qwen/qwen1.5-14b-chat-awq'
max_input_tokens: 4096 max_input_tokens: 6144
max_output_tokens: 4096 max_output_tokens: 2048
require_max_tokens: true require_max_tokens: true
input_price: 0
output_price: 0
- name: '@cf/google/gemma-7b-it'
max_input_tokens: 6144
max_output_tokens: 2048
require_max_tokens: true
input_price: 0
output_price: 0
- platform: replicate - platform: replicate
# docs: # docs:
@ -576,7 +566,7 @@
- name: text-embedding-v2 - name: text-embedding-v2
mode: embedding mode: embedding
max_input_tokens: 2048 max_input_tokens: 2048
default_chunk_size: 2000 default_chunk_size: 1500
max_concurrent_chunks: 5 max_concurrent_chunks: 5
- platform: moonshot - platform: moonshot
@ -591,14 +581,17 @@
max_input_tokens: 8000 max_input_tokens: 8000
input_price: 1.68 input_price: 1.68
output_price: 1.68 output_price: 1.68
supports_function_calling: true
- name: moonshot-v1-32k - name: moonshot-v1-32k
max_input_tokens: 32000 max_input_tokens: 32000
input_price: 3.36 input_price: 3.36
output_price: 3.36 output_price: 3.36
supports_function_calling: true
- name: moonshot-v1-128k - name: moonshot-v1-128k
max_input_tokens: 128000 max_input_tokens: 128000
input_price: 8.4 input_price: 8.4
output_price: 8.4 output_price: 8.4
supports_function_calling: true
- platform: deepseek - platform: deepseek
# docs: # docs:
@ -647,7 +640,7 @@
- name: embedding-2 - name: embedding-2
mode: embedding mode: embedding
max_input_tokens: 2048 max_input_tokens: 2048
default_chunk_size: 2000 default_chunk_size: 1500
- platform: lingyiwanwu - platform: lingyiwanwu
# docs: # docs:
@ -686,8 +679,8 @@
- platform: anyscale - platform: anyscale
# docs: # docs:
# - https://docs.anyscale.com/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct # - https://docs.anyscale.com/endpoints/text-generation/query-a-model
# - https://docs.endpoints.anyscale.com/pricing # - https://www.anyscale.com/pricing-detail
models: models:
- name: meta-llama/Meta-Llama-3-8B-Instruct - name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192 max_input_tokens: 8192
@ -697,10 +690,6 @@
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 1.0 input_price: 1.0
output_price: 1.0 output_price: 1.0
- name: codellama/CodeLlama-70b-Instruct-hf
max_input_tokens: 4096
input_price: 1.0
output_price: 1.0
- name: mistralai/Mistral-7B-Instruct-v0.1 - name: mistralai/Mistral-7B-Instruct-v0.1
max_input_tokens: 16384 max_input_tokens: 16384
input_price: 0.15 input_price: 0.15
@ -713,6 +702,18 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.90 input_price: 0.90
output_price: 0.90 output_price: 0.90
- name: google/gemma-7b-it
max_input_tokens: 8192
input_price: 0.15
output_price: 0.15
- name: BAAI/bge-large-en-v1.5
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: thenlper/gte-large
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- platform: deepinfra - platform: deepinfra
# docs: # docs:
@ -723,10 +724,12 @@
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.08 input_price: 0.08
output_price: 0.08 output_price: 0.08
supports_function_calling: true
- name: meta-llama/Meta-Llama-3-70B-Instruct - name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.59 input_price: 0.59
output_price: 0.79 output_price: 0.79
supports_function_calling: true
- name: mistralai/Mistral-7B-Instruct-v0.3 - name: mistralai/Mistral-7B-Instruct-v0.3
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.07 input_price: 0.07
@ -735,10 +738,16 @@
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.24 input_price: 0.24
output_price: 0.24 output_price: 0.24
supports_function_calling: true
- name: mistralai/Mixtral-8x22B-Instruct-v0.1 - name: mistralai/Mixtral-8x22B-Instruct-v0.1
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.65 input_price: 0.65
output_price: 0.65 output_price: 0.65
supports_function_calling: true
- name: google/gemma-1.1-7b-it
max_input_tokens: 8192
input_price: 0.07
output_price: 0.07
- name: Qwen/Qwen2-72B-Instruct - name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.59 input_price: 0.59
@ -747,6 +756,46 @@
max_input_tokens: 4096 max_input_tokens: 4096
input_price: 0.14 input_price: 0.14
output_price: 0.14 output_price: 0.14
- name: BAAI/bge-large-en-v1.5
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: BAAI/bge-base-en-v1.5
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: BAAI/bge-m3
mode: embedding
max_input_tokens: 8192
default_chunk_size: 2000
- name: intfloat/e5-base-v2
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: intfloat/e5-large-v2
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: intfloat/multilingual-e5-large
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: sentence-transformers/all-MiniLM-L6-v2
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: sentence-transformers/paraphrase-MiniLM-L6-v2
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: thenlper/gte-base
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: thenlper/gte-large
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- platform: fireworks - platform: fireworks
# docs: # docs:
@ -758,7 +807,7 @@
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
supports_vision: true supports_vision: true
- name: accounts/fireworks/models/firefunction-v1 - name: accounts/fireworks/models/firefunction-v2
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
@ -783,6 +832,10 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.9 input_price: 0.9
output_price: 0.9 output_price: 0.9
- name: accounts/fireworks/models/gemma-7b-it
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: accounts/fireworks/models/qwen2-72b-instruct - name: accounts/fireworks/models/qwen2-72b-instruct
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.9 input_price: 0.9
@ -796,6 +849,22 @@
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
supports_vision: true supports_vision: true
- name: nomic-ai/nomic-embed-text-v1.5
mode: embedding
max_input_tokens: 8192
default_chunk_size: 1500
- name: WhereIsAI/UAE-Large-V1
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: thenlper/gte-large
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: thenlper/gte-base
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- platform: openrouter - platform: openrouter
# docs: # docs:
@ -805,30 +874,22 @@
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.07 input_price: 0.07
output_price: 0.07 output_price: 0.07
- name: meta-llama/llama-3-8b-instruct:nitro
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: meta-llama/llama-3-8b-instruct:extended
max_input_tokens: 16384
input_price: 0.2
output_price: 1.125
- name: meta-llama/llama-3-70b-instruct - name: meta-llama/llama-3-70b-instruct
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.59 input_price: 0.59
output_price: 0.79 output_price: 0.79
- name: meta-llama/llama-3-70b-instruct:nitro
max_input_tokens: 8192
input_price: 0.9
output_price: 0.9
- name: mistralai/mistral-7b-instruct-v0.3
max_input_tokens: 32768
input_price: 0.07
output_price: 0.07
- name: microsoft/phi-3-mini-128k-instruct - name: microsoft/phi-3-mini-128k-instruct
max_input_tokens: 128000 max_input_tokens: 128000
input_price: 0.1 input_price: 0.1
output_price: 0.1 output_price: 0.1
- name: microsoft/phi-3-medium-4k-instruct
max_input_tokens: 4000
input_price: 0.14
output_price: 0.14
- name: microsoft/phi-3-medium-128k-instruct
max_input_tokens: 128000
input_price: 1
output_price: 1
- name: qwen/qwen-2-72b-instruct - name: qwen/qwen-2-72b-instruct
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.9 input_price: 0.9
@ -845,15 +906,6 @@
output_price: 30 output_price: 30
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: openai/gpt-4-turbo-preview
max_input_tokens: 128000
input_price: 10
output_price: 30
supports_function_calling: true
- name: openai/gpt-4
max_input_tokens: 8192
input_price: 30
output_price: 60
- name: openai/gpt-3.5-turbo - name: openai/gpt-3.5-turbo
max_input_tokens: 16385 max_input_tokens: 16385
input_price: 0.5 input_price: 0.5
@ -876,6 +928,14 @@
input_price: 0.125 input_price: 0.125
output_price: 0.375 output_price: 0.375
supports_function_calling: true supports_function_calling: true
- name: anthropic/claude-3.5-sonnet
max_input_tokens: 200000
max_output_tokens: 4096
require_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
supports_function_calling: true
- name: anthropic/claude-3-opus - name: anthropic/claude-3-opus
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 4096 max_output_tokens: 4096
@ -900,6 +960,10 @@
output_price: 1.25 output_price: 1.25
supports_vision: true supports_vision: true
supports_function_calling: true supports_function_calling: true
- name: mistralai/mistral-7b-instruct-v0.3
max_input_tokens: 32768
input_price: 0.07
output_price: 0.07
- name: mistralai/mixtral-8x7b-instruct - name: mistralai/mixtral-8x7b-instruct
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.24 input_price: 0.24
@ -908,17 +972,14 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.65 input_price: 0.65
output_price: 0.65 output_price: 0.65
supports_function_calling: true
- name: mistralai/mistral-small - name: mistralai/mistral-small
max_input_tokens: 32000 max_input_tokens: 32000
input_price: 2 input_price: 2
output_price: 6 output_price: 6
supports_function_calling: true
- name: mistralai/mistral-large - name: mistralai/mistral-large
max_input_tokens: 32000 max_input_tokens: 32000
input_price: 8 input_price: 8
output_price: 24 output_price: 24
supports_function_calling: true
- name: cohere/command-r - name: cohere/command-r
max_input_tokens: 128000 max_input_tokens: 128000
input_price: 0.5 input_price: 0.5
@ -937,6 +998,22 @@
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.14 input_price: 0.14
output_price: 0.28 output_price: 0.28
- name: perplexity/llama-3-sonar-small-32k-chat
max_input_tokens: 32768
input_price: 0.2
output_price: 0.2
- name: perplexity/llama-3-sonar-small-32k-online
max_input_tokens: 28000
input_price: 0.2
output_price: 0.2
- name: perplexity/llama-3-sonar-large-32k-chat
max_input_tokens: 32768
input_price: 1
output_price: 1
- name: perplexity/llama-3-sonar-large-32k-online
max_input_tokens: 28000
input_price: 1
output_price: 1
- platform: octoai - platform: octoai
@ -964,10 +1041,15 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.86 input_price: 0.86
output_price: 0.86 output_price: 0.86
- name: thenlper/gte-large
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- platform: together - platform: together
# docs: # docs:
# - https://docs.together.ai/docs/inference-models # - https://docs.together.ai/docs/inference-models
# - https://docs.together.ai/docs/embedding-models
# - https://www.together.ai/pricing # - https://www.together.ai/pricing
models: models:
- name: meta-llama/Llama-3-8b-chat-hf - name: meta-llama/Llama-3-8b-chat-hf
@ -990,7 +1072,35 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 1.2 input_price: 1.2
output_price: 1.2 output_price: 1.2
- name: google/gemma-7b-it
max_input_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: Qwen/Qwen2-72B-Instruct - name: Qwen/Qwen2-72B-Instruct
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.9 input_price: 0.9
output_price: 0.9 output_price: 0.9
- name: togethercomputer/m2-bert-80M-2k-retrieval
mode: embedding
max_input_tokens: 2048
default_chunk_size: 1500
- name: togethercomputer/m2-bert-80M-8k-retrieval
mode: embedding
max_input_tokens: 8192
default_chunk_size: 1500
- name: togethercomputer/m2-bert-80M-32k-retrieval
mode: embedding
max_input_tokens: 8192
default_chunk_size: 1500
- name: WhereIsAI/UAE-Large-V1
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: BAAI/bge-large-en-v1.5
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000
- name: BAAI/bge-base-en-v1.5
mode: embedding
max_input_tokens: 512
default_chunk_size: 1000