From 6d05afc81b53f2fbe2908058e5afdb245c6e780a Mon Sep 17 00:00:00 2001 From: sigoden Date: Fri, 21 Jun 2024 06:51:47 +0000 Subject: [PATCH] refactor: update models.yaml --- models.yaml | 302 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 206 insertions(+), 96 deletions(-) diff --git a/models.yaml b/models.yaml index f792ab8..dc8a659 100644 --- a/models.yaml +++ b/models.yaml @@ -23,49 +23,26 @@ output_price: 30 supports_vision: true supports_function_calling: true - - name: gpt-4-turbo-preview - max_input_tokens: 128000 - max_output_tokens: 4096 - input_price: 10 - output_price: 30 - supports_function_calling: true - - name: gpt-4-1106-preview - max_input_tokens: 128000 - max_output_tokens: 4096 - input_price: 10 - output_price: 30 - supports_function_calling: true - - name: gpt-4 - max_input_tokens: 8192 - max_output_tokens: 4096 - input_price: 30 - output_price: 60 - name: gpt-3.5-turbo max_input_tokens: 16385 max_output_tokens: 4096 input_price: 0.5 output_price: 1.5 supports_function_calling: true - - name: gpt-3.5-turbo-1106 - max_input_tokens: 16385 - max_output_tokens: 4096 - input_price: 1 - output_price: 2 - supports_function_calling: true - name: text-embedding-3-large mode: embedding max_input_tokens: 8191 - default_chunk_size: 4000 + default_chunk_size: 3000 max_concurrent_chunks: 100 - name: text-embedding-3-small mode: embedding max_input_tokens: 8191 - default_chunk_size: 4000 + default_chunk_size: 3000 max_concurrent_chunks: 100 - name: text-embedding-ada-002 mode: embedding max_input_tokens: 8191 - default_chunk_size: 4000 + default_chunk_size: 3000 max_concurrent_chunks: 100 - platform: gemini @@ -99,7 +76,7 @@ - name: text-embedding-004 mode: embedding max_input_tokens: 2048 - default_chunk_size: 2000 + default_chunk_size: 1500 - platform: claude # docs: @@ -176,25 +153,21 @@ - name: mistral-embed mode: embedding max_input_tokens: 8092 - default_chunk_size: 4000 + default_chunk_size: 2000 - platform: cohere # docs: # - https://docs.cohere.com/docs/command-r # - https://cohere.com/pricing # - https://docs.cohere.com/reference/chat - # notes - # - get max_output_tokens info from api error models: - name: command-r max_input_tokens: 128000 - max_output_tokens: 4000 input_price: 0.5 output_price: 1.5 supports_function_calling: true - name: command-r-plus max_input_tokens: 128000 - max_output_tokens: 4000 input_price: 3 output_price: 15 supports_function_calling: true @@ -242,32 +215,33 @@ # - https://docs.perplexity.ai/docs/model-cards # - https://docs.perplexity.ai/docs/pricing # - https://docs.perplexity.ai/reference/post_chat_completions - # notes - # - get max_output_tokens info from api error models: - name: llama-3-sonar-small-32k-chat max_input_tokens: 32768 - max_output_tokens: 32768 + input_price: 0.2 + output_price: 0.2 + - name: llama-3-sonar-small-32k-online + max_input_tokens: 28000 input_price: 0.2 output_price: 0.2 - name: llama-3-sonar-large-32k-chat max_input_tokens: 32768 - max_output_tokens: 32768 - input_price: 0.6 - output_price: 0.6 + input_price: 1 + output_price: 1 + - name: llama-3-sonar-large-32k-online + max_input_tokens: 28000 + input_price: 1 + output_price: 1 - name: llama-3-8b-instruct max_input_tokens: 8192 - max_output_tokens: 8192 input_price: 0.2 output_price: 0.2 - name: llama-3-70b-instruct max_input_tokens: 8192 - max_output_tokens: 8192 input_price: 1 output_price: 1 - name: mixtral-8x7b-instruct max_input_tokens: 16384 - max_output_tokens: 16384 input_price: 0.6 output_price: 0.6 @@ -277,24 +251,28 @@ # - https://wow.groq.com # - https://console.groq.com/docs/text-chat # notes: - # - get max_output_tokens info from playgourd # - all models are free with rate limits models: - name: llama3-8b-8192 max_input_tokens: 8192 - max_output_tokens: 8192 input_price: 0.05 - output_price: 0.10 + output_price: 0.08 + supports_function_calling: true - name: llama3-70b-8192 max_input_tokens: 8192 - max_output_tokens: 8192 input_price: 0.59 output_price: 0.79 + supports_function_calling: true - name: mixtral-8x7b-32768 max_input_tokens: 32768 - max_output_tokens: 32768 - input_price: 0.27 - output_price: 0.27 + input_price: 0.24 + output_price: 0.24 + supports_function_calling: true + - name: gemma-7b-it + max_input_tokens: 8192 + input_price: 0.07 + output_price: 0.07 + supports_function_calling: true - platform: vertexai # docs: @@ -327,12 +305,12 @@ - name: text-embedding-004 mode: embedding max_input_tokens: 3072 - default_chunk_size: 3000 + default_chunk_size: 2000 max_concurrent_chunks: 5 - name: text-multilingual-embedding-002 mode: embedding max_input_tokens: 3072 - default_chunk_size: 3000 + default_chunk_size: 2000 max_concurrent_chunks: 5 - platform: vertexai-claude @@ -451,20 +429,32 @@ # - https://developers.cloudflare.com/workers-ai/models/ # - https://developers.cloudflare.com/workers-ai/platform/pricing/ # notes: - # - unable to get max_output_tokens info + # - get max_output_tokens from playground models: - name: '@cf/meta/llama-3-8b-instruct' - max_input_tokens: 4096 - max_output_tokens: 4096 + max_input_tokens: 6144 + max_output_tokens: 2048 require_max_tokens: true - - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora' - max_input_tokens: 4096 - max_output_tokens: 4096 + input_price: 0 + output_price: 0 + - name: '@hf/mistral/mistral-7b-instruct-v0.2' + max_input_tokens: 6144 + max_output_tokens: 2048 require_max_tokens: true + input_price: 0 + output_price: 0 - name: '@cf/qwen/qwen1.5-14b-chat-awq' - max_input_tokens: 4096 - max_output_tokens: 4096 + max_input_tokens: 6144 + max_output_tokens: 2048 require_max_tokens: true + input_price: 0 + output_price: 0 + - name: '@cf/google/gemma-7b-it' + max_input_tokens: 6144 + max_output_tokens: 2048 + require_max_tokens: true + input_price: 0 + output_price: 0 - platform: replicate # docs: @@ -576,7 +566,7 @@ - name: text-embedding-v2 mode: embedding max_input_tokens: 2048 - default_chunk_size: 2000 + default_chunk_size: 1500 max_concurrent_chunks: 5 - platform: moonshot @@ -591,14 +581,17 @@ max_input_tokens: 8000 input_price: 1.68 output_price: 1.68 + supports_function_calling: true - name: moonshot-v1-32k max_input_tokens: 32000 input_price: 3.36 output_price: 3.36 + supports_function_calling: true - name: moonshot-v1-128k max_input_tokens: 128000 input_price: 8.4 output_price: 8.4 + supports_function_calling: true - platform: deepseek # docs: @@ -647,7 +640,7 @@ - name: embedding-2 mode: embedding max_input_tokens: 2048 - default_chunk_size: 2000 + default_chunk_size: 1500 - platform: lingyiwanwu # docs: @@ -686,8 +679,8 @@ - platform: anyscale # docs: - # - https://docs.anyscale.com/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct - # - https://docs.endpoints.anyscale.com/pricing + # - https://docs.anyscale.com/endpoints/text-generation/query-a-model + # - https://www.anyscale.com/pricing-detail models: - name: meta-llama/Meta-Llama-3-8B-Instruct max_input_tokens: 8192 @@ -697,10 +690,6 @@ max_input_tokens: 8192 input_price: 1.0 output_price: 1.0 - - name: codellama/CodeLlama-70b-Instruct-hf - max_input_tokens: 4096 - input_price: 1.0 - output_price: 1.0 - name: mistralai/Mistral-7B-Instruct-v0.1 max_input_tokens: 16384 input_price: 0.15 @@ -713,6 +702,18 @@ max_input_tokens: 65536 input_price: 0.90 output_price: 0.90 + - name: google/gemma-7b-it + max_input_tokens: 8192 + input_price: 0.15 + output_price: 0.15 + - name: BAAI/bge-large-en-v1.5 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: thenlper/gte-large + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 - platform: deepinfra # docs: @@ -723,10 +724,12 @@ max_input_tokens: 8192 input_price: 0.08 output_price: 0.08 + supports_function_calling: true - name: meta-llama/Meta-Llama-3-70B-Instruct max_input_tokens: 8192 input_price: 0.59 output_price: 0.79 + supports_function_calling: true - name: mistralai/Mistral-7B-Instruct-v0.3 max_input_tokens: 32768 input_price: 0.07 @@ -735,10 +738,16 @@ max_input_tokens: 32768 input_price: 0.24 output_price: 0.24 + supports_function_calling: true - name: mistralai/Mixtral-8x22B-Instruct-v0.1 max_input_tokens: 65536 input_price: 0.65 output_price: 0.65 + supports_function_calling: true + - name: google/gemma-1.1-7b-it + max_input_tokens: 8192 + input_price: 0.07 + output_price: 0.07 - name: Qwen/Qwen2-72B-Instruct max_input_tokens: 32768 input_price: 0.59 @@ -747,6 +756,46 @@ max_input_tokens: 4096 input_price: 0.14 output_price: 0.14 + - name: BAAI/bge-large-en-v1.5 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: BAAI/bge-base-en-v1.5 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: BAAI/bge-m3 + mode: embedding + max_input_tokens: 8192 + default_chunk_size: 2000 + - name: intfloat/e5-base-v2 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: intfloat/e5-large-v2 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: intfloat/multilingual-e5-large + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: sentence-transformers/all-MiniLM-L6-v2 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: sentence-transformers/paraphrase-MiniLM-L6-v2 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: thenlper/gte-base + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: thenlper/gte-large + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 - platform: fireworks # docs: @@ -758,7 +807,7 @@ input_price: 0.2 output_price: 0.2 supports_vision: true - - name: accounts/fireworks/models/firefunction-v1 + - name: accounts/fireworks/models/firefunction-v2 max_input_tokens: 32768 input_price: 0.2 output_price: 0.2 @@ -783,6 +832,10 @@ max_input_tokens: 65536 input_price: 0.9 output_price: 0.9 + - name: accounts/fireworks/models/gemma-7b-it + max_input_tokens: 8192 + input_price: 0.2 + output_price: 0.2 - name: accounts/fireworks/models/qwen2-72b-instruct max_input_tokens: 32768 input_price: 0.9 @@ -796,6 +849,22 @@ input_price: 0.2 output_price: 0.2 supports_vision: true + - name: nomic-ai/nomic-embed-text-v1.5 + mode: embedding + max_input_tokens: 8192 + default_chunk_size: 1500 + - name: WhereIsAI/UAE-Large-V1 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: thenlper/gte-large + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: thenlper/gte-base + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 - platform: openrouter # docs: @@ -805,30 +874,22 @@ max_input_tokens: 8192 input_price: 0.07 output_price: 0.07 - - name: meta-llama/llama-3-8b-instruct:nitro - max_input_tokens: 8192 - input_price: 0.2 - output_price: 0.2 - - name: meta-llama/llama-3-8b-instruct:extended - max_input_tokens: 16384 - input_price: 0.2 - output_price: 1.125 - name: meta-llama/llama-3-70b-instruct max_input_tokens: 8192 input_price: 0.59 output_price: 0.79 - - name: meta-llama/llama-3-70b-instruct:nitro - max_input_tokens: 8192 - input_price: 0.9 - output_price: 0.9 - - name: mistralai/mistral-7b-instruct-v0.3 - max_input_tokens: 32768 - input_price: 0.07 - output_price: 0.07 - name: microsoft/phi-3-mini-128k-instruct max_input_tokens: 128000 input_price: 0.1 output_price: 0.1 + - name: microsoft/phi-3-medium-4k-instruct + max_input_tokens: 4000 + input_price: 0.14 + output_price: 0.14 + - name: microsoft/phi-3-medium-128k-instruct + max_input_tokens: 128000 + input_price: 1 + output_price: 1 - name: qwen/qwen-2-72b-instruct max_input_tokens: 32768 input_price: 0.9 @@ -845,15 +906,6 @@ output_price: 30 supports_vision: true supports_function_calling: true - - name: openai/gpt-4-turbo-preview - max_input_tokens: 128000 - input_price: 10 - output_price: 30 - supports_function_calling: true - - name: openai/gpt-4 - max_input_tokens: 8192 - input_price: 30 - output_price: 60 - name: openai/gpt-3.5-turbo max_input_tokens: 16385 input_price: 0.5 @@ -876,6 +928,14 @@ input_price: 0.125 output_price: 0.375 supports_function_calling: true + - name: anthropic/claude-3.5-sonnet + max_input_tokens: 200000 + max_output_tokens: 4096 + require_max_tokens: true + input_price: 3 + output_price: 15 + supports_vision: true + supports_function_calling: true - name: anthropic/claude-3-opus max_input_tokens: 200000 max_output_tokens: 4096 @@ -900,6 +960,10 @@ output_price: 1.25 supports_vision: true supports_function_calling: true + - name: mistralai/mistral-7b-instruct-v0.3 + max_input_tokens: 32768 + input_price: 0.07 + output_price: 0.07 - name: mistralai/mixtral-8x7b-instruct max_input_tokens: 32768 input_price: 0.24 @@ -908,17 +972,14 @@ max_input_tokens: 65536 input_price: 0.65 output_price: 0.65 - supports_function_calling: true - name: mistralai/mistral-small max_input_tokens: 32000 input_price: 2 output_price: 6 - supports_function_calling: true - name: mistralai/mistral-large max_input_tokens: 32000 input_price: 8 output_price: 24 - supports_function_calling: true - name: cohere/command-r max_input_tokens: 128000 input_price: 0.5 @@ -937,6 +998,22 @@ max_input_tokens: 32768 input_price: 0.14 output_price: 0.28 + - name: perplexity/llama-3-sonar-small-32k-chat + max_input_tokens: 32768 + input_price: 0.2 + output_price: 0.2 + - name: perplexity/llama-3-sonar-small-32k-online + max_input_tokens: 28000 + input_price: 0.2 + output_price: 0.2 + - name: perplexity/llama-3-sonar-large-32k-chat + max_input_tokens: 32768 + input_price: 1 + output_price: 1 + - name: perplexity/llama-3-sonar-large-32k-online + max_input_tokens: 28000 + input_price: 1 + output_price: 1 - platform: octoai @@ -964,10 +1041,15 @@ max_input_tokens: 65536 input_price: 0.86 output_price: 0.86 + - name: thenlper/gte-large + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 - platform: together # docs: # - https://docs.together.ai/docs/inference-models + # - https://docs.together.ai/docs/embedding-models # - https://www.together.ai/pricing models: - name: meta-llama/Llama-3-8b-chat-hf @@ -990,7 +1072,35 @@ max_input_tokens: 65536 input_price: 1.2 output_price: 1.2 + - name: google/gemma-7b-it + max_input_tokens: 8192 + input_price: 0.2 + output_price: 0.2 - name: Qwen/Qwen2-72B-Instruct max_input_tokens: 32768 input_price: 0.9 - output_price: 0.9 \ No newline at end of file + output_price: 0.9 + - name: togethercomputer/m2-bert-80M-2k-retrieval + mode: embedding + max_input_tokens: 2048 + default_chunk_size: 1500 + - name: togethercomputer/m2-bert-80M-8k-retrieval + mode: embedding + max_input_tokens: 8192 + default_chunk_size: 1500 + - name: togethercomputer/m2-bert-80M-32k-retrieval + mode: embedding + max_input_tokens: 8192 + default_chunk_size: 1500 + - name: WhereIsAI/UAE-Large-V1 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: BAAI/bge-large-en-v1.5 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 + - name: BAAI/bge-base-en-v1.5 + mode: embedding + max_input_tokens: 512 + default_chunk_size: 1000 \ No newline at end of file