# Catalogue of chat models, grouped by provider.
#
# Each top-level list item describes one provider:
#   type    provider identifier (openai, gemini, claude, ...)
#   models  list of models offered by that provider
#
# Each model entry may carry the following keys (all but `name` are omitted
# for some models in this file):
#   name                model identifier
#   max_input_tokens    input token limit
#   max_output_tokens   output token limit
#   max_output_tokens?  same limit written with a trailing `?`
#                       NOTE(review): presumably the `?` marks the limit as
#                       optional/advisory for the consumer - confirm against
#                       the loader before renaming either spelling.
#   input_price         price for input tokens
#   output_price        price for output tokens
#                       NOTE(review): unit and currency are not stated in
#                       this file - confirm against the consumer.
#   supports_vision     true when the model is flagged as vision-capable
#
# The `# docs:` comments list the pages the values were taken from.

- type: openai
  # docs:
  #   - https://platform.openai.com/docs/models
  #   - https://openai.com/pricing
  #   - https://platform.openai.com/docs/api-reference/chat
  models:
    - name: gpt-3.5-turbo
      max_input_tokens: 16385
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5
    - name: gpt-3.5-turbo-1106
      max_input_tokens: 16385
      max_output_tokens?: 4096
      input_price: 1
      output_price: 2
    - name: gpt-4-turbo
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: gpt-4-turbo-preview
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
    - name: gpt-4-1106-preview
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 10
      output_price: 30
    - name: gpt-4-vision-preview
      max_input_tokens: 128000
      max_output_tokens: 4096
      input_price: 10
      output_price: 30
      supports_vision: true
    - name: gpt-4
      max_input_tokens: 8192
      max_output_tokens?: 4096
      input_price: 30
      output_price: 60
    - name: gpt-4-32k
      max_input_tokens: 32768
      max_output_tokens?: 4096
      input_price: 60
      output_price: 120

- type: gemini
  # docs:
  #   - https://ai.google.dev/models/gemini
  #   - https://ai.google.dev/pricing
  #   - https://ai.google.dev/api/rest/v1beta/models/streamGenerateContent
  models:
    - name: gemini-1.0-pro-latest
      max_input_tokens: 30720
      max_output_tokens?: 2048
      input_price: 0.5
      output_price: 1.5
    - name: gemini-1.0-pro-vision-latest
      max_input_tokens: 12288
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5
      supports_vision: true
    - name: gemini-1.5-pro-latest
      max_input_tokens: 1048576
      max_output_tokens?: 8192
      input_price: 7
      output_price: 21
      supports_vision: true

- type: claude
  # docs:
  #   - https://docs.anthropic.com/claude/docs/models-overview
  #   - https://docs.anthropic.com/claude/reference/messages-streaming
  models:
    - name: claude-3-opus-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 15
      output_price: 75
      supports_vision: true
    - name: claude-3-sonnet-20240229
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 3
      output_price: 15
      supports_vision: true
    - name: claude-3-haiku-20240307
      max_input_tokens: 200000
      max_output_tokens: 4096
      input_price: 0.25
      output_price: 1.25
      supports_vision: true

- type: mistral
  # docs:
  #   - https://docs.mistral.ai/platform/endpoints/
  #   - https://mistral.ai/technology/#pricing
  #   - https://docs.mistral.ai/api/
  models:
    - name: open-mistral-7b
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 0.25
      output_price: 0.25
    - name: open-mixtral-8x7b
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 0.7
      output_price: 0.7
    - name: open-mixtral-8x22b
      max_input_tokens: 64000
      max_output_tokens?: 8191
      input_price: 2
      output_price: 6
    - name: mistral-small-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 2
      output_price: 6
    - name: mistral-medium-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 2.7
      output_price: 8.1
    - name: mistral-large-latest
      max_input_tokens: 32000
      max_output_tokens?: 8191
      input_price: 8
      output_price: 24

- type: cohere
  # docs:
  #   - https://docs.cohere.com/docs/command-r
  #   - https://docs.cohere.com/docs/command-r-plus
  #   - https://cohere.com/pricing
  #   - https://docs.cohere.com/reference/chat
  models:
    - name: command-r
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 0.5
      output_price: 1.5
    - name: command-r-plus
      max_input_tokens: 128000
      max_output_tokens?: 4096
      input_price: 3
      output_price: 15

- type: perplexity
  # docs:
  #   - https://docs.perplexity.ai/docs/model-cards
  #   - https://docs.perplexity.ai/docs/pricing
  #   - https://docs.perplexity.ai/reference/post_chat_completions
  models:
    - name: sonar-small-chat
      max_input_tokens: 16384
    - name: sonar-small-online
      max_input_tokens: 12000
    - name: sonar-medium-chat
      max_input_tokens: 16384
    - name: sonar-medium-online
      max_input_tokens: 12000
    - name: llama-3-8b-instruct
      max_input_tokens: 8192
      max_output_tokens?: 8192
      input_price: 0.2
      output_price: 0.2
    - name: llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens?: 8192
      input_price: 1
      output_price: 1
    - name: codellama-70b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 4096
      input_price: 1
      output_price: 1
    - name: mistral-7b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 0.2
      output_price: 0.2
    - name: mixtral-8x7b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 0.6
      output_price: 0.6
    - name: mixtral-8x22b-instruct
      max_input_tokens: 16384
      max_output_tokens?: 8191
      input_price: 1
      output_price: 1

- type: groq
  # docs:
  #   - https://console.groq.com/docs/models
  #   - https://console.groq.com/docs/text-chat
  models:
    - name: llama3-8b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
    - name: llama3-70b-8192
      max_input_tokens: 8192
      max_output_tokens: 8192
    - name: llama2-70b-4096
      max_input_tokens: 4096
      max_output_tokens: 4096
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
      max_output_tokens: 32768
    - name: gemma-7b-it
      max_input_tokens: 8192
      max_output_tokens: 8192

- type: vertexai
  # docs:
  #   - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
  #   - https://cloud.google.com/vertex-ai/generative-ai/pricing
  #   - https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
  # NOTE(review): every model below lists an output limit equal to its input
  # limit - confirm these against the model reference linked above.
  models:
    - name: gemini-1.0-pro
      max_input_tokens: 24568
      max_output_tokens: 24568
      input_price: 0.125
      output_price: 0.375
    - name: gemini-1.0-pro-vision
      max_input_tokens: 14336
      max_output_tokens: 14336
      input_price: 0.125
      output_price: 0.375
      supports_vision: true
    - name: gemini-1.5-pro-preview-0409
      max_input_tokens: 1000000
      max_output_tokens: 1000000
      input_price: 2.5
      output_price: 7.5
      supports_vision: true

- type: ernie
  # docs:
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
  models:
    - name: ernie-4.0-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      input_price: 16.8
      output_price: 16.8
    - name: ernie-3.5-8k-preview
      max_input_tokens: 5120
      max_output_tokens: 2048
      input_price: 1.68
      output_price: 1.68
    - name: ernie-speed-128k
      max_input_tokens: 124000
      max_output_tokens: 4096
      input_price: 0.56
      output_price: 1.12
    - name: ernie-lite-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      input_price: 0.42
      output_price: 0.84
    - name: ernie-tiny-8k
      max_input_tokens: 7168
      max_output_tokens: 2048
      input_price: 0.14
      output_price: 0.14

- type: qianwen
  # docs:
  #   - https://help.aliyun.com/zh/dashscope/developer-reference/tongyiqianwen-large-language-models/
  #   - https://help.aliyun.com/zh/dashscope/developer-reference/qwen-vl-plus/
  models:
    - name: qwen-turbo
      max_input_tokens: 6000
      max_output_tokens?: 1500
      input_price: 1.12
      output_price: 1.12
    - name: qwen-plus
      max_input_tokens: 30000
      max_output_tokens?: 2000
      input_price: 2.8
      output_price: 2.8
    - name: qwen-max
      max_input_tokens: 6000
      max_output_tokens?: 2000
      input_price: 16.8
      output_price: 16.8
    - name: qwen-max-longcontext
      max_input_tokens: 28000
      max_output_tokens?: 2000
    - name: qwen-vl-plus
      input_price: 1.12
      output_price: 1.12
      supports_vision: true
    - name: qwen-vl-max
      input_price: 2.8
      output_price: 2.8
      supports_vision: true

- type: moonshot
  # docs:
  #   - https://platform.moonshot.cn/docs/intro
  #   - https://platform.moonshot.cn/docs/pricing
  #   - https://platform.moonshot.cn/docs/api-reference
  models:
    - name: moonshot-v1-8k
      max_input_tokens: 8000
      input_price: 1.68
      output_price: 1.68
    - name: moonshot-v1-32k
      max_input_tokens: 32000
      input_price: 3.36
      output_price: 3.36
    - name: moonshot-v1-128k
      max_input_tokens: 128000
      input_price: 8.4
      output_price: 8.4