refactor: update models.yaml (#739)

2 months ago · cf9d06f51e
parent 1a1798c893
commit cf9d06f51e
1 changed files with 224 additions and 219 deletions
--- a/models.yaml
+++ b/models.yaml
@ -133,30 +133,14 @@
  # notes:
  #   - unable to get max_output_tokens info
  models:
-    - name: open-mistral-nemo-2407
-      max_input_tokens: 128000
-      input_price: 0.3
-      output_price: 0.3
-    - name: open-mistral-7b
-      max_input_tokens: 32000
-      input_price: 0.25
-      output_price: 0.25
-    - name: open-mixtral-8x7b
+    - name: mistral-large-latest
      max_input_tokens: 32000
-      input_price: 0.7
-      output_price: 0.7
-    - name: open-mixtral-8x22b
-      max_input_tokens: 64000
-      input_price: 2
-      output_price: 6
+      input_price: 4
+      output_price: 12
    - name: mistral-small-latest
      max_input_tokens: 32000
      input_price: 1
      output_price: 3
-    - name: mistral-large-latest
-      max_input_tokens: 32000
-      input_price: 4
-      output_price: 12
    - name: codestral-latest
      max_input_tokens: 32000
      input_price: 1
@ -165,6 +149,19 @@
      max_input_tokens: 256000
      input_price: 0.25
      output_price: 0.25
+    - name: open-mistral-nemo
+      max_input_tokens: 128000
+      input_price: 0.3
+      output_price: 0.3
+      supports_function_calling: true
+    - name: open-mixtral-8x22b
+      max_input_tokens: 64000
+      input_price: 2
+      output_price: 6
+    - name: open-mixtral-8x7b
+      max_input_tokens: 32000
+      input_price: 0.7
+      output_price: 0.7
    - name: mistral-embed
      type: embedding
      input_price: 0.1
@ -178,16 +175,16 @@
  #   - https://cohere.com/pricing
  #   - https://docs.cohere.com/reference/chat
  models:
-    - name: command-r
-      max_input_tokens: 128000
-      input_price: 0.5
-      output_price: 1.5
-      supports_function_calling: true
    - name: command-r-plus
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
+    - name: command-r
+      max_input_tokens: 128000
+      input_price: 0.5
+      output_price: 1.5
+      supports_function_calling: true
    - name: embed-english-v3.0
      type: embedding
      max_input_tokens: 512
@ -215,30 +212,22 @@
  #   - https://docs.perplexity.ai/docs/pricing
  #   - https://docs.perplexity.ai/reference/post_chat_completions
  models:
-    - name: llama-3-sonar-small-32k-chat
-      max_input_tokens: 32768
-      input_price: 0.2
-      output_price: 0.2
    - name: llama-3-sonar-small-32k-online
      max_input_tokens: 28000
      input_price: 0.2
      output_price: 0.2
-    - name: llama-3-sonar-large-32k-chat	
-      max_input_tokens: 32768
-      input_price: 1
-      output_price: 1
    - name: llama-3-sonar-large-32k-online
      max_input_tokens: 28000
      input_price: 1
      output_price: 1
-    - name: llama-3-8b-instruct
-      max_input_tokens: 8192
-      input_price: 0.2
-      output_price: 0.2
    - name: llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 1
      output_price: 1
+    - name: llama-3-8b-instruct
+      max_input_tokens: 8192
+      input_price: 0.2
+      output_price: 0.2
    - name: mixtral-8x7b-instruct
      max_input_tokens: 16384
      input_price: 0.6
@ -252,32 +241,41 @@
  # notes:
  #   - all models are free with rate limits
  models:
-    - name: llama3-8b-8192
-      max_input_tokens: 8192
-      input_price: 0
-      output_price: 0
    - name: llama3-70b-8192
      max_input_tokens: 8192
-      input_price: 0
-      output_price: 0
-    - name: llama3-groq-8b-8192-tool-use-preview
+      input_price: 0.59
+      output_price: 0.79
+      supports_function_calling: true
+    - name: llama3-8b-8192
      max_input_tokens: 8192
-      input_price: 0
-      output_price: 0
+      input_price: 0.05
+      output_price: 0.08
      supports_function_calling: true
    - name: llama3-groq-70b-8192-tool-use-preview
      max_input_tokens: 8192
-      input_price: 0
-      output_price: 0
+      input_price: 0.89
+      output_price: 0.89
+      supports_function_calling: true
+    - name: llama3-groq-8b-8192-tool-use-preview
+      max_input_tokens: 8192
+      input_price: 0.19
+      output_price: 0.19
      supports_function_calling: true
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
-      input_price: 0
-      output_price: 0
+      input_price: 0.24
+      output_price: 0.24
    - name: gemma2-9b-it
      max_input_tokens: 8192
-      input_price: 0
-      output_price: 0
+      input_price: 0.2
+      output_price: 0.2
+      supports_function_calling: true
+    - name: llama-3.1-405b-reasoning
+      max_input_tokens: 16384
+    - name: llama-3.1-70b-versatile
+      max_input_tokens: 8192
+    - name: llama-3.1-8b-instant
+      max_input_tokens: 8192

 - platform: vertexai
  # docs:
@ -307,13 +305,6 @@
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
-    - name: textembedding-gecko@003
-      type: embedding
-      max_input_tokens: 3072
-      input_price: 0.025
-      output_vector_size: 2048
-      default_chunk_size: 3000
-      max_batch_size: 5
    - name: text-embedding-004
      type: embedding
      max_input_tokens: 3072
@ -407,11 +398,21 @@
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
-    - name: meta.llama3-8b-instruct-v1:0
-      max_input_tokens: 8192
+    - name: meta.llama3-1-405b-instruct-v1:0
+      max_input_tokens: 128000
+      max_output_tokens: 2048
+      require_max_tokens: true
+    - name: meta.llama3-1-70b-instruct-v1:0
+      max_input_tokens: 128000
      max_output_tokens: 2048
      require_max_tokens: true
-      input_price: 0.4
+      input_price: 2.65
+      output_price: 3.5
+    - name: meta.llama3-1-8b-instruct-v1:0
+      max_input_tokens: 128000
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0.3
      output_price: 0.6
    - name: meta.llama3-70b-instruct-v1:0
      max_input_tokens: 8192
@ -419,49 +420,36 @@
      require_max_tokens: true
      input_price: 2.65
      output_price: 3.5
-    - name: mistral.mistral-7b-instruct-v0:2
+    - name: meta.llama3-8b-instruct-v1:0
+      max_input_tokens: 8192
+      max_output_tokens: 2048
+      require_max_tokens: true
+      input_price: 0.3
+      output_price: 0.6
+    - name: mistral.mistral-large-2402-v1:0
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
-      input_price: 0.15
-      output_price: 0.2
+      input_price: 8
+      output_price: 2.4
    - name: mistral.mixtral-8x7b-instruct-v0:1
      max_input_tokens: 32000
      max_output_tokens: 8192
      require_max_tokens: true
      input_price: 0.45
      output_price: 0.7
-    - name: mistral.mistral-large-2402-v1:0
-      max_input_tokens: 32000
-      max_output_tokens: 8192
-      require_max_tokens: true
-      input_price: 8
-      output_price: 2.4

 - platform: cloudflare
  # docs:
  #   - https://developers.cloudflare.com/workers-ai/models/
-  #   - https://developers.cloudflare.com/workers-ai/platform/pricing/
  models:
-    - name: '@cf/meta/llama-3-8b-instruct'
-      max_input_tokens: 6144
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0
-      output_price: 0
-    - name: '@hf/mistral/mistral-7b-instruct-v0.2'
-      max_input_tokens: 6144
-      max_output_tokens: 2048
-      require_max_tokens: true
-      input_price: 0
-      output_price: 0
-    - name: '@cf/qwen/qwen1.5-14b-chat-awq'
+    - name: '@cf/meta/llama-3.1-8b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
      input_price: 0
      output_price: 0
-    - name: '@cf/google/gemma-7b-it'
+    - name: '@cf/meta/llama-3-8b-instruct'
      max_input_tokens: 6144
      max_output_tokens: 2048
      require_max_tokens: true
@ -481,6 +469,11 @@
  #   - https://replicate.com/pricing
  #   - https://replicate.com/docs/reference/http
  models:
+    - name: meta/meta-llama-3.1-405b-instruct
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      input_price: 9.5
+      output_price: 9.5
    - name: meta/meta-llama-3-70b-instruct
      max_input_tokens: 8192
      max_output_tokens: 4096
@ -493,12 +486,6 @@
      require_max_tokens: true
      input_price: 0.05
      output_price: 0.25
-    - name: mistralai/mistral-7b-instruct-v0.2
-      max_input_tokens: 32000
-      max_output_tokens: 8192
-      require_max_tokens: true
-      input_price: 0.05
-      output_price: 0.25
    - name: mistralai/mixtral-8x7b-instruct-v0.1
      max_input_tokens: 32000
      max_output_tokens: 8192
@ -711,42 +698,47 @@
  #   - https://deepinfra.com/models
  #   - https://deepinfra.com/pricing
  models:
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
-      max_input_tokens: 8192
-      input_price: 0.08
-      output_price: 0.08
-      supports_function_calling: true
+    - name: meta-llama/Meta-Llama-3.1-70B-Instruct
+      max_input_tokens: 128000
+      input_price: 0.52
+      output_price: 0.75
+    - name: meta-llama/Meta-Llama-3.1-8B-Instruct
+      max_input_tokens: 128000
+      input_price: 0.09
+      output_price: 0.09
    - name: meta-llama/Meta-Llama-3-70B-Instruct
      max_input_tokens: 8192
      input_price: 0.59
      output_price: 0.79
      supports_function_calling: true
-    - name: mistralai/Mistral-7B-Instruct-v0.3
-      max_input_tokens: 32768
-      input_price: 0.07
-      output_price: 0.07
-    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
-      max_input_tokens: 32768
-      input_price: 0.24
-      output_price: 0.24
+    - name: meta-llama/Meta-Llama-3-8B-Instruct
+      max_input_tokens: 8192
+      input_price: 0.08
+      output_price: 0.08
      supports_function_calling: true
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 0.65
      output_price: 0.65
      supports_function_calling: true
-    - name: google/gemma-1.1-7b-it
+    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
+      max_input_tokens: 32768
+      input_price: 0.24
+      output_price: 0.24
+      supports_function_calling: true
+    - name: google/gemma-2-27b-it
      max_input_tokens: 8192
-      input_price: 0.07
-      output_price: 0.07
+      input_price: 0.27
+      output_price: 0.27
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 8192
+      input_price: 0.09
+      output_price: 0.09
    - name: Qwen/Qwen2-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.59
      output_price: 0.79
-    - name: microsoft/Phi-3-medium-4k-instruct
-      max_input_tokens: 4096
-      input_price: 0.14
-      output_price: 0.14
+      supports_function_calling: true
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      max_input_tokens: 512
@ -788,53 +780,57 @@
  #   - https://fireworks.ai/models
  #   - https://fireworks.ai/pricing
  models:
-    - name: accounts/fireworks/models/firellava-13b
-      max_input_tokens: 4096
-      input_price: 0.2
-      output_price: 0.2
-      supports_vision: true
-    - name: accounts/fireworks/models/firefunction-v2
-      max_input_tokens: 32768
-      input_price: 0.2
-      output_price: 0.2
-      supports_function_calling: true
-    - name: accounts/fireworks/models/llama-v3-8b-instruct
-      max_input_tokens: 8192
+    - name: accounts/fireworks/models/llama-v3p1-405b-instruct
+      max_input_tokens: 131072
+      input_price: 3
+      output_price: 3
+    - name: accounts/fireworks/models/llama-v3p1-70b-instruct
+      max_input_tokens: 131072
+      input_price: 0.9
+      output_price: 0.9
+    - name: accounts/fireworks/models/llama-v3p1-8b-instruct
+      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
    - name: accounts/fireworks/models/llama-v3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.9
      output_price: 0.9
-    - name: accounts/fireworks/models/mistral-7b-instruct-v3
-      max_input_tokens: 32768
+    - name: accounts/fireworks/models/llama-v3-8b-instruct
+      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
+    - name: accounts/fireworks/models/mixtral-8x22b-instruct
+      max_input_tokens: 65536
+      input_price: 0.9
+      output_price: 0.9
    - name: accounts/fireworks/models/mixtral-8x7b-instruct
      max_input_tokens: 32768
      input_price: 0.5
      output_price: 0.5
-    - name: accounts/fireworks/models/mixtral-8x22b-instruct
-      max_input_tokens: 65536
+    - name: accounts/fireworks/models/qwen2-72b-instruct
+      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
    - name: accounts/fireworks/models/gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
-    - name: accounts/fireworks/models/qwen2-72b-instruct
-      max_input_tokens: 32768
-      input_price: 0.9
-      output_price: 0.9
-    - name: accounts/fireworks/models/phi-3-mini-128k-instruct
+    - name: accounts/fireworks/models/phi-3-vision-128k-instruct
      max_input_tokens: 131072
      input_price: 0.2
      output_price: 0.2
-    - name: accounts/fireworks/models/phi-3-vision-128k-instruct
-      max_input_tokens: 131072
+      supports_vision: true
+    - name: accounts/fireworks/models/firellava-13b
+      max_input_tokens: 4096
      input_price: 0.2
      output_price: 0.2
      supports_vision: true
+    - name: accounts/fireworks/models/firefunction-v2
+      max_input_tokens: 32768
+      input_price: 0.2
+      output_price: 0.2
+      supports_function_calling: true
    - name: nomic-ai/nomic-embed-text-v1.5
      type: embedding
      max_input_tokens: 8192
@ -861,26 +857,34 @@
  # docs:
  #   - https://openrouter.ai/docs#models
  models:
-    - name: meta-llama/llama-3-8b-instruct
-      max_input_tokens: 8192
-      input_price: 0.07
-      output_price: 0.07
+    - name: meta-llama/llama-3.1-405b-instruct
+      max_input_tokens: 131072
+      input_price: 3
+      output_price: 3
+    - name: meta-llama/llama-3.1-70b-instruct
+      max_input_tokens: 131072
+      input_price: 0.75
+      output_price: 0.75
+    - name: meta-llama/llama-3.1-8b-instruct
+      max_input_tokens: 131072
+      input_price: 0.09
+      output_price: 0.09
    - name: meta-llama/llama-3-70b-instruct
      max_input_tokens: 8192
      input_price: 0.59
      output_price: 0.79
-    - name: microsoft/phi-3-mini-128k-instruct
-      max_input_tokens: 128000
-      input_price: 0.1
-      output_price: 0.1
-    - name: microsoft/phi-3-medium-4k-instruct
-      max_input_tokens: 4000
-      input_price: 0.14
-      output_price: 0.14
+    - name: meta-llama/llama-3-8b-instruct
+      max_input_tokens: 8192
+      input_price: 0.07
+      output_price: 0.07
    - name: microsoft/phi-3-medium-128k-instruct
      max_input_tokens: 128000
      input_price: 1
      output_price: 1
+    - name: microsoft/phi-3-mini-128k-instruct
+      max_input_tokens: 128000
+      input_price: 0.1
+      output_price: 0.1
    - name: qwen/qwen-2-72b-instruct
      max_input_tokens: 32768
      input_price: 0.9
@ -961,36 +965,41 @@
      output_price: 1.25
      supports_vision: true
      supports_function_calling: true
-    - name: mistralai/mistral-7b-instruct-v0.3
-      max_input_tokens: 32768
-      input_price: 0.07
-      output_price: 0.07
-    - name: mistralai/mixtral-8x7b-instruct
-      max_input_tokens: 32768
-      input_price: 0.24
-      output_price: 0.24
-    - name: mistralai/mixtral-8x22b-instruct
-      max_input_tokens: 65536
-      input_price: 0.65
-      output_price: 0.65
-    - name: mistralai/mistral-small
-      max_input_tokens: 32000
-      input_price: 2
-      output_price: 6
    - name: mistralai/mistral-large
      max_input_tokens: 32000
      input_price: 8
      output_price: 24
-    - name: cohere/command-r
+    - name: mistralai/mistral-small
+      max_input_tokens: 32000
+      input_price: 2
+      output_price: 6
+    - name: mistralai/codestral-mamba
+      max_input_tokens: 256000
+      input_price: 0.25
+      output_price: 0.25
+    - name: mistralai/mistral-nemo
      max_input_tokens: 128000
-      input_price: 0.5
-      output_price: 1.5
+      input_price: 0.18
+      output_price: 0.18
      supports_function_calling: true
+    - name: mistralai/mixtral-8x22b-instruct
+      max_input_tokens: 65536
+      input_price: 0.65
+      output_price: 0.65
+    - name: mistralai/mixtral-8x7b-instruct
+      max_input_tokens: 32768
+      input_price: 0.24
+      output_price: 0.24
    - name: cohere/command-r-plus
      max_input_tokens: 128000
      input_price: 3
      output_price: 15
      supports_function_calling: true
+    - name: cohere/command-r
+      max_input_tokens: 128000
+      input_price: 0.5
+      output_price: 1.5
+      supports_function_calling: true
    - name: deepseek/deepseek-chat 
      max_input_tokens: 32768 
      input_price: 0.14
@ -999,18 +1008,10 @@
      max_input_tokens: 32768
      input_price: 0.14
      output_price: 0.28
-    - name: perplexity/llama-3-sonar-small-32k-chat
-      max_input_tokens: 32768
-      input_price: 0.2
-      output_price: 0.2
    - name: perplexity/llama-3-sonar-small-32k-online
      max_input_tokens: 28000
      input_price: 0.2
      output_price: 0.2
-    - name: perplexity/llama-3-sonar-large-32k-chat
-      max_input_tokens: 32768
-      input_price: 1
-      output_price: 1
    - name: perplexity/llama-3-sonar-large-32k-online
      max_input_tokens: 28000
      input_price: 1
@ -1023,28 +1024,36 @@
 - platform: octoai
  # docs:
  #   - https://octo.ai/docs/getting-started/inference-models
-  #   - https://octo.ai/pricing/text-gen-solution/
+  #   - https://octo.ai/docs/getting-started/pricing-and-billing
  models:
-    - name: meta-llama-3-8b-instruct
-      max_input_tokens: 8192
-      input_price: 0.13
-      output_price: 0.13
+    - name: meta-llama-3.1-405b-instruct
+      max_input_tokens: 131072
+      input_price: 3
+      output_price: 9
+    - name: meta-llama-3.1-70b-instruct
+      max_input_tokens: 131072
+      input_price: 0.9
+      output_price: 0.9
+    - name: meta-llama-3.1-8b-instruct
+      max_input_tokens: 131072
+      input_price: 0.15
+      output_price: 0.15
    - name: meta-llama-3-70b-instruct
      max_input_tokens: 8192
-      input_price: 0.86
-      output_price: 0.86
-    - name: mistral-7b-instruct
-      max_input_tokens: 32768
-      input_price: 0.13
-      output_price: 0.13
-    - name: mixtral-8x7b-instruct
-      max_input_tokens: 32768
-      input_price: 0.34
-      output_price: 0.34
+      input_price: 0.9
+      output_price: 0.9
+    - name: meta-llama-3-8b-instruct
+      max_input_tokens: 8192
+      input_price: 0.15
+      output_price: 0.15
    - name: mixtral-8x22b-instruct
      max_input_tokens: 65536
-      input_price: 0.86
-      output_price: 0.86
+      input_price: 1.2
+      output_price: 1.2
+    - name: mixtral-8x7b-instruct
+      max_input_tokens: 32768
+      input_price: 0.45
+      output_price: 0.45
    - name: thenlper/gte-large
      type: embedding
      max_input_tokens: 512
@ -1059,35 +1068,38 @@
  #   - https://docs.together.ai/docs/embedding-models
  #   - https://www.together.ai/pricing
  models:
-    - name: meta-llama/Llama-3-8b-chat-hf
-      max_input_tokens: 8000
-      input_price: 0.2
-      output_price: 0.2
-    - name: meta-llama/Llama-3-70b-chat-hf
-      max_input_tokens: 8000
-      input_price: 0.9
-      output_price: 0.9
-    - name: mistralai/Mistral-7B-Instruct-v0.3
-      max_input_tokens: 32768
-      input_price: 0.2
-      output_price: 0.2
-    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
+    - name: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
      max_input_tokens: 32768
-      input_price: 0.9
-      output_price: 0.9
+      input_price: 5
+      output_price: 5
+    - name: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+      max_input_tokens: 8192
+      input_price: 0.88
+      output_price: 0.88
+    - name: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+      max_input_tokens: 8192
+      input_price: 0.18
+      output_price: 0.18
+    - name: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
+      max_input_tokens: 8192
+      input_price: 0.88
+      output_price: 0.88
+    - name: meta-llama/Meta-Llama-3-8B-Instruct-Turbo
+      max_input_tokens: 8192
+      input_price: 0.18
+      output_price: 0.18
    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
      max_input_tokens: 65536
      input_price: 1.2
      output_price: 1.2
-    - name: google/gemma-7b-it
-      max_input_tokens: 8192
-      input_price: 0.2
-      output_price: 0.2
+    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
+      max_input_tokens: 32768
+      input_price: 0.9
+      output_price: 0.9
    - name: Qwen/Qwen2-72B-Instruct
      max_input_tokens: 32768
      input_price: 0.9
      output_price: 0.9
-      max_batch_size: 100
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      max_input_tokens: 512
@ -1129,13 +1141,6 @@
      output_vector_size: 768
      default_chunk_size: 1500
      max_batch_size: 100
-    - name: jina-embeddings-v2-base-code
-      type: embedding
-      max_input_tokens: 8192
-      input_price: 0.02
-      output_vector_size: 768
-      default_chunk_size: 1500
-      max_batch_size: 100
    - name: jina-colbert-v1-en
      type: embedding
      max_input_tokens: 8192
@ -1143,9 +1148,9 @@
      output_vector_size: 768
      default_chunk_size: 1500
      max_batch_size: 100
-    - name: jina-reranker-v1-base-multilingual
+    - name: jina-reranker-v2-base-multilingual
      type: reranker
-      max_input_tokens: 8192
+      max_input_tokens: 1024
      input_price: 0.02
    - name: jina-reranker-v1-base-en
      type: reranker