refactor: update models.yaml and abandon anyscale (#701)

Authored by sigoden 2 months ago, committed by GitHub
parent cee0eb453e
commit 0264ab80ab

@@ -80,7 +80,6 @@ test-server() {
     OPENAI_COMPATIBLE_PLATFORMS=( \
         openai,gpt-3.5-turbo,https://api.openai.com/v1 \
-        anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \
         deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
         deepseek,deepseek-chat,https://api.deepseek.com \
         fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \
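Each entry in the test script above packs a platform name, a default chat model, and an API base URL into a single comma-separated field; dropping the anyscale entry leaves the remaining platforms testable unchanged. As a purely illustrative aside (the script itself does this splitting in shell, and the helper below is hypothetical, not part of this commit), a minimal Rust sketch of parsing one such triple:

// Illustrative only: split a "platform,model,api_base" triple like the
// entries in OPENAI_COMPATIBLE_PLATFORMS above.
fn parse_platform_triple(entry: &str) -> Option<(&str, &str, &str)> {
    // splitn(3, ..) leaves any later commas inside the api_base untouched
    let mut parts = entry.splitn(3, ',');
    Some((parts.next()?, parts.next()?, parts.next()?))
}

fn main() {
    let entry = "deepseek,deepseek-chat,https://api.deepseek.com";
    let (platform, model, api_base) = parse_platform_triple(entry).unwrap();
    println!("{platform} -> {model} @ {api_base}");
}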

@@ -260,12 +260,6 @@ clients:
     name: lingyiwanwu
     api_key: xxx # ENV: {client}_API_KEY
-  # See https://docs.endpoints.anyscale.com/
-  - type: openai-compatible
-    name: anyscale
-    api_base: https://api.endpoints.anyscale.com/v1
-    api_key: xxx # ENV: {client}_API_KEY
   # See https://deepinfra.com/docs
   - type: openai-compatible
     name: deepinfra
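With the anyscale block removed from config.example.yaml, the surviving openai-compatible clients all share the same three-field shape. A minimal sketch, not aichat's actual config types, of how one such entry could be deserialized with serde/serde_yaml (the struct name and field layout simply mirror the YAML keys shown above and are assumptions for illustration):

// Hedged sketch: deserialize one `openai-compatible` client entry.
// `OpenAICompatibleEntry` is a made-up name, not a type from the crate.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct OpenAICompatibleEntry {
    name: String,             // e.g. "deepinfra"
    #[serde(default)]
    api_base: Option<String>, // may be omitted for built-in platforms
    #[serde(default)]
    api_key: Option<String>,  // usually supplied via the {client}_API_KEY env var instead
}

fn main() -> Result<(), serde_yaml::Error> {
    let yaml = "name: deepinfra\napi_base: https://api.deepinfra.com/v1/openai\napi_key: xxx\n";
    let entry: OpenAICompatibleEntry = serde_yaml::from_str(yaml)?;
    println!("{entry:?}");
    Ok(())
}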

@@ -239,22 +239,21 @@
   models:
     - name: llama3-8b-8192
       max_input_tokens: 8192
-      input_price: 0.05
-      output_price: 0.08
+      input_price: 0
+      output_price: 0
       supports_function_calling: true
     - name: llama3-70b-8192
       max_input_tokens: 8192
-      input_price: 0.59
-      output_price: 0.79
-      supports_function_calling: true
+      input_price: 0
+      output_price: 0
     - name: mixtral-8x7b-32768
       max_input_tokens: 32768
-      input_price: 0.24
-      output_price: 0.24
-    - name: gemma-7b-it
+      input_price: 0
+      output_price: 0
+    - name: gemma2-9b-it
       max_input_tokens: 8192
-      input_price: 0.07
-      output_price: 0.07
+      input_price: 0
+      output_price: 0
 - platform: vertexai
   # docs:
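The groq hunk above only zeroes out prices (and swaps gemma-7b-it for gemma2-9b-it); the shape of each model entry is unchanged. As a worked example of what these fields feed into, and assuming the input_price/output_price values are quoted per million tokens (an assumption inferred from their magnitudes, not stated in the diff), a small cost-estimate sketch with a hypothetical helper, not aichat code:

// Hedged sketch: estimate request cost from per-million-token prices.
// The "per million tokens" unit is an assumption, not taken from the diff.
fn estimate_cost(input_tokens: u64, output_tokens: u64, input_price: f64, output_price: f64) -> f64 {
    (input_tokens as f64 * input_price + output_tokens as f64 * output_price) / 1_000_000.0
}

fn main() {
    // llama3-8b-8192: old prices 0.05 / 0.08 vs the new 0 / 0
    println!("old: {:.6}", estimate_cost(1200, 300, 0.05, 0.08)); // 0.000084
    println!("new: {:.6}", estimate_cost(1200, 300, 0.0, 0.0));   // 0.000000
}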
@@ -284,6 +283,13 @@
       input_price: 0.125
       output_price: 0.375
       supports_function_calling: true
+    - name: textembedding-gecko@003
+      type: embedding
+      max_input_tokens: 3072
+      input_price: 0.025
+      output_vector_size: 2048
+      default_chunk_size: 3000
+      max_batch_size: 5
     - name: text-embedding-004
       type: embedding
       max_input_tokens: 3072
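The embedding entry added above carries three capacity hints: max_input_tokens, default_chunk_size, and max_batch_size. A short illustrative sketch (not aichat's actual batching code) of how a max_batch_size of 5, as declared for textembedding-gecko@003, would split a list of texts into per-request batches:

// Illustrative only: group texts into batches no larger than max_batch_size.
fn batch_texts(texts: &[String], max_batch_size: usize) -> Vec<&[String]> {
    texts.chunks(max_batch_size).collect()
}

fn main() {
    let texts: Vec<String> = (0..12).map(|i| format!("chunk {i}")).collect();
    for (i, batch) in batch_texts(&texts, 5).iter().enumerate() {
        println!("request {i}: {} texts", batch.len()); // 5, 5, 2
    }
}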
@@ -481,17 +487,17 @@
   # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
   # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
   models:
-    - name: ernie-4.0-turbo-8k
+    - name: ernie-4.0-turbo-8k-preview
       max_input_tokens: 8192
       input_price: 4.2
       output_price: 8.4
       supports_function_calling: true
-    - name: ernie-4.0-8k-0613
+    - name: ernie-4.0-8k-preview
       max_input_tokens: 8192
       input_price: 16.8
       output_price: 16.8
       supports_function_calling: true
-    - name: ernie-3.5-8k-0613
+    - name: ernie-3.5-8k-preview
       max_input_tokens: 8192
       input_price: 1.68
       output_price: 1.68
@@ -514,13 +520,6 @@
       output_vector_size: 1024
       default_chunk_size: 1000
       max_batch_size: 16
-    - name: tao_8k
-      type: embedding
-      max_input_tokens: 8192
-      input_price: 0.28
-      output_vector_size: 1024
-      default_chunk_size: 2000
-      max_batch_size: 1
     - name: bce_reranker_base
       type: reranker
       max_input_tokens: 1024
@@ -616,6 +615,11 @@
       input_price: 14
       output_price: 14
       supports_function_calling: true
+    - name: glm-4-alltools
+      max_input_tokens: 2048
+      input_price: 14
+      output_price: 14
+      supports_function_calling: true
     - name: glm-4-airx
       max_input_tokens: 8092
       input_price: 1.4
@@ -678,50 +682,6 @@
       input_price: 0.14
       output_price: 0.14
-- platform: anyscale
-  # docs:
-  # - https://docs.anyscale.com/endpoints/text-generation/query-a-model
-  # - https://www.anyscale.com/pricing-detail
-  models:
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: meta-llama/Meta-Llama-3-70B-Instruct
-      max_input_tokens: 8192
-      input_price: 1.0
-      output_price: 1.0
-    - name: mistralai/Mistral-7B-Instruct-v0.1
-      max_input_tokens: 16384
-      input_price: 0.15
-      output_price: 0.15
-    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
-      max_input_tokens: 32768
-      input_price: 0.50
-      output_price: 0.50
-    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
-      max_input_tokens: 65536
-      input_price: 0.90
-      output_price: 0.90
-    - name: google/gemma-7b-it
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: BAAI/bge-large-en-v1.5
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
-    - name: thenlper/gte-large
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
 - platform: deepinfra
   # docs:
   # - https://deepinfra.com/models
@@ -834,7 +794,7 @@
       max_input_tokens: 65536
       input_price: 0.9
       output_price: 0.9
-    - name: accounts/fireworks/models/gemma-7b-it
+    - name: accounts/fireworks/models/gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0.2
       output_price: 0.2
@@ -935,6 +895,10 @@
       input_price: 0.125
       output_price: 0.375
       supports_function_calling: true
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 2800000
+      input_price: 0.2
+      output_price: 0.2
     - name: anthropic/claude-3.5-sonnet
       max_input_tokens: 200000
       max_output_tokens: 4096
@@ -1021,7 +985,10 @@
       max_input_tokens: 28000
       input_price: 1
       output_price: 1
+    - name: 01-ai/yi-large
+      max_input_tokens: 32768
+      input_price: 3
+      output_price: 3
 - platform: octoai
   # docs:

@@ -51,8 +51,7 @@ register_client!(
     (qianwen, "qianwen", QianwenConfig, QianwenClient),
 );
-pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [
-    ("anyscale", "https://api.endpoints.anyscale.com/v1"),
+pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [
     ("deepinfra", "https://api.deepinfra.com/v1/openai"),
     ("deepseek", "https://api.deepseek.com"),
     ("fireworks", "https://api.fireworks.ai/inference/v1"),

@@ -69,14 +69,10 @@ impl VertexAIClient {
         let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
         let url = format!("{base_url}/google/models/{}:predict", self.model.name());
-        let task_type = match data.query {
-            true => "RETRIEVAL_DOCUMENT",
-            false => "QUESTION_ANSWERING",
-        };
         let instances: Vec<_> = data
             .texts
             .into_iter()
-            .map(|v| json!({"task_type": task_type, "content": v}))
+            .map(|v| json!({"content": v}))
             .collect();
         let body = json!({
             "instances": instances,
