From 0264ab80ab56d1ae78470c3fe860580b5bdc676a Mon Sep 17 00:00:00 2001
From: sigoden
Date: Thu, 11 Jul 2024 07:40:24 +0800
Subject: [PATCH] refactor: update models.yaml and abandon anyscale (#701)

---
 Argcfile.sh            |  1 -
 config.example.yaml    |  6 ---
 models.yaml            | 99 ++++++++++++++----------------------------
 src/client/mod.rs      |  3 +-
 src/client/vertexai.rs |  6 +--
 5 files changed, 35 insertions(+), 80 deletions(-)

diff --git a/Argcfile.sh b/Argcfile.sh
index 7433e26..5706aa5 100755
--- a/Argcfile.sh
+++ b/Argcfile.sh
@@ -80,7 +80,6 @@
 test-server() {
     OPENAI_COMPATIBLE_PLATFORMS=( \
        openai,gpt-3.5-turbo,https://api.openai.com/v1 \
-       anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \
        deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
        deepseek,deepseek-chat,https://api.deepseek.com \
        fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \
diff --git a/config.example.yaml b/config.example.yaml
index 003b42a..89407a6 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -260,12 +260,6 @@ clients:
     name: lingyiwanwu
     api_key: xxx # ENV: {client}_API_KEY
 
-  # See https://docs.endpoints.anyscale.com/
-  - type: openai-compatible
-    name: anyscale
-    api_base: https://api.endpoints.anyscale.com/v1
-    api_key: xxx # ENV: {client}_API_KEY
-
   # See https://deepinfra.com/docs
   - type: openai-compatible
     name: deepinfra
diff --git a/models.yaml b/models.yaml
index 3e9ba7b..43fd5bf 100644
--- a/models.yaml
+++ b/models.yaml
@@ -239,22 +239,21 @@
   models:
     - name: llama3-8b-8192
       max_input_tokens: 8192
-      input_price: 0.05
-      output_price: 0.08
+      input_price: 0
+      output_price: 0
       supports_function_calling: true
     - name: llama3-70b-8192
       max_input_tokens: 8192
-      input_price: 0.59
-      output_price: 0.79
-      supports_function_calling: true
+      input_price: 0
+      output_price: 0
     - name: mixtral-8x7b-32768
       max_input_tokens: 32768
-      input_price: 0.24
-      output_price: 0.24
-    - name: gemma-7b-it
+      input_price: 0
+      output_price: 0
+    - name: gemma2-9b-it
       max_input_tokens: 8192
-      input_price: 0.07
-      output_price: 0.07
+      input_price: 0
+      output_price: 0
 
 - platform: vertexai
   # docs:
@@ -284,6 +283,13 @@
       input_price: 0.125
       output_price: 0.375
       supports_function_calling: true
+    - name: textembedding-gecko@003
+      type: embedding
+      max_input_tokens: 3072
+      input_price: 0.025
+      output_vector_size: 2048
+      default_chunk_size: 3000
+      max_batch_size: 5
     - name: text-embedding-004
       type: embedding
       max_input_tokens: 3072
@@ -481,17 +487,17 @@
   #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
   #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
   models:
-    - name: ernie-4.0-turbo-8k
+    - name: ernie-4.0-turbo-8k-preview
       max_input_tokens: 8192
       input_price: 4.2
       output_price: 8.4
       supports_function_calling: true
-    - name: ernie-4.0-8k-0613
+    - name: ernie-4.0-8k-preview
       max_input_tokens: 8192
       input_price: 16.8
       output_price: 16.8
       supports_function_calling: true
-    - name: ernie-3.5-8k-0613
+    - name: ernie-3.5-8k-preview
       max_input_tokens: 8192
       input_price: 1.68
       output_price: 1.68
@@ -514,13 +520,6 @@
       output_vector_size: 1024
       default_chunk_size: 1000
       max_batch_size: 16
-    - name: tao_8k
-      type: embedding
-      max_input_tokens: 8192
-      input_price: 0.28
-      output_vector_size: 1024
-      default_chunk_size: 2000
-      max_batch_size: 1
     - name: bce_reranker_base
       type: reranker
       max_input_tokens: 1024
@@ -616,6 +615,11 @@
       input_price: 14
       output_price: 14
       supports_function_calling: true
+    - name: glm-4-alltools
+      max_input_tokens: 2048
+      input_price: 14
+      output_price: 14
+      supports_function_calling: true
     - name: glm-4-airx
       max_input_tokens: 8092
       input_price: 1.4
@@ -678,50 +682,6 @@
       input_price: 0.14
       output_price: 0.14
 
-- platform: anyscale
-  # docs:
-  #   - https://docs.anyscale.com/endpoints/text-generation/query-a-model
-  #   - https://www.anyscale.com/pricing-detail
-  models:
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: meta-llama/Meta-Llama-3-70B-Instruct
-      max_input_tokens: 8192
-      input_price: 1.0
-      output_price: 1.0
-    - name: mistralai/Mistral-7B-Instruct-v0.1
-      max_input_tokens: 16384
-      input_price: 0.15
-      output_price: 0.15
-    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
-      max_input_tokens: 32768
-      input_price: 0.50
-      output_price: 0.50
-    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
-      max_input_tokens: 65536
-      input_price: 0.90
-      output_price: 0.90
-    - name: google/gemma-7b-it
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: BAAI/bge-large-en-v1.5
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
-    - name: thenlper/gte-large
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
-
 - platform: deepinfra
   # docs:
   #   - https://deepinfra.com/models
@@ -834,7 +794,7 @@
       max_input_tokens: 65536
       input_price: 0.9
       output_price: 0.9
-    - name: accounts/fireworks/models/gemma-7b-it
+    - name: accounts/fireworks/models/gemma2-9b-it
       max_input_tokens: 8192
       input_price: 0.2
       output_price: 0.2
@@ -935,6 +895,10 @@
       input_price: 0.125
       output_price: 0.375
       supports_function_calling: true
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 2800000
+      input_price: 0.2
+      output_price: 0.2
     - name: anthropic/claude-3.5-sonnet
       max_input_tokens: 200000
       max_output_tokens: 4096
@@ -1021,7 +985,10 @@
       max_input_tokens: 28000
       input_price: 1
       output_price: 1
-
+    - name: 01-ai/yi-large
+      max_input_tokens: 32768
+      input_price: 3
+      output_price: 3
 
 - platform: octoai
   # docs:
diff --git a/src/client/mod.rs b/src/client/mod.rs
index 6878541..b4c0091 100644
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -51,8 +51,7 @@ register_client!(
     (qianwen, "qianwen", QianwenConfig, QianwenClient),
 );
 
-pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [
-    ("anyscale", "https://api.endpoints.anyscale.com/v1"),
+pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [
     ("deepinfra", "https://api.deepinfra.com/v1/openai"),
     ("deepseek", "https://api.deepseek.com"),
     ("fireworks", "https://api.fireworks.ai/inference/v1"),
diff --git a/src/client/vertexai.rs b/src/client/vertexai.rs
index a4c4050..828c300 100644
--- a/src/client/vertexai.rs
+++ b/src/client/vertexai.rs
@@ -69,14 +69,10 @@ impl VertexAIClient {
         let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
         let url = format!("{base_url}/google/models/{}:predict", self.model.name());
 
-        let task_type = match data.query {
-            true => "RETRIEVAL_DOCUMENT",
-            false => "QUESTION_ANSWERING",
-        };
         let instances: Vec<_> = data
             .texts
             .into_iter()
-            .map(|v| json!({"task_type": task_type, "content": v}))
+            .map(|v| json!({"content": v}))
             .collect();
         let body = json!({
             "instances": instances,