refactor: update models.yaml and abandon anyscale (#701)

2024-11-08 13:10:28 +00:00 · 2024-07-11 07:40:24 +08:00 · 2024-07-11 07:40:24 +08:00 · 0264ab80ab
commit 0264ab80ab
parent cee0eb453e
5 changed files with 35 additions and 80 deletions
--- a/Argcfile.sh
+++ b/Argcfile.sh
@@ -80,7 +80,6 @@ test-server() {
 OPENAI_COMPATIBLE_PLATFORMS=( \
  openai,gpt-3.5-turbo,https://api.openai.com/v1 \
-  anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \
  deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
  deepseek,deepseek-chat,https://api.deepseek.com \
  fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -260,12 +260,6 @@ clients:
    name: lingyiwanwu
    api_key: xxx                                      # ENV: {client}_API_KEY
-  # See https://docs.endpoints.anyscale.com/
-  - type: openai-compatible
-    name: anyscale
-    api_base: https://api.endpoints.anyscale.com/v1
-    api_key: xxx                                      # ENV: {client}_API_KEY
  # See https://deepinfra.com/docs
  - type: openai-compatible
    name: deepinfra
--- a/models.yaml
+++ b/models.yaml
@@ -239,22 +239,21 @@
  models:
    - name: llama3-8b-8192
      max_input_tokens: 8192
-      input_price: 0.05
+      input_price: 0
-      output_price: 0.08
+      output_price: 0
      supports_function_calling: true
    - name: llama3-70b-8192
      max_input_tokens: 8192
-      input_price: 0.59
+      input_price: 0
-      output_price: 0.79
+      output_price: 0
      supports_function_calling: true
    - name: mixtral-8x7b-32768
      max_input_tokens: 32768
-      input_price: 0.24
+      input_price: 0
-      output_price: 0.24
+      output_price: 0
-    - name: gemma-7b-it
+    - name: gemma2-9b-it
      max_input_tokens: 8192
-      input_price: 0.07
+      input_price: 0
-      output_price: 0.07
+      output_price: 0
 - platform: vertexai
  # docs:
@@ -284,6 +283,13 @@
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
+    - name: textembedding-gecko@003
+      type: embedding
+      max_input_tokens: 3072
+      input_price: 0.025
+      output_vector_size: 2048
+      default_chunk_size: 3000
+      max_batch_size: 5
    - name: text-embedding-004
      type: embedding
      max_input_tokens: 3072
@@ -481,17 +487,17 @@
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
  #   - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
  models:
-    - name: ernie-4.0-turbo-8k
+    - name: ernie-4.0-turbo-8k-preview
      max_input_tokens: 8192
      input_price: 4.2
      output_price: 8.4
      supports_function_calling: true
-    - name: ernie-4.0-8k-0613
+    - name: ernie-4.0-8k-preview
      max_input_tokens: 8192
      input_price: 16.8
      output_price: 16.8
      supports_function_calling: true
-    - name: ernie-3.5-8k-0613
+    - name: ernie-3.5-8k-preview
      max_input_tokens: 8192
      input_price: 1.68
      output_price: 1.68
@@ -514,13 +520,6 @@
      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 16
-    - name: tao_8k
-      type: embedding
-      max_input_tokens: 8192
-      input_price: 0.28
-      output_vector_size: 1024
-      default_chunk_size: 2000
-      max_batch_size: 1
    - name: bce_reranker_base
      type: reranker
      max_input_tokens: 1024
@@ -616,6 +615,11 @@
      input_price: 14
      output_price: 14
      supports_function_calling: true
+    - name: glm-4-alltools
+      max_input_tokens: 2048
+      input_price: 14
+      output_price: 14
+      supports_function_calling: true
    - name: glm-4-airx
      max_input_tokens: 8092
      input_price: 1.4
@@ -678,50 +682,6 @@
    input_price: 0.14
    output_price: 0.14
-- platform: anyscale
-  # docs:
-  #   - https://docs.anyscale.com/endpoints/text-generation/query-a-model
-  #   - https://www.anyscale.com/pricing-detail
-  models:
-    - name: meta-llama/Meta-Llama-3-8B-Instruct
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: meta-llama/Meta-Llama-3-70B-Instruct
-      max_input_tokens: 8192
-      input_price: 1.0
-      output_price: 1.0
-    - name: mistralai/Mistral-7B-Instruct-v0.1
-      max_input_tokens: 16384
-      input_price: 0.15
-      output_price: 0.15
-    - name: mistralai/Mixtral-8x7B-Instruct-v0.1
-      max_input_tokens: 32768
-      input_price: 0.50
-      output_price: 0.50
-    - name: mistralai/Mixtral-8x22B-Instruct-v0.1
-      max_input_tokens: 65536
-      input_price: 0.90
-      output_price: 0.90
-    - name: google/gemma-7b-it
-      max_input_tokens: 8192
-      input_price: 0.15
-      output_price: 0.15
-    - name: BAAI/bge-large-en-v1.5
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
-    - name: thenlper/gte-large
-      type: embedding
-      max_input_tokens: 512
-      input_price: 0.05
-      output_vector_size: 1024
-      default_chunk_size: 1000
-      max_batch_size: 30
 - platform: deepinfra
  # docs:
  #   - https://deepinfra.com/models
@@ -834,7 +794,7 @@
      max_input_tokens: 65536
      input_price: 0.9
      output_price: 0.9
-    - name: accounts/fireworks/models/gemma-7b-it
+    - name: accounts/fireworks/models/gemma2-9b-it
      max_input_tokens: 8192
      input_price: 0.2
      output_price: 0.2
@@ -935,6 +895,10 @@
      input_price: 0.125
      output_price: 0.375
      supports_function_calling: true
+    - name: google/gemma-2-9b-it
+      max_input_tokens: 2800000
+      input_price: 0.2
+      output_price: 0.2
    - name: anthropic/claude-3.5-sonnet
      max_input_tokens: 200000
      max_output_tokens: 4096
@@ -1021,7 +985,10 @@
      max_input_tokens: 28000
      input_price: 1
      output_price: 1
-
+    - name: 01-ai/yi-large
+      max_input_tokens: 32768
+      input_price: 3
+      output_price: 3
 - platform: octoai
  # docs:
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -51,8 +51,7 @@ register_client!(
    (qianwen, "qianwen", QianwenConfig, QianwenClient),
 );
-pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [
+pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [
-    ("anyscale", "https://api.endpoints.anyscale.com/v1"),
    ("deepinfra", "https://api.deepinfra.com/v1/openai"),
    ("deepseek", "https://api.deepseek.com"),
    ("fireworks", "https://api.fireworks.ai/inference/v1"),
--- a/src/client/vertexai.rs
+++ b/src/client/vertexai.rs
@@ -69,14 +69,10 @@ impl VertexAIClient {
        let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
        let url = format!("{base_url}/google/models/{}:predict", self.model.name());
-        let task_type = match data.query {
-            true => "RETRIEVAL_DOCUMENT",
-            false => "QUESTION_ANSWERING",
-        };
        let instances: Vec<_> = data
            .texts
            .into_iter()
-            .map(|v| json!({"task_type": task_type, "content": v}))
+            .map(|v| json!({"content": v}))
            .collect();
        let body = json!({
            "instances": instances,