refactor: embedding model add price and dimension (#636)

3 months ago · 6d148c9c53
parent 590c525048
commit 6d148c9c53
4 changed files with 143 additions and 56 deletions
--- a/models.yaml
+++ b/models.yaml
@@ -32,16 +32,15 @@
    - name: text-embedding-3-large
      type: embedding
      max_input_tokens: 8191
+      input_price: 0.13
+      output_vector_size: 3072
      default_chunk_size: 3000
      max_batch_size: 100
    - name: text-embedding-3-small
      type: embedding
      max_input_tokens: 8191
-      default_chunk_size: 3000
-      max_batch_size: 100
-    - name: text-embedding-ada-002
-      type: embedding
-      max_input_tokens: 8191
+      input_price: 0.02
+      output_vector_size: 1536
      default_chunk_size: 3000
      max_batch_size: 100

@@ -153,6 +152,8 @@
      output_price: 3
    - name: mistral-embed
      type: embedding
+      input_price: 0.1
+      output_vector_size: 1024
      max_input_tokens: 8092
      default_chunk_size: 2000

@@ -175,11 +176,15 @@
    - name: embed-english-v3.0
      type: embedding
      max_input_tokens: 512
+      input_price: 0.1
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 96
    - name: embed-multilingual-v3.0
      type: embedding
      max_input_tokens: 512
+      input_price: 0.1
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 96
    - name: rerank-english-v3.0
@@ -282,12 +287,16 @@
    - name: text-embedding-004
      type: embedding
      max_input_tokens: 3072
-      default_chunk_size: 2000
+      input_price: 0.025
+      output_vector_size: 768
+      default_chunk_size: 1500
      max_batch_size: 5
    - name: text-multilingual-embedding-002
      type: embedding
      max_input_tokens: 3072
-      default_chunk_size: 2000
+      input_price: 0.2
+      output_vector_size: 768
+      default_chunk_size: 1500
      max_batch_size: 5

 - platform: vertexai-claude
@@ -431,11 +440,15 @@
    - name: '@cf/baai/bge-base-en-v1.5'
      type: embedding
      max_input_tokens: 512
+      input_price: 0
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100
    - name: '@cf/baai/bge-large-en-v1.5'
      type: embedding
      max_input_tokens: 512
+      input_price: 0
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100

@@ -494,24 +507,25 @@
      max_input_tokens: 128000
      input_price: 0
      output_price: 0
-    - name: embedding-v1
-      type: embedding
-      max_input_tokens: 384
-      default_chunk_size: 700
-      max_batch_size: 16 
    - name: bge_large_zh
      type: embedding
      max_input_tokens: 512
+      input_price: 0.28
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 16
    - name: bge_large_en
      type: embedding
      max_input_tokens: 512
+      input_price: 0.28
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 16
    - name: tao_8k
      type: embedding
      max_input_tokens: 8192
+      input_price: 0.28
+      output_vector_size: 1024
      default_chunk_size: 2000
      max_batch_size: 1
    - name: bce_reranker_base
@@ -557,6 +571,8 @@
    - name: text-embedding-v2
      type: embedding
      max_input_tokens: 2048
+      input_price: 0.1
+      output_vector_size: 1536
      default_chunk_size: 1500
      max_batch_size: 25

@@ -628,8 +644,10 @@
      supports_vision: true
    - name: embedding-2
      type: embedding
-      max_input_tokens: 2048
-      default_chunk_size: 1500
+      max_input_tokens: 512
+      input_price: 0.07
+      output_vector_size: 1024
+      default_chunk_size: 1000

 - platform: lingyiwanwu
  # docs:
@@ -698,11 +716,15 @@
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      max_input_tokens: 512
+      input_price: 0.05
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 30
    - name: thenlper/gte-large
      type: embedding
      max_input_tokens: 512
+      input_price: 0.05
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 30

@@ -750,51 +772,57 @@
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      max_input_tokens: 512
+      input_price: 0.01
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-base-en-v1.5
      type: embedding
      max_input_tokens: 512
+      input_price: 0.005
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-m3
      type: embedding
      max_input_tokens: 8192
+      input_price: 0.01
+      output_vector_size: 1024
      default_chunk_size: 2000
      max_batch_size: 100
    - name: intfloat/e5-base-v2
      type: embedding
      max_input_tokens: 512
+      input_price: 0.005
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100
    - name: intfloat/e5-large-v2
      type: embedding
      max_input_tokens: 512
+      input_price: 0.01
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: intfloat/multilingual-e5-large
      type: embedding
      max_input_tokens: 512
-      default_chunk_size: 1000
-      max_batch_size: 100
-    - name: sentence-transformers/all-MiniLM-L6-v2
-      type: embedding
-      max_input_tokens: 512
-      default_chunk_size: 1000
-      max_batch_size: 100
-    - name: sentence-transformers/paraphrase-MiniLM-L6-v2
-      type: embedding
-      max_input_tokens: 512
+      input_price: 0.01
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-base
      type: embedding
      max_input_tokens: 512
+      input_price: 0.005
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      max_input_tokens: 512
+      input_price: 0.01
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100

@@ -853,21 +881,29 @@
    - name: nomic-ai/nomic-embed-text-v1.5
      type: embedding
      max_input_tokens: 8192
+      input_price: 0.008
+      output_vector_size: 768
      default_chunk_size: 1500
      max_batch_size: 100
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      max_input_tokens: 512
+      input_price: 0.016
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-large
      type: embedding
      max_input_tokens: 512
+      input_price: 0.016
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: thenlper/gte-base
      type: embedding
      max_input_tokens: 512
+      input_price: 0.008
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100

@@ -1049,6 +1085,8 @@
    - name: thenlper/gte-large
      type: embedding
      max_input_tokens: 512
+      input_price: 0.05
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100

@@ -1090,15 +1128,21 @@
    - name: WhereIsAI/UAE-Large-V1
      type: embedding
      max_input_tokens: 512
+      input_price: 0.016
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-large-en-v1.5
      type: embedding
      max_input_tokens: 512
+      input_price: 0.016
+      output_vector_size: 1024
      default_chunk_size: 1000
      max_batch_size: 100
    - name: BAAI/bge-base-en-v1.5
      type: embedding
      max_input_tokens: 512
+      input_price: 0.008
+      output_vector_size: 768
      default_chunk_size: 1000
      max_batch_size: 100
--- a/src/client/model.rs
+++ b/src/client/model.rs
@@ -124,35 +124,56 @@ impl Model {
    }

    pub fn description(&self) -> String {
-        let ModelData {
-            max_input_tokens,
-            max_output_tokens,
-            input_price,
-            output_price,
-            supports_vision,
-            supports_function_calling,
-            ..
-        } = &self.data;
-        let max_input_tokens = format_option_value(max_input_tokens);
-        let max_output_tokens = format_option_value(max_output_tokens);
-        let input_price = format_option_value(input_price);
-        let output_price = format_option_value(output_price);
-        let mut capabilities = vec![];
-        if *supports_vision {
-            capabilities.push('👁');
-        };
-        if *supports_function_calling {
-            capabilities.push('⚒');
-        };
-        let capabilities: String = capabilities
-            .into_iter()
-            .map(|v| format!("{v} "))
-            .collect::<Vec<String>>()
-            .join("");
-        format!(
-            "{:>8} / {:>8}  |  {:>6} / {:>6}  {:>6}",
-            max_input_tokens, max_output_tokens, input_price, output_price, capabilities
-        )
+        match self.model_type() {
+            "chat" => {
+                let ModelData {
+                    max_input_tokens,
+                    max_output_tokens,
+                    input_price,
+                    output_price,
+                    supports_vision,
+                    supports_function_calling,
+                    ..
+                } = &self.data;
+                let max_input_tokens = format_option_value(max_input_tokens);
+                let max_output_tokens = format_option_value(max_output_tokens);
+                let input_price = format_option_value(input_price);
+                let output_price = format_option_value(output_price);
+                let mut capabilities = vec![];
+                if *supports_vision {
+                    capabilities.push('👁');
+                };
+                if *supports_function_calling {
+                    capabilities.push('⚒');
+                };
+                let capabilities: String = capabilities
+                    .into_iter()
+                    .map(|v| format!("{v} "))
+                    .collect::<Vec<String>>()
+                    .join("");
+                format!(
+                    "{:>8} / {:>8}  |  {:>6} / {:>6}  {:>6}",
+                    max_input_tokens, max_output_tokens, input_price, output_price, capabilities
+                )
+            }
+            "embedding" => {
+                let ModelData {
+                    max_input_tokens,
+                    input_price,
+                    output_vector_size,
+                    max_batch_size,
+                    ..
+                } = &self.data;
+                let dimension = format_option_value(output_vector_size);
+                let max_tokens = format_option_value(max_input_tokens);
+                let price = format_option_value(input_price);
+                let batch = format_option_value(max_batch_size);
+                format!(
+                    "dimension:{dimension}; max-tokens:{max_tokens}; price:{price}; batch:{batch}"
+                )
+            }
+            _ => String::new(),
+        }
    }

    pub fn max_input_tokens(&self) -> Option<usize> {
@@ -261,6 +282,7 @@ pub struct ModelData {
    pub supports_function_calling: bool,

    // embedding-only properties
+    pub output_vector_size: Option<usize>,
    pub default_chunk_size: Option<usize>,
    pub max_batch_size: Option<usize>,
 }
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -548,9 +548,12 @@ pub fn split_document_id(value: DocumentId) -> (usize, usize) {
 }

 fn select_embedding_model(models: &[&Model]) -> Result<String> {
-    let model_ids: Vec<_> = models.iter().map(|v| v.id()).collect();
-    let model_id = Select::new("Select embedding model:", model_ids).prompt()?;
-    Ok(model_id)
+    let models: Vec<_> = models
+        .iter()
+        .map(|v| SelectOption::new(v.id(), v.description()))
+        .collect();
+    let result = Select::new("Select embedding model:", models).prompt()?;
+    Ok(result.value)
 }

 fn set_chunk_size(model: &Model) -> Result<usize> {
--- a/src/utils/prompt_input.rs
+++ b/src/utils/prompt_input.rs
@@ -54,3 +54,21 @@ fn validate_integer(text: &str) -> Validation {
        Validation::Valid
    }
 }
+
+#[derive(Debug)]
+pub struct SelectOption {
+    pub value: String,
+    pub description: String,
+}
+
+impl SelectOption {
+    pub fn new(value: String, description: String) -> Self {
+        Self { value, description }
+    }
+}
+
+impl std::fmt::Display for SelectOption {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} ({})", self.value, self.description)
+    }
+}