refactor: embedding model add price and dimension (#636)

This commit is contained in:
sigoden 2024-06-23 07:17:26 +08:00 committed by GitHub
parent 590c525048
commit 6d148c9c53
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 143 additions and 56 deletions

View File

@ -32,16 +32,15 @@
- name: text-embedding-3-large
type: embedding
max_input_tokens: 8191
input_price: 0.13
output_vector_size: 3072
default_chunk_size: 3000
max_batch_size: 100
- name: text-embedding-3-small
type: embedding
max_input_tokens: 8191
default_chunk_size: 3000
max_batch_size: 100
- name: text-embedding-ada-002
type: embedding
max_input_tokens: 8191
input_price: 0.02
output_vector_size: 1536
default_chunk_size: 3000
max_batch_size: 100
@ -153,6 +152,8 @@
output_price: 3
- name: mistral-embed
type: embedding
input_price: 0.1
output_vector_size: 1024
max_input_tokens: 8092
default_chunk_size: 2000
@ -175,11 +176,15 @@
- name: embed-english-v3.0
type: embedding
max_input_tokens: 512
input_price: 0.1
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 96
- name: embed-multilingual-v3.0
type: embedding
max_input_tokens: 512
input_price: 0.1
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 96
- name: rerank-english-v3.0
@ -282,12 +287,16 @@
- name: text-embedding-004
type: embedding
max_input_tokens: 3072
input_price: 0.025
output_vector_size: 768
default_chunk_size: 1500
max_batch_size: 5
- name: text-multilingual-embedding-002
type: embedding
max_input_tokens: 3072
input_price: 0.2
output_vector_size: 768
default_chunk_size: 1500
max_batch_size: 5
- platform: vertexai-claude
@ -431,11 +440,15 @@
- name: '@cf/baai/bge-base-en-v1.5'
type: embedding
max_input_tokens: 512
input_price: 0
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100
- name: '@cf/baai/bge-large-en-v1.5'
type: embedding
max_input_tokens: 512
input_price: 0
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
@ -494,24 +507,25 @@
max_input_tokens: 128000
input_price: 0
output_price: 0
- name: embedding-v1
type: embedding
max_input_tokens: 384
default_chunk_size: 700
max_batch_size: 16
- name: bge_large_zh
type: embedding
max_input_tokens: 512
input_price: 0.28
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 16
- name: bge_large_en
type: embedding
max_input_tokens: 512
input_price: 0.28
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 16
- name: tao_8k
type: embedding
max_input_tokens: 8192
input_price: 0.28
output_vector_size: 1024
default_chunk_size: 2000
max_batch_size: 1
- name: bce_reranker_base
@ -557,6 +571,8 @@
- name: text-embedding-v2
type: embedding
max_input_tokens: 2048
input_price: 0.1
output_vector_size: 1536
default_chunk_size: 1500
max_batch_size: 25
@ -628,8 +644,10 @@
supports_vision: true
- name: embedding-2
type: embedding
max_input_tokens: 512
input_price: 0.07
output_vector_size: 1024
default_chunk_size: 1000
- platform: lingyiwanwu
# docs:
@ -698,11 +716,15 @@
- name: BAAI/bge-large-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.05
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 30
- name: thenlper/gte-large
type: embedding
max_input_tokens: 512
input_price: 0.05
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 30
@ -750,51 +772,57 @@
- name: BAAI/bge-large-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.01
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-base-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.005
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-m3
type: embedding
max_input_tokens: 8192
input_price: 0.01
output_vector_size: 1024
default_chunk_size: 2000
max_batch_size: 100
- name: intfloat/e5-base-v2
type: embedding
max_input_tokens: 512
input_price: 0.005
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100
- name: intfloat/e5-large-v2
type: embedding
max_input_tokens: 512
input_price: 0.01
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: intfloat/multilingual-e5-large
type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_batch_size: 100
- name: sentence-transformers/all-MiniLM-L6-v2
type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_batch_size: 100
- name: sentence-transformers/paraphrase-MiniLM-L6-v2
type: embedding
max_input_tokens: 512
input_price: 0.01
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-base
type: embedding
max_input_tokens: 512
input_price: 0.005
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
max_input_tokens: 512
input_price: 0.01
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
@ -853,21 +881,29 @@
- name: nomic-ai/nomic-embed-text-v1.5
type: embedding
max_input_tokens: 8192
input_price: 0.008
output_vector_size: 768
default_chunk_size: 1500
max_batch_size: 100
- name: WhereIsAI/UAE-Large-V1
type: embedding
max_input_tokens: 512
input_price: 0.016
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-large
type: embedding
max_input_tokens: 512
input_price: 0.016
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: thenlper/gte-base
type: embedding
max_input_tokens: 512
input_price: 0.008
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100
@ -1049,6 +1085,8 @@
- name: thenlper/gte-large
type: embedding
max_input_tokens: 512
input_price: 0.05
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
@ -1090,15 +1128,21 @@
- name: WhereIsAI/UAE-Large-V1
type: embedding
max_input_tokens: 512
input_price: 0.016
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-large-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.016
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 100
- name: BAAI/bge-base-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.008
output_vector_size: 768
default_chunk_size: 1000
max_batch_size: 100

View File

@ -124,35 +124,56 @@ impl Model {
}
/// Renders a one-line, human-readable summary of the model's key properties,
/// keyed on the model type.
///
/// - `"chat"`: token limits, input/output prices, and capability icons
///   (👁 = vision, ⚒ = function calling), column-aligned for list display.
/// - `"embedding"`: dimension, max tokens, price, and batch size.
/// - any other type: an empty string.
pub fn description(&self) -> String {
    match self.model_type() {
        "chat" => {
            let ModelData {
                max_input_tokens,
                max_output_tokens,
                input_price,
                output_price,
                supports_vision,
                supports_function_calling,
                ..
            } = &self.data;
            // format_option_value renders a missing Option as a placeholder,
            // so every column stays aligned even for incomplete model data.
            let max_input_tokens = format_option_value(max_input_tokens);
            let max_output_tokens = format_option_value(max_output_tokens);
            let input_price = format_option_value(input_price);
            let output_price = format_option_value(output_price);
            let mut capabilities = vec![];
            if *supports_vision {
                capabilities.push('👁');
            };
            if *supports_function_calling {
                capabilities.push('⚒');
            };
            let capabilities: String = capabilities
                .into_iter()
                .map(|v| format!("{v} "))
                .collect::<Vec<String>>()
                .join("");
            format!(
                "{:>8} / {:>8} | {:>6} / {:>6} {:>6}",
                max_input_tokens, max_output_tokens, input_price, output_price, capabilities
            )
        }
        "embedding" => {
            let ModelData {
                max_input_tokens,
                input_price,
                output_vector_size,
                max_batch_size,
                ..
            } = &self.data;
            let dimension = format_option_value(output_vector_size);
            let max_tokens = format_option_value(max_input_tokens);
            let price = format_option_value(input_price);
            let batch = format_option_value(max_batch_size);
            format!(
                "dimension:{dimension}; max-tokens:{max_tokens}; price:{price}; batch:{batch}"
            )
        }
        // NOTE(review): unknown model types (e.g. rerankers) currently get no
        // description — confirm this is intentional for the select prompt.
        _ => String::new(),
    }
}
pub fn max_input_tokens(&self) -> Option<usize> {
@ -261,6 +282,7 @@ pub struct ModelData {
pub supports_function_calling: bool,
// embedding-only properties
pub output_vector_size: Option<usize>,
pub default_chunk_size: Option<usize>,
pub max_batch_size: Option<usize>,
}

View File

@ -548,9 +548,12 @@ pub fn split_document_id(value: DocumentId) -> (usize, usize) {
}
/// Prompts the user to pick an embedding model from `models` and returns the
/// chosen model's id.
///
/// Each menu entry is a `SelectOption` pairing the model id with its
/// description (dimension, max tokens, price, batch size), so the list shows
/// more than a bare id.
///
/// # Errors
/// Propagates any prompt failure (e.g. the user cancels the selection).
fn select_embedding_model(models: &[&Model]) -> Result<String> {
    let models: Vec<_> = models
        .iter()
        .map(|v| SelectOption::new(v.id(), v.description()))
        .collect();
    let result = Select::new("Select embedding model:", models).prompt()?;
    Ok(result.value)
}
fn set_chunk_size(model: &Model) -> Result<usize> {

View File

@ -54,3 +54,21 @@ fn validate_integer(text: &str) -> Validation {
Validation::Valid
}
}
/// A selectable menu entry pairing a machine-usable value with a
/// human-readable description.
///
/// Displayed in select prompts as `value (description)`.
#[derive(Debug)]
pub struct SelectOption {
    /// The value returned when this option is chosen.
    pub value: String,
    /// Extra detail shown alongside the value in the menu.
    pub description: String,
}

impl SelectOption {
    /// Builds an option from its value and its display description.
    pub fn new(value: String, description: String) -> Self {
        SelectOption { value, description }
    }
}

impl std::fmt::Display for SelectOption {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Rendered exactly as "value (description)" in the prompt list.
        let SelectOption { value, description } = self;
        write!(f, "{value} ({description})")
    }
}