diff --git a/assets/arena.html b/assets/arena.html index 9808084..f0c93e5 100644 --- a/assets/arena.html +++ b/assets/arena.html @@ -565,7 +565,7 @@ async init() { try { const models = await fetchJSON(MODELS_API); - this.models = models.filter(v => !v.mode || v.mode === "chat"); + this.models = models.filter(v => !v.type || v.type === "chat"); } catch (err) { toast("No available model"); console.error("Failed to load models", err); diff --git a/assets/playground.html b/assets/playground.html index 6ef1f1a..72c1e3e 100644 --- a/assets/playground.html +++ b/assets/playground.html @@ -741,7 +741,7 @@ async init() { await Promise.all([ fetchJSON(MODELS_API).then(models => { - this.models = models.filter(v => !v.mode || v.mode === "chat"); + this.models = models.filter(v => !v.type || v.type === "chat"); }).catch(err => { toast("No model available"); console.error("Failed to load models", err); diff --git a/config.example.yaml b/config.example.yaml index 80f6adc..ae9c53f 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -83,17 +83,16 @@ clients: # name: xxxx # Only use it to distinguish clients with the same client type. Optional # models: # - name: xxxx - # mode: chat # Chat model # max_input_tokens: 100000 # supports_vision: true # supports_function_calling: true # - name: xxxx - # mode: embedding # Embedding model + # type: embedding # Embedding model # max_input_tokens: 2048 # default_chunk_size: 2000 # max_concurrent_chunks: 100 # - name: xxxx - # mode: rerank # Rerank model + # type: rerank # Rerank model # max_input_tokens: 2048 # patches: # <regex>: # The regex to match model names, e.g. 
'.*' 'gpt-4o' 'gpt-4o|gpt-4-.*' @@ -172,7 +171,7 @@ clients: - name: llama3 max_input_tokens: 8192 - name: all-minilm:l6-v2 - mode: embedding + type: embedding max_chunk_size: 1000 # See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart diff --git a/models.yaml b/models.yaml index 0d61152..ccb0c60 100644 --- a/models.yaml +++ b/models.yaml @@ -30,17 +30,17 @@ output_price: 1.5 supports_function_calling: true - name: text-embedding-3-large - mode: embedding + type: embedding max_input_tokens: 8191 default_chunk_size: 3000 max_concurrent_chunks: 100 - name: text-embedding-3-small - mode: embedding + type: embedding max_input_tokens: 8191 default_chunk_size: 3000 max_concurrent_chunks: 100 - name: text-embedding-ada-002 - mode: embedding + type: embedding max_input_tokens: 8191 default_chunk_size: 3000 max_concurrent_chunks: 100 @@ -74,7 +74,7 @@ output_price: 1.5 supports_function_calling: true - name: text-embedding-004 - mode: embedding + type: embedding max_input_tokens: 2048 default_chunk_size: 1500 max_concurrent_chunks: 5 @@ -152,7 +152,7 @@ input_price: 1 output_price: 3 - name: mistral-embed - mode: embedding + type: embedding max_input_tokens: 8092 default_chunk_size: 2000 @@ -173,20 +173,20 @@ output_price: 15 supports_function_calling: true - name: embed-english-v3.0 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 96 - name: embed-multilingual-v3.0 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 96 - name: rerank-english-v3.0 - mode: rerank + type: rerank max_input_tokens: 4096 - name: rerank-multilingual-v3.0 - mode: rerank + type: rerank max_input_tokens: 4096 - platform: reka @@ -304,12 +304,12 @@ output_price: 0.375 supports_function_calling: true - name: text-embedding-004 - mode: embedding + type: embedding max_input_tokens: 3072 default_chunk_size: 2000 max_concurrent_chunks: 5 - name: 
text-multilingual-embedding-002 - mode: embedding + type: embedding max_input_tokens: 3072 default_chunk_size: 2000 max_concurrent_chunks: 5 @@ -457,12 +457,12 @@ input_price: 0 output_price: 0 - name: '@cf/baai/bge-base-en-v1.5' - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: '@cf/baai/bge-large-en-v1.5' - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 @@ -575,7 +575,7 @@ output_price: 2.8 supports_vision: true - name: text-embedding-v2 - mode: embedding + type: embedding max_input_tokens: 2048 default_chunk_size: 1500 max_concurrent_chunks: 25 @@ -649,7 +649,7 @@ output_price: 7 supports_vision: true - name: embedding-2 - mode: embedding + type: embedding max_input_tokens: 2048 default_chunk_size: 1500 @@ -718,12 +718,12 @@ input_price: 0.15 output_price: 0.15 - name: BAAI/bge-large-en-v1.5 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 30 - name: thenlper/gte-large - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 30 @@ -770,52 +770,52 @@ input_price: 0.14 output_price: 0.14 - name: BAAI/bge-large-en-v1.5 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: BAAI/bge-base-en-v1.5 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: BAAI/bge-m3 - mode: embedding + type: embedding max_input_tokens: 8192 default_chunk_size: 2000 max_concurrent_chunks: 100 - name: intfloat/e5-base-v2 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: intfloat/e5-large-v2 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: intfloat/multilingual-e5-large - mode: embedding + type: 
embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: sentence-transformers/all-MiniLM-L6-v2 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: sentence-transformers/paraphrase-MiniLM-L6-v2 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: thenlper/gte-base - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: thenlper/gte-large - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 @@ -873,22 +873,22 @@ output_price: 0.2 supports_vision: true - name: nomic-ai/nomic-embed-text-v1.5 - mode: embedding + type: embedding max_input_tokens: 8192 default_chunk_size: 1500 max_concurrent_chunks: 100 - name: WhereIsAI/UAE-Large-V1 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: thenlper/gte-large - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: thenlper/gte-base - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 @@ -1069,7 +1069,7 @@ input_price: 0.86 output_price: 0.86 - name: thenlper/gte-large - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 @@ -1110,17 +1110,17 @@ output_price: 0.9 max_concurrent_chunks: 100 - name: WhereIsAI/UAE-Large-V1 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: BAAI/bge-large-en-v1.5 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 - name: BAAI/bge-base-en-v1.5 - mode: embedding + type: embedding max_input_tokens: 512 default_chunk_size: 1000 max_concurrent_chunks: 100 \ No 
newline at end of file diff --git a/src/client/common.rs b/src/client/common.rs index 6c86acd..de2fa50 100644 --- a/src/client/common.rs +++ b/src/client/common.rs @@ -145,15 +145,15 @@ macro_rules! register_client { } pub fn list_chat_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> { - list_models(config).into_iter().filter(|v| v.mode() == "chat").collect() + list_models(config).into_iter().filter(|v| v.model_type() == "chat").collect() } pub fn list_embedding_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> { - list_models(config).into_iter().filter(|v| v.mode() == "embedding").collect() + list_models(config).into_iter().filter(|v| v.model_type() == "embedding").collect() } pub fn list_rerank_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> { - list_models(config).into_iter().filter(|v| v.mode() == "rerank").collect() + list_models(config).into_iter().filter(|v| v.model_type() == "rerank").collect() } }; } diff --git a/src/client/model.rs b/src/client/model.rs index 06f5577..0584f82 100644 --- a/src/client/model.rs +++ b/src/client/model.rs @@ -111,8 +111,8 @@ impl Model { &self.data.name } - pub fn mode(&self) -> &str { - &self.data.mode + pub fn model_type(&self) -> &str { + &self.data.model_type } pub fn data(&self) -> &ModelData { @@ -245,8 +245,8 @@ impl Model { #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct ModelData { pub name: String, - #[serde(default = "default_model_mode")] - pub mode: String, + #[serde(default = "default_model_type", rename = "type")] + pub model_type: String, pub max_input_tokens: Option<usize>, pub input_price: Option<f64>, pub output_price: Option<f64>, @@ -280,6 +280,6 @@ pub struct BuiltinModels { pub models: Vec<ModelData>, } -fn default_model_mode() -> String { +fn default_model_type() -> String { "chat".into() }