diff --git a/assets/arena.html b/assets/arena.html
index 9808084..f0c93e5 100644
--- a/assets/arena.html
+++ b/assets/arena.html
@@ -565,7 +565,7 @@
async init() {
try {
const models = await fetchJSON(MODELS_API);
- this.models = models.filter(v => !v.mode || v.mode === "chat");
+ this.models = models.filter(v => !v.type || v.type === "chat");
} catch (err) {
toast("No available model");
console.error("Failed to load models", err);
diff --git a/assets/playground.html b/assets/playground.html
index 6ef1f1a..72c1e3e 100644
--- a/assets/playground.html
+++ b/assets/playground.html
@@ -741,7 +741,7 @@
async init() {
await Promise.all([
fetchJSON(MODELS_API).then(models => {
- this.models = models.filter(v => !v.mode || v.mode === "chat");
+ this.models = models.filter(v => !v.type || v.type === "chat");
}).catch(err => {
toast("No model available");
console.error("Failed to load models", err);
diff --git a/config.example.yaml b/config.example.yaml
index 80f6adc..ae9c53f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -83,17 +83,16 @@ clients:
# name: xxxx # Only use it to distinguish clients with the same client type. Optional
# models:
# - name: xxxx
- # mode: chat # Chat model
# max_input_tokens: 100000
# supports_vision: true
# supports_function_calling: true
# - name: xxxx
- # mode: embedding # Embedding model
+ # type: embedding # Embedding model
# max_input_tokens: 2048
# default_chunk_size: 2000
# max_concurrent_chunks: 100
# - name: xxxx
- # mode: rerank # Rerank model
+ # type: rerank # Rerank model
# max_input_tokens: 2048
# patches:
# : # The regex to match model names, e.g. '.*' 'gpt-4o' 'gpt-4o|gpt-4-.*'
@@ -172,7 +171,7 @@ clients:
- name: llama3
max_input_tokens: 8192
- name: all-minilm:l6-v2
- mode: embedding
+ type: embedding
max_chunk_size: 1000
# See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart
diff --git a/models.yaml b/models.yaml
index 0d61152..ccb0c60 100644
--- a/models.yaml
+++ b/models.yaml
@@ -30,17 +30,17 @@
output_price: 1.5
supports_function_calling: true
- name: text-embedding-3-large
- mode: embedding
+ type: embedding
max_input_tokens: 8191
default_chunk_size: 3000
max_concurrent_chunks: 100
- name: text-embedding-3-small
- mode: embedding
+ type: embedding
max_input_tokens: 8191
default_chunk_size: 3000
max_concurrent_chunks: 100
- name: text-embedding-ada-002
- mode: embedding
+ type: embedding
max_input_tokens: 8191
default_chunk_size: 3000
max_concurrent_chunks: 100
@@ -74,7 +74,7 @@
output_price: 1.5
supports_function_calling: true
- name: text-embedding-004
- mode: embedding
+ type: embedding
max_input_tokens: 2048
default_chunk_size: 1500
max_concurrent_chunks: 5
@@ -152,7 +152,7 @@
input_price: 1
output_price: 3
- name: mistral-embed
- mode: embedding
+ type: embedding
max_input_tokens: 8092
default_chunk_size: 2000
@@ -173,20 +173,20 @@
output_price: 15
supports_function_calling: true
- name: embed-english-v3.0
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 96
- name: embed-multilingual-v3.0
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 96
- name: rerank-english-v3.0
- mode: rerank
+ type: rerank
max_input_tokens: 4096
- name: rerank-multilingual-v3.0
- mode: rerank
+ type: rerank
max_input_tokens: 4096
- platform: reka
@@ -304,12 +304,12 @@
output_price: 0.375
supports_function_calling: true
- name: text-embedding-004
- mode: embedding
+ type: embedding
max_input_tokens: 3072
default_chunk_size: 2000
max_concurrent_chunks: 5
- name: text-multilingual-embedding-002
- mode: embedding
+ type: embedding
max_input_tokens: 3072
default_chunk_size: 2000
max_concurrent_chunks: 5
@@ -457,12 +457,12 @@
input_price: 0
output_price: 0
- name: '@cf/baai/bge-base-en-v1.5'
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: '@cf/baai/bge-large-en-v1.5'
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
@@ -575,7 +575,7 @@
output_price: 2.8
supports_vision: true
- name: text-embedding-v2
- mode: embedding
+ type: embedding
max_input_tokens: 2048
default_chunk_size: 1500
max_concurrent_chunks: 25
@@ -649,7 +649,7 @@
output_price: 7
supports_vision: true
- name: embedding-2
- mode: embedding
+ type: embedding
max_input_tokens: 2048
default_chunk_size: 1500
@@ -718,12 +718,12 @@
input_price: 0.15
output_price: 0.15
- name: BAAI/bge-large-en-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 30
- name: thenlper/gte-large
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 30
@@ -770,52 +770,52 @@
input_price: 0.14
output_price: 0.14
- name: BAAI/bge-large-en-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: BAAI/bge-base-en-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: BAAI/bge-m3
- mode: embedding
+ type: embedding
max_input_tokens: 8192
default_chunk_size: 2000
max_concurrent_chunks: 100
- name: intfloat/e5-base-v2
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: intfloat/e5-large-v2
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: intfloat/multilingual-e5-large
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: sentence-transformers/all-MiniLM-L6-v2
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: sentence-transformers/paraphrase-MiniLM-L6-v2
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: thenlper/gte-base
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: thenlper/gte-large
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
@@ -873,22 +873,22 @@
output_price: 0.2
supports_vision: true
- name: nomic-ai/nomic-embed-text-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 8192
default_chunk_size: 1500
max_concurrent_chunks: 100
- name: WhereIsAI/UAE-Large-V1
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: thenlper/gte-large
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: thenlper/gte-base
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
@@ -1069,7 +1069,7 @@
input_price: 0.86
output_price: 0.86
- name: thenlper/gte-large
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
@@ -1110,17 +1110,17 @@
output_price: 0.9
max_concurrent_chunks: 100
- name: WhereIsAI/UAE-Large-V1
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: BAAI/bge-large-en-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
- name: BAAI/bge-base-en-v1.5
- mode: embedding
+ type: embedding
max_input_tokens: 512
default_chunk_size: 1000
max_concurrent_chunks: 100
\ No newline at end of file
diff --git a/src/client/common.rs b/src/client/common.rs
index 6c86acd..de2fa50 100644
--- a/src/client/common.rs
+++ b/src/client/common.rs
@@ -145,15 +145,15 @@ macro_rules! register_client {
}
pub fn list_chat_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> {
- list_models(config).into_iter().filter(|v| v.mode() == "chat").collect()
+ list_models(config).into_iter().filter(|v| v.model_type() == "chat").collect()
}
pub fn list_embedding_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> {
- list_models(config).into_iter().filter(|v| v.mode() == "embedding").collect()
+ list_models(config).into_iter().filter(|v| v.model_type() == "embedding").collect()
}
pub fn list_rerank_models(config: &$crate::config::Config) -> Vec<&'static $crate::client::Model> {
- list_models(config).into_iter().filter(|v| v.mode() == "rerank").collect()
+ list_models(config).into_iter().filter(|v| v.model_type() == "rerank").collect()
}
};
}
diff --git a/src/client/model.rs b/src/client/model.rs
index 06f5577..0584f82 100644
--- a/src/client/model.rs
+++ b/src/client/model.rs
@@ -111,8 +111,8 @@ impl Model {
&self.data.name
}
- pub fn mode(&self) -> &str {
- &self.data.mode
+ pub fn model_type(&self) -> &str {
+ &self.data.model_type
}
pub fn data(&self) -> &ModelData {
@@ -245,8 +245,8 @@ impl Model {
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ModelData {
pub name: String,
- #[serde(default = "default_model_mode")]
- pub mode: String,
+ #[serde(default = "default_model_type", rename = "type")]
+ pub model_type: String,
pub max_input_tokens: Option<isize>,
pub input_price: Option<f64>,
pub output_price: Option<f64>,
@@ -280,6 +280,6 @@ pub struct BuiltinModels {
pub models: Vec<ModelData>,
}
-fn default_model_mode() -> String {
+fn default_model_type() -> String {
"chat".into()
}