From fdc35a1cf3043eecbdffa1abfca6b46f0bf44942 Mon Sep 17 00:00:00 2001 From: sigoden Date: Fri, 26 Jul 2024 20:03:41 +0800 Subject: [PATCH] refactor: several optimizations (#749) --- .gitignore | 1 + src/client/common.rs | 1 - src/client/model.rs | 8 -------- src/serve.rs | 6 ++++-- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index b7d673f..af2d186 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /tmp +/.env *.log \ No newline at end of file diff --git a/src/client/common.rs b/src/client/common.rs index 198fabc..1d802c5 100644 --- a/src/client/common.rs +++ b/src/client/common.rs @@ -391,7 +391,6 @@ pub trait Client: Sync + Send { async fn embeddings(&self, data: EmbeddingsData) -> Result<Vec<Vec<f32>>> { let client = self.build_client()?; - self.model().guard_max_batch_size(&data)?; self.embeddings_inner(&client, data) .await .context("Failed to call embeddings api") diff --git a/src/client/model.rs b/src/client/model.rs index e6142d4..acf04a3 100644 --- a/src/client/model.rs +++ b/src/client/model.rs @@ -1,7 +1,6 @@ use super::{ list_chat_models, list_embedding_models, list_reranker_models, message::{Message, MessageContent}, - EmbeddingsData, }; use crate::config::Config; @@ -254,13 +253,6 @@ impl Model { } Ok(()) } - - pub fn guard_max_batch_size(&self, data: &EmbeddingsData) -> Result<()> { - if data.texts.len() > self.max_batch_size() { - bail!("Exceed max_batch_size limit"); - } - Ok(()) - } } #[derive(Debug, Clone, Default, Serialize, Deserialize)] diff --git a/src/serve.rs b/src/serve.rs index 344a53f..11b132c 100644 --- a/src/serve.rs +++ b/src/serve.rs @@ -163,7 +163,7 @@ impl Server { self.arena_page() } else { status = StatusCode::NOT_FOUND; - Err(anyhow!("The requested endpoint was not found.")) + Err(anyhow!("Not Found")) }; let mut res = match res { Ok(res) => { @@ -171,7 +171,9 @@ impl Server { res } Err(err) => { - status = StatusCode::BAD_REQUEST; + if status == StatusCode::OK { + status = 
StatusCode::BAD_REQUEST; + } error!("{method} {uri} {} {err}", status.as_u16()); ret_err(err) }