refactor: several optimizations (#749)

pull/750/head
sigoden 1 month ago committed by GitHub
parent 3f7ce25709
commit fdc35a1cf3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

1
.gitignore vendored

@ -1,3 +1,4 @@
/target
/tmp
/.env
*.log

@ -391,7 +391,6 @@ pub trait Client: Sync + Send {
async fn embeddings(&self, data: EmbeddingsData) -> Result<Vec<Vec<f32>>> {
let client = self.build_client()?;
self.model().guard_max_batch_size(&data)?;
self.embeddings_inner(&client, data)
.await
.context("Failed to call embeddings api")

@ -1,7 +1,6 @@
use super::{
list_chat_models, list_embedding_models, list_reranker_models,
message::{Message, MessageContent},
EmbeddingsData,
};
use crate::config::Config;
@ -254,13 +253,6 @@ impl Model {
}
Ok(())
}
/// Checks that an embeddings request fits within this model's batch limit.
///
/// # Errors
///
/// Fails with "Exceed max_batch_size limit" when `data.texts` holds more
/// entries than `self.max_batch_size()` allows.
pub fn guard_max_batch_size(&self, data: &EmbeddingsData) -> Result<()> {
    let requested = data.texts.len();
    let allowed = self.max_batch_size();
    // Guard clause: a batch at or under the limit is accepted as-is.
    if requested <= allowed {
        return Ok(());
    }
    bail!("Exceed max_batch_size limit");
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]

@ -163,7 +163,7 @@ impl Server {
self.arena_page()
} else {
status = StatusCode::NOT_FOUND;
Err(anyhow!("The requested endpoint was not found."))
Err(anyhow!("Not Found"))
};
let mut res = match res {
Ok(res) => {
@ -171,7 +171,9 @@ impl Server {
res
}
Err(err) => {
status = StatusCode::BAD_REQUEST;
if status == StatusCode::OK {
status = StatusCode::BAD_REQUEST;
}
error!("{method} {uri} {} {err}", status.as_u16());
ret_err(err)
}

Loading…
Cancel
Save