mirror of https://github.com/sigoden/aichat
feat: abandon replicate client (#900)
parent
419c626485
commit
e009f2e241
@ -1,150 +0,0 @@
|
||||
use super::message::*;
|
||||
|
||||
/// String fragments used to assemble a flat text prompt from a chat
/// transcript: a global prefix/suffix plus per-role wrapper tokens.
/// See `generate_prompt` for exactly how the pieces are concatenated.
pub struct PromptFormat<'a> {
    /// Emitted once at the very start of the prompt.
    pub begin: &'a str,
    /// Emitted immediately before each system message's content.
    pub system_pre_message: &'a str,
    /// Emitted immediately after each system message's content.
    pub system_post_message: &'a str,
    /// Emitted immediately before each user message's content.
    pub user_pre_message: &'a str,
    /// Emitted immediately after each user message's content.
    pub user_post_message: &'a str,
    /// Emitted immediately before each assistant message's content.
    pub assistant_pre_message: &'a str,
    /// Emitted immediately after each assistant message's content.
    pub assistant_post_message: &'a str,
    /// Emitted once at the very end, typically the assistant-turn opener
    /// that cues the model to start generating.
    pub end: &'a str,
}
|
||||
|
||||
/// Fallback format (Alpaca-style "### Instruction:" / "### Response:"
/// headers) used when no model-specific template matches.
pub const GENERIC_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "",
    system_post_message: "\n",
    user_pre_message: "### Instruction:\n",
    user_post_message: "\n",
    assistant_pre_message: "### Response:\n",
    assistant_post_message: "\n",
    end: "### Response:\n",
};
|
||||
|
||||
/// Llama-2/Mistral-style `[INST]` wrapping, with the system message inside
/// a `<<SYS>>` block.
// NOTE(review): no `<s>`/`</s>` BOS/EOS tokens and no newlines around the
// <<SYS>> block are emitted here — presumably the serving side adds them;
// verify against the target model's official chat template.
pub const MISTRAL_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "[INST] <<SYS>>",
    system_post_message: "<</SYS>> [/INST]",
    user_pre_message: "[INST]",
    user_post_message: "[/INST]",
    assistant_pre_message: "",
    assistant_post_message: "",
    end: "",
};
|
||||
|
||||
/// Llama-3 header-token template: each turn is framed by
/// `<|start_header_id|>{role}<|end_header_id|>` and closed with `<|eot_id|>`;
/// the prompt ends by opening an assistant turn for the model to complete.
pub const LLAMA3_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "<|begin_of_text|>",
    system_pre_message: "<|start_header_id|>system<|end_header_id|>\n\n",
    system_post_message: "<|eot_id|>",
    user_pre_message: "<|start_header_id|>user<|end_header_id|>\n\n",
    user_post_message: "<|eot_id|>",
    assistant_pre_message: "<|start_header_id|>assistant<|end_header_id|>\n\n",
    assistant_post_message: "<|eot_id|>",
    end: "<|start_header_id|>assistant<|end_header_id|>\n\n",
};
|
||||
|
||||
/// Phi-3 template: `<|role|>` openers with `<|end|>` terminators; the prompt
/// ends by opening an assistant turn for the model to complete.
pub const PHI3_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|system|>\n",
    system_post_message: "<|end|>\n",
    user_pre_message: "<|user|>\n",
    user_post_message: "<|end|>\n",
    assistant_pre_message: "<|assistant|>\n",
    assistant_post_message: "<|end|>\n",
    end: "<|assistant|>\n",
};
|
||||
|
||||
/// Cohere Command-R turn-token template: every turn is bracketed by
/// `<|START_OF_TURN_TOKEN|>` / `<|END_OF_TURN_TOKEN|>` with a role token;
/// the prompt ends by opening a chatbot turn for the model to complete.
pub const COMMAND_R_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
    system_post_message: "<|END_OF_TURN_TOKEN|>",
    user_pre_message: "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
    user_post_message: "<|END_OF_TURN_TOKEN|>",
    assistant_pre_message: "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
    assistant_post_message: "<|END_OF_TURN_TOKEN|>",
    end: "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
};
|
||||
|
||||
/// ChatML-style template used by Qwen: `<|im_start|>{role}` openers closed
/// by `<|im_end|>`; the prompt ends by opening an assistant turn.
// NOTE(review): canonical ChatML usually puts a newline after `<|im_end|>`;
// none is emitted here — confirm this matches the deployed model's template.
pub const QWEN_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|im_start|>system\n",
    system_post_message: "<|im_end|>",
    user_pre_message: "<|im_start|>user\n",
    user_post_message: "<|im_end|>",
    assistant_pre_message: "<|im_start|>assistant\n",
    assistant_post_message: "<|im_end|>",
    end: "<|im_start|>assistant\n",
};
|
||||
|
||||
pub fn generate_prompt(messages: &[Message], format: PromptFormat) -> anyhow::Result<String> {
|
||||
let PromptFormat {
|
||||
begin,
|
||||
system_pre_message,
|
||||
system_post_message,
|
||||
user_pre_message,
|
||||
user_post_message,
|
||||
assistant_pre_message,
|
||||
assistant_post_message,
|
||||
end,
|
||||
} = format;
|
||||
let mut prompt = begin.to_string();
|
||||
let mut image_urls = vec![];
|
||||
for message in messages {
|
||||
let role = &message.role;
|
||||
let content = match &message.content {
|
||||
MessageContent::Text(text) => text.clone(),
|
||||
MessageContent::Array(list) => {
|
||||
let mut parts = vec![];
|
||||
for item in list {
|
||||
match item {
|
||||
MessageContentPart::Text { text } => parts.push(text.clone()),
|
||||
MessageContentPart::ImageUrl {
|
||||
image_url: ImageUrl { url },
|
||||
} => {
|
||||
image_urls.push(url.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
parts.join("\n\n")
|
||||
}
|
||||
MessageContent::ToolResults(_) => String::new(),
|
||||
};
|
||||
match role {
|
||||
MessageRole::System => prompt.push_str(&format!(
|
||||
"{system_pre_message}{content}{system_post_message}"
|
||||
)),
|
||||
MessageRole::Assistant => prompt.push_str(&format!(
|
||||
"{assistant_pre_message}{content}{assistant_post_message}"
|
||||
)),
|
||||
MessageRole::User => {
|
||||
prompt.push_str(&format!("{user_pre_message}{content}{user_post_message}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
if !image_urls.is_empty() {
|
||||
anyhow::bail!("The model does not support images: {:?}", image_urls);
|
||||
}
|
||||
prompt.push_str(end);
|
||||
Ok(prompt)
|
||||
}
|
||||
|
||||
pub fn smart_prompt_format(model_name: &str) -> PromptFormat<'static> {
|
||||
if model_name.contains("llama3") || model_name.contains("llama-3") {
|
||||
LLAMA3_PROMPT_FORMAT
|
||||
} else if model_name.contains("llama2")
|
||||
|| model_name.contains("llama-2")
|
||||
|| model_name.contains("mistral")
|
||||
|| model_name.contains("mixtral")
|
||||
{
|
||||
MISTRAL_PROMPT_FORMAT
|
||||
} else if model_name.contains("phi3") || model_name.contains("phi-3") {
|
||||
PHI3_PROMPT_FORMAT
|
||||
} else if model_name.contains("command-r") {
|
||||
COMMAND_R_PROMPT_FORMAT
|
||||
} else if model_name.contains("qwen") {
|
||||
QWEN_PROMPT_FORMAT
|
||||
} else {
|
||||
GENERIC_PROMPT_FORMAT
|
||||
}
|
||||
}
|
@ -1,195 +0,0 @@
|
||||
use super::prompt_format::*;
|
||||
use super::*;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use reqwest::{Client as ReqwestClient, RequestBuilder};
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Value};
|
||||
use std::time::Duration;
|
||||
|
||||
const API_BASE: &str = "https://api.replicate.com/v1";
|
||||
|
||||
/// Deserializable configuration for the Replicate client.
// NOTE(review): presumably populated from the app's clients config section —
// verify where `ReplicateConfig` is deserialized.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ReplicateConfig {
    // Optional client display name.
    pub name: Option<String>,
    // API key; when absent, `get_api_key` (see `config_get_fn!`) decides how
    // it is resolved.
    pub api_key: Option<String>,
    // Models offered by this client; defaults to empty when omitted.
    #[serde(default)]
    pub models: Vec<ModelData>,
    // Optional request patch applied to outgoing requests.
    pub patch: Option<RequestPatch>,
    // Extra, client-agnostic settings.
    pub extra: Option<ExtraConfig>,
}
|
||||
|
||||
impl ReplicateClient {
    // Generates `get_api_key()` resolving the key from `api_key` config
    // (exact fallback behavior defined by the project-wide macro).
    config_get_fn!(api_key, get_api_key);

    /// Interactive setup prompts: a single required API-key question.
    pub const PROMPTS: [PromptAction<'static>; 1] =
        [("api_key", "API Key:", true, PromptKind::String)];
}
|
||||
|
||||
#[async_trait::async_trait]
impl Client for ReplicateClient {
    // Shared accessors/config plumbing common to all clients (project macro).
    client_common_fns!();

    /// One-shot chat completion: create a prediction, then `chat_completions`
    /// polls its status URL until it finishes.
    async fn chat_completions_inner(
        &self,
        client: &ReqwestClient,
        data: ChatCompletionsData,
    ) -> Result<ChatCompletionsOutput> {
        let request_data = prepare_chat_completions(self, data)?;
        let builder = self.request_builder(client, request_data, ApiType::ChatCompletions);
        chat_completions(builder, client, &self.get_api_key()?).await
    }

    /// Streaming chat completion: create a prediction, then follow its SSE
    /// stream URL, forwarding chunks to `handler`.
    async fn chat_completions_streaming_inner(
        &self,
        client: &ReqwestClient,
        handler: &mut SseHandler,
        data: ChatCompletionsData,
    ) -> Result<()> {
        let request_data = prepare_chat_completions(self, data)?;
        let builder = self.request_builder(client, request_data, ApiType::ChatCompletions);
        chat_completions_streaming(builder, handler, client).await
    }
}
|
||||
|
||||
fn prepare_chat_completions(
|
||||
self_: &ReplicateClient,
|
||||
data: ChatCompletionsData,
|
||||
) -> Result<RequestData> {
|
||||
let api_key = self_.get_api_key()?;
|
||||
|
||||
let url = format!("{API_BASE}/models/{}/predictions", self_.model.name());
|
||||
|
||||
let body = build_chat_completions_body(data, &self_.model)?;
|
||||
|
||||
let mut request_data = RequestData::new(url, body);
|
||||
|
||||
request_data.bearer_auth(api_key);
|
||||
|
||||
Ok(request_data)
|
||||
}
|
||||
|
||||
async fn chat_completions(
|
||||
builder: RequestBuilder,
|
||||
client: &ReqwestClient,
|
||||
api_key: &str,
|
||||
) -> Result<ChatCompletionsOutput> {
|
||||
let res = builder.send().await?;
|
||||
let status = res.status();
|
||||
let data: Value = res.json().await?;
|
||||
if !status.is_success() {
|
||||
catch_error(&data, status.as_u16())?;
|
||||
}
|
||||
let prediction_url = data["urls"]["get"]
|
||||
.as_str()
|
||||
.ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
let prediction_data: Value = client
|
||||
.get(prediction_url)
|
||||
.bearer_auth(api_key)
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
debug!("non-stream-data: {prediction_data}");
|
||||
let err = || anyhow!("Invalid response data: {prediction_data}");
|
||||
let status = prediction_data["status"].as_str().ok_or_else(err)?;
|
||||
if status == "succeeded" {
|
||||
return extract_chat_completions(&prediction_data);
|
||||
} else if status == "failed" || status == "canceled" {
|
||||
return Err(err());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn chat_completions_streaming(
|
||||
builder: RequestBuilder,
|
||||
handler: &mut SseHandler,
|
||||
client: &ReqwestClient,
|
||||
) -> Result<()> {
|
||||
let res = builder.send().await?;
|
||||
let status = res.status();
|
||||
let data: Value = res.json().await?;
|
||||
if !status.is_success() {
|
||||
catch_error(&data, status.as_u16())?;
|
||||
}
|
||||
let stream_url = data["urls"]["stream"]
|
||||
.as_str()
|
||||
.ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
|
||||
|
||||
let sse_builder = client.get(stream_url).header("accept", "text/event-stream");
|
||||
|
||||
let handle = |message: SseMmessage| -> Result<bool> {
|
||||
if message.event == "done" {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
debug!("stream-data: {}", message.data);
|
||||
|
||||
handler.text(&message.data)?;
|
||||
Ok(false)
|
||||
};
|
||||
sse_stream(sse_builder, handle).await
|
||||
}
|
||||
|
||||
fn build_chat_completions_body(data: ChatCompletionsData, model: &Model) -> Result<Value> {
|
||||
let ChatCompletionsData {
|
||||
messages,
|
||||
temperature,
|
||||
top_p,
|
||||
functions: _,
|
||||
stream,
|
||||
} = data;
|
||||
|
||||
let prompt = generate_prompt(&messages, smart_prompt_format(model.name()))?;
|
||||
|
||||
let mut input = json!({
|
||||
"prompt": prompt,
|
||||
"prompt_template": "{prompt}"
|
||||
});
|
||||
|
||||
if let Some(v) = model.max_tokens_param() {
|
||||
input["max_tokens"] = v.into();
|
||||
input["max_new_tokens"] = v.into();
|
||||
}
|
||||
if let Some(v) = temperature {
|
||||
input["temperature"] = v.into();
|
||||
}
|
||||
if let Some(v) = top_p {
|
||||
input["top_p"] = v.into();
|
||||
}
|
||||
|
||||
let mut body = json!({
|
||||
"input": input,
|
||||
});
|
||||
|
||||
if stream {
|
||||
body["stream"] = true.into();
|
||||
}
|
||||
|
||||
Ok(body)
|
||||
}
|
||||
|
||||
fn extract_chat_completions(data: &Value) -> Result<ChatCompletionsOutput> {
|
||||
let text = data["output"]
|
||||
.as_array()
|
||||
.map(|parts| {
|
||||
parts
|
||||
.iter()
|
||||
.filter_map(|v| v.as_str().map(|v| v.to_string()))
|
||||
.collect::<Vec<String>>()
|
||||
.join("")
|
||||
})
|
||||
.ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
|
||||
|
||||
let output = ChatCompletionsOutput {
|
||||
text: text.to_string(),
|
||||
tool_calls: vec![],
|
||||
id: data["id"].as_str().map(|v| v.to_string()),
|
||||
input_tokens: data["metrics"]["input_token_count"].as_u64(),
|
||||
output_tokens: data["metrics"]["output_token_count"].as_u64(),
|
||||
};
|
||||
|
||||
Ok(output)
|
||||
}
|
Loading…
Reference in New Issue