mirror of https://github.com/sigoden/aichat
feat: abandon replicate client (#900)
parent
419c626485
commit
e009f2e241
@ -1,150 +0,0 @@
|
|||||||
use super::message::*;
|
|
||||||
|
|
||||||
/// Delimiters used to render a chat transcript as a single text prompt.
///
/// Each model family wraps system/user/assistant turns in its own control
/// tokens; a `PromptFormat` captures those wrappers so `generate_prompt`
/// can stay model-agnostic.
pub struct PromptFormat<'a> {
    // Emitted once at the very start of the prompt (e.g. a BOS token).
    pub begin: &'a str,
    // Wrappers placed around each system message.
    pub system_pre_message: &'a str,
    pub system_post_message: &'a str,
    // Wrappers placed around each user message.
    pub user_pre_message: &'a str,
    pub user_post_message: &'a str,
    // Wrappers placed around each assistant message.
    pub assistant_pre_message: &'a str,
    pub assistant_post_message: &'a str,
    // Emitted once at the end to cue the model to produce its reply.
    pub end: &'a str,
}
|
|
||||||
|
|
||||||
/// Fallback Alpaca-style "### Instruction / ### Response" format, used by
/// `smart_prompt_format` when no model-specific format matches.
pub const GENERIC_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "",
    system_post_message: "\n",
    user_pre_message: "### Instruction:\n",
    user_post_message: "\n",
    assistant_pre_message: "### Response:\n",
    assistant_post_message: "\n",
    // Trailing header cues the model to start its response.
    end: "### Response:\n",
};
|
|
||||||
|
|
||||||
/// `[INST]`/`<<SYS>>` instruct template, applied to llama2/llama-2 and
/// mistral/mixtral model names by `smart_prompt_format`.
// NOTE(review): the `<<SYS>>` wrapper is the Llama-2 convention; Mistral's
// own template omits it — presumably close enough for Replicate-hosted
// models, but worth confirming against each model's documented template.
pub const MISTRAL_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "[INST] <<SYS>>",
    system_post_message: "<</SYS>> [/INST]",
    user_pre_message: "[INST]",
    user_post_message: "[/INST]",
    assistant_pre_message: "",
    assistant_post_message: "",
    end: "",
};
|
|
||||||
|
|
||||||
/// Llama 3 chat template: per-role header tokens, each turn terminated by
/// `<|eot_id|>`.
pub const LLAMA3_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "<|begin_of_text|>",
    system_pre_message: "<|start_header_id|>system<|end_header_id|>\n\n",
    system_post_message: "<|eot_id|>",
    user_pre_message: "<|start_header_id|>user<|end_header_id|>\n\n",
    user_post_message: "<|eot_id|>",
    assistant_pre_message: "<|start_header_id|>assistant<|end_header_id|>\n\n",
    assistant_post_message: "<|eot_id|>",
    // Open an assistant header so the model continues as the assistant.
    end: "<|start_header_id|>assistant<|end_header_id|>\n\n",
};
|
|
||||||
|
|
||||||
/// Phi-3 chat template: `<|role|>` headers with `<|end|>` terminators.
pub const PHI3_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|system|>\n",
    system_post_message: "<|end|>\n",
    user_pre_message: "<|user|>\n",
    user_post_message: "<|end|>\n",
    assistant_pre_message: "<|assistant|>\n",
    assistant_post_message: "<|end|>\n",
    // Open an assistant turn so the model continues as the assistant.
    end: "<|assistant|>\n",
};
|
|
||||||
|
|
||||||
/// Cohere Command R template: turn-token framing with role tokens
/// (`SYSTEM`/`USER`/`CHATBOT`).
pub const COMMAND_R_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
    system_post_message: "<|END_OF_TURN_TOKEN|>",
    user_pre_message: "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
    user_post_message: "<|END_OF_TURN_TOKEN|>",
    assistant_pre_message: "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
    assistant_post_message: "<|END_OF_TURN_TOKEN|>",
    // Open a chatbot turn so the model continues as the assistant.
    end: "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
};
|
|
||||||
|
|
||||||
/// ChatML (`<|im_start|>` / `<|im_end|>`) template, as used by Qwen models.
pub const QWEN_PROMPT_FORMAT: PromptFormat<'static> = PromptFormat {
    begin: "",
    system_pre_message: "<|im_start|>system\n",
    system_post_message: "<|im_end|>",
    user_pre_message: "<|im_start|>user\n",
    user_post_message: "<|im_end|>",
    assistant_pre_message: "<|im_start|>assistant\n",
    assistant_post_message: "<|im_end|>",
    // Open an assistant turn so the model continues as the assistant.
    end: "<|im_start|>assistant\n",
};
|
|
||||||
|
|
||||||
pub fn generate_prompt(messages: &[Message], format: PromptFormat) -> anyhow::Result<String> {
|
|
||||||
let PromptFormat {
|
|
||||||
begin,
|
|
||||||
system_pre_message,
|
|
||||||
system_post_message,
|
|
||||||
user_pre_message,
|
|
||||||
user_post_message,
|
|
||||||
assistant_pre_message,
|
|
||||||
assistant_post_message,
|
|
||||||
end,
|
|
||||||
} = format;
|
|
||||||
let mut prompt = begin.to_string();
|
|
||||||
let mut image_urls = vec![];
|
|
||||||
for message in messages {
|
|
||||||
let role = &message.role;
|
|
||||||
let content = match &message.content {
|
|
||||||
MessageContent::Text(text) => text.clone(),
|
|
||||||
MessageContent::Array(list) => {
|
|
||||||
let mut parts = vec![];
|
|
||||||
for item in list {
|
|
||||||
match item {
|
|
||||||
MessageContentPart::Text { text } => parts.push(text.clone()),
|
|
||||||
MessageContentPart::ImageUrl {
|
|
||||||
image_url: ImageUrl { url },
|
|
||||||
} => {
|
|
||||||
image_urls.push(url.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
parts.join("\n\n")
|
|
||||||
}
|
|
||||||
MessageContent::ToolResults(_) => String::new(),
|
|
||||||
};
|
|
||||||
match role {
|
|
||||||
MessageRole::System => prompt.push_str(&format!(
|
|
||||||
"{system_pre_message}{content}{system_post_message}"
|
|
||||||
)),
|
|
||||||
MessageRole::Assistant => prompt.push_str(&format!(
|
|
||||||
"{assistant_pre_message}{content}{assistant_post_message}"
|
|
||||||
)),
|
|
||||||
MessageRole::User => {
|
|
||||||
prompt.push_str(&format!("{user_pre_message}{content}{user_post_message}"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !image_urls.is_empty() {
|
|
||||||
anyhow::bail!("The model does not support images: {:?}", image_urls);
|
|
||||||
}
|
|
||||||
prompt.push_str(end);
|
|
||||||
Ok(prompt)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn smart_prompt_format(model_name: &str) -> PromptFormat<'static> {
|
|
||||||
if model_name.contains("llama3") || model_name.contains("llama-3") {
|
|
||||||
LLAMA3_PROMPT_FORMAT
|
|
||||||
} else if model_name.contains("llama2")
|
|
||||||
|| model_name.contains("llama-2")
|
|
||||||
|| model_name.contains("mistral")
|
|
||||||
|| model_name.contains("mixtral")
|
|
||||||
{
|
|
||||||
MISTRAL_PROMPT_FORMAT
|
|
||||||
} else if model_name.contains("phi3") || model_name.contains("phi-3") {
|
|
||||||
PHI3_PROMPT_FORMAT
|
|
||||||
} else if model_name.contains("command-r") {
|
|
||||||
COMMAND_R_PROMPT_FORMAT
|
|
||||||
} else if model_name.contains("qwen") {
|
|
||||||
QWEN_PROMPT_FORMAT
|
|
||||||
} else {
|
|
||||||
GENERIC_PROMPT_FORMAT
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,195 +0,0 @@
|
|||||||
use super::prompt_format::*;
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
use anyhow::{anyhow, Result};
|
|
||||||
use reqwest::{Client as ReqwestClient, RequestBuilder};
|
|
||||||
use serde::Deserialize;
|
|
||||||
use serde_json::{json, Value};
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
// Base URL of the Replicate HTTP API (v1).
const API_BASE: &str = "https://api.replicate.com/v1";
|
|
||||||
|
|
||||||
/// Deserializable configuration for the Replicate client.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ReplicateConfig {
    // Optional display name for this client instance.
    pub name: Option<String>,
    // Replicate API token; optional here because it can also be supplied
    // interactively (see `ReplicateClient::PROMPTS`).
    pub api_key: Option<String>,
    // Models exposed by this client; defaults to empty when omitted.
    #[serde(default)]
    pub models: Vec<ModelData>,
    // Optional request patching configuration.
    pub patch: Option<RequestPatch>,
    // Extra, client-agnostic settings.
    pub extra: Option<ExtraConfig>,
}
|
|
||||||
|
|
||||||
impl ReplicateClient {
    // Macro-generated `get_api_key()` accessor for the `api_key` config
    // field (presumably with the usual config/env fallback chain — confirm
    // in the client macro definitions).
    config_get_fn!(api_key, get_api_key);

    /// Interactive setup prompts: the API key is the only required input.
    pub const PROMPTS: [PromptAction<'static>; 1] =
        [("api_key", "API Key:", true, PromptKind::String)];
}
|
|
||||||
|
|
||||||
#[async_trait::async_trait]
impl Client for ReplicateClient {
    // Boilerplate accessors shared by all clients, generated by the macro.
    client_common_fns!();

    /// Non-streaming chat: create a Replicate prediction, then poll it to
    /// completion (see `chat_completions`).
    async fn chat_completions_inner(
        &self,
        client: &ReqwestClient,
        data: ChatCompletionsData,
    ) -> Result<ChatCompletionsOutput> {
        let request_data = prepare_chat_completions(self, data)?;
        let builder = self.request_builder(client, request_data, ApiType::ChatCompletions);
        // The poller needs the API key again to authenticate its GET requests.
        chat_completions(builder, client, &self.get_api_key()?).await
    }

    /// Streaming chat: create a Replicate prediction, then consume its SSE
    /// stream (see `chat_completions_streaming`).
    async fn chat_completions_streaming_inner(
        &self,
        client: &ReqwestClient,
        handler: &mut SseHandler,
        data: ChatCompletionsData,
    ) -> Result<()> {
        let request_data = prepare_chat_completions(self, data)?;
        let builder = self.request_builder(client, request_data, ApiType::ChatCompletions);
        chat_completions_streaming(builder, handler, client).await
    }
}
|
|
||||||
|
|
||||||
fn prepare_chat_completions(
|
|
||||||
self_: &ReplicateClient,
|
|
||||||
data: ChatCompletionsData,
|
|
||||||
) -> Result<RequestData> {
|
|
||||||
let api_key = self_.get_api_key()?;
|
|
||||||
|
|
||||||
let url = format!("{API_BASE}/models/{}/predictions", self_.model.name());
|
|
||||||
|
|
||||||
let body = build_chat_completions_body(data, &self_.model)?;
|
|
||||||
|
|
||||||
let mut request_data = RequestData::new(url, body);
|
|
||||||
|
|
||||||
request_data.bearer_auth(api_key);
|
|
||||||
|
|
||||||
Ok(request_data)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a prediction, then poll its `urls.get` endpoint every 500 ms until
/// it reports `succeeded` (parse and return the output) or
/// `failed`/`canceled` (return an error). Any other status keeps polling.
async fn chat_completions(
    builder: RequestBuilder,
    client: &ReqwestClient,
    api_key: &str,
) -> Result<ChatCompletionsOutput> {
    let res = builder.send().await?;
    let status = res.status();
    let data: Value = res.json().await?;
    if !status.is_success() {
        // Surface the API's error payload rather than a bare HTTP status.
        catch_error(&data, status.as_u16())?;
    }
    // Replicate predictions are asynchronous: the creation response carries
    // a URL to poll for the final result.
    let prediction_url = data["urls"]["get"]
        .as_str()
        .ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
    loop {
        tokio::time::sleep(Duration::from_millis(500)).await;
        let prediction_data: Value = client
            .get(prediction_url)
            .bearer_auth(api_key)
            .send()
            .await?
            .json()
            .await?;
        debug!("non-stream-data: {prediction_data}");
        let err = || anyhow!("Invalid response data: {prediction_data}");
        let status = prediction_data["status"].as_str().ok_or_else(err)?;
        if status == "succeeded" {
            return extract_chat_completions(&prediction_data);
        } else if status == "failed" || status == "canceled" {
            return Err(err());
        }
        // NOTE(review): no timeout — a prediction stuck in "processing"
        // polls forever; presumably bounded by outer request handling.
    }
}
|
|
||||||
|
|
||||||
/// Create a prediction, then follow its `urls.stream` SSE endpoint,
/// forwarding each data event to `handler` until a "done" event arrives.
async fn chat_completions_streaming(
    builder: RequestBuilder,
    handler: &mut SseHandler,
    client: &ReqwestClient,
) -> Result<()> {
    let res = builder.send().await?;
    let status = res.status();
    let data: Value = res.json().await?;
    if !status.is_success() {
        // Surface the API's error payload rather than a bare HTTP status.
        catch_error(&data, status.as_u16())?;
    }
    // The creation response exposes a dedicated SSE endpoint for this
    // prediction.
    let stream_url = data["urls"]["stream"]
        .as_str()
        .ok_or_else(|| anyhow!("Invalid response data: {data}"))?;

    let sse_builder = client.get(stream_url).header("accept", "text/event-stream");

    // Callback contract: Ok(true) stops the SSE loop, Ok(false) continues.
    let handle = |message: SseMmessage| -> Result<bool> {
        if message.event == "done" {
            return Ok(true);
        }

        debug!("stream-data: {}", message.data);

        handler.text(&message.data)?;
        Ok(false)
    };
    sse_stream(sse_builder, handle).await
}
|
|
||||||
|
|
||||||
/// Build the Replicate prediction JSON body from chat data and model
/// settings. The rendered transcript is passed as a single `prompt`, with
/// `prompt_template` set to the identity so the server applies no further
/// templating.
fn build_chat_completions_body(data: ChatCompletionsData, model: &Model) -> Result<Value> {
    let prompt = generate_prompt(&data.messages, smart_prompt_format(model.name()))?;

    let mut input = json!({
        "prompt": prompt,
        "prompt_template": "{prompt}"
    });

    if let Some(max) = model.max_tokens_param() {
        // Different model families read different keys; set both.
        input["max_tokens"] = max.into();
        input["max_new_tokens"] = max.into();
    }
    if let Some(temperature) = data.temperature {
        input["temperature"] = temperature.into();
    }
    if let Some(top_p) = data.top_p {
        input["top_p"] = top_p.into();
    }

    let mut body = json!({
        "input": input,
    });

    if data.stream {
        body["stream"] = true.into();
    }

    // `data.functions` is intentionally unused: this API takes plain prompts.
    Ok(body)
}
|
|
||||||
|
|
||||||
fn extract_chat_completions(data: &Value) -> Result<ChatCompletionsOutput> {
|
|
||||||
let text = data["output"]
|
|
||||||
.as_array()
|
|
||||||
.map(|parts| {
|
|
||||||
parts
|
|
||||||
.iter()
|
|
||||||
.filter_map(|v| v.as_str().map(|v| v.to_string()))
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join("")
|
|
||||||
})
|
|
||||||
.ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
|
|
||||||
|
|
||||||
let output = ChatCompletionsOutput {
|
|
||||||
text: text.to_string(),
|
|
||||||
tool_calls: vec![],
|
|
||||||
id: data["id"].as_str().map(|v| v.to_string()),
|
|
||||||
input_tokens: data["metrics"]["input_token_count"].as_u64(),
|
|
||||||
output_tokens: data["metrics"]["output_token_count"].as_u64(),
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(output)
|
|
||||||
}
|
|
Loading…
Reference in New Issue