From 179a5f574982c7973543dcfeafdb8647b01c3634 Mon Sep 17 00:00:00 2001 From: sigoden Date: Fri, 16 Feb 2024 18:32:33 +0800 Subject: [PATCH] refactor: update vertexai/gemini/ernie clients (#309) --- config.example.yaml | 7 +- src/client/ernie.rs | 11 +- src/client/gemini.rs | 162 +----------------------- src/client/vertexai.rs | 274 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 275 insertions(+), 179 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 77ee50b..9f02b4c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -31,7 +31,7 @@ clients: - type: gemini api_key: AIxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - # For https://github.com/go-skynet/LocalAI or any OpenAI compatible API providers + # Any openai-compatible API providers or https://github.com/go-skynet/LocalAI - type: localai api_base: http://localhost:8080/v1 api_key: xxx @@ -74,4 +74,7 @@ clients: # See https://cloud.google.com/vertex-ai - type: vertexai api_base: https://{REGION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{REGION}/publishers/google/models - api_key: xxx \ No newline at end of file + # Setup Application Default Credentials (ADC) file, Optional field + # Run `gcloud auth application-default login` to setup adc + # see https://cloud.google.com/docs/authentication/external/set-up-adc + adc_file: \ No newline at end of file diff --git a/src/client/ernie.rs b/src/client/ernie.rs index 7848cf8..14f4404 100644 --- a/src/client/ernie.rs +++ b/src/client/ernie.rs @@ -101,7 +101,8 @@ impl ErnieClient { .or_else(|| env::var(format!("{env_prefix}_SECRET_KEY")).ok()) .ok_or_else(|| anyhow!("Miss secret_key"))?; - let token = fetch_access_token(&api_key, &secret_key) + let client = self.build_client()?; + let token = fetch_access_token(&client, &api_key, &secret_key) .await .with_context(|| "Failed to fetch access token")?; unsafe { ACCESS_TOKEN = token }; @@ -204,9 +205,13 @@ fn build_body(data: SendData, _model: String) -> Value { body } -async fn fetch_access_token(api_key: &str, secret_key: &str) -> Result { +async fn fetch_access_token( + client: &reqwest::Client, + api_key: &str, + secret_key: &str, +) -> Result { let url = format!("{ACCESS_TOKEN_URL}?grant_type=client_credentials&client_id={api_key}&client_secret={secret_key}"); - let value: Value = reqwest::get(&url).await?.json().await?; + let value: Value = client.get(&url).send().await?.json().await?; let result = value["access_token"].as_str().ok_or_else(|| { if let Some(err_msg) = value["error_description"].as_str() { anyhow!("{err_msg}") diff --git a/src/client/gemini.rs b/src/client/gemini.rs index e846ab1..1c76fd8 100644 --- a/src/client/gemini.rs +++ b/src/client/gemini.rs @@ -1,16 +1,12 @@ -use super::{ - message::*, patch_system_message, Client, ExtraConfig, GeminiClient, Model, PromptType, - SendData, TokensCountFactors, -}; +use super::vertexai::{build_body, send_message, send_message_streaming}; +use super::{Client, ExtraConfig, GeminiClient, Model, PromptType, SendData, TokensCountFactors}; use crate::{render::ReplyHandler, utils::PromptKind}; -use anyhow::{anyhow, bail, Result}; +use anyhow::Result; use async_trait::async_trait; -use futures_util::StreamExt; use reqwest::{Client as ReqwestClient, RequestBuilder}; use serde::Deserialize; -use serde_json::{json, Value}; const API_BASE: &str = "https://generativelanguage.googleapis.com/v1beta/models/"; @@ -88,155 +84,3 @@ impl GeminiClient { Ok(builder) } } - -pub(crate) async fn send_message(builder: RequestBuilder) -> Result { - let res = builder.send().await?; - let status = res.status(); - let data: Value = res.json().await?; - if status != 200 { - check_error(&data)?; - } - let output = data["candidates"][0]["content"]["parts"][0]["text"] - .as_str() - .ok_or_else(|| anyhow!("Invalid response data: {data}"))?; - Ok(output.to_string()) -} - -pub(crate) async fn send_message_streaming(builder: RequestBuilder, handler: &mut ReplyHandler) -> Result<()> { - let res = builder.send().await?; - if res.status() != 200 { - let data: Value = res.json().await?; - check_error(&data)?; - } else { - let mut buffer = vec![]; - let mut cursor = 0; - let mut start = 0; - let mut balances = vec![]; - let mut quoting = false; - let mut stream = res.bytes_stream(); - while let Some(chunk) = stream.next().await { - let chunk = chunk?; - let chunk = std::str::from_utf8(&chunk)?; - buffer.extend(chunk.chars()); - for i in cursor..buffer.len() { - let ch = buffer[i]; - if quoting { - if ch == '"' && buffer[i - 1] != '\\' { - quoting = false; - } - continue; - } - match ch { - '"' => quoting = true, - '{' => { - if balances.is_empty() { - start = i; - } - balances.push(ch); - } - '[' => { - if start != 0 { - balances.push(ch); - } - } - '}' => { - balances.pop(); - if balances.is_empty() { - let value: String = buffer[start..=i].iter().collect(); - let value: Value = serde_json::from_str(&value)?; - if let Some(text) = - value["candidates"][0]["content"]["parts"][0]["text"].as_str() - { - handler.text(text)?; - } else { - bail!("Invalid response data: {value}") - } - } - } - ']' => { - balances.pop(); - } - _ => {} - } - } - cursor = buffer.len(); - } - } - Ok(()) -} - -fn check_error(data: &Value) -> Result<()> { - if let Some((Some(status), Some(message))) = data[0]["error"].as_object().map(|v| { - ( - v.get("status").and_then(|v| v.as_str()), - v.get("message").and_then(|v| v.as_str()), - ) - }) { - bail!("{status}: {message}") - } else { - bail!("Error {}", data); - } -} - -pub(crate) fn build_body(data: SendData, _model: String) -> Result { - let SendData { - mut messages, - temperature, - .. - } = data; - - patch_system_message(&mut messages); - - let mut network_image_urls = vec![]; - let contents: Vec = messages - .into_iter() - .map(|message| { - let role = match message.role { - MessageRole::User => "user", - _ => "model", - }; - match message.content { - MessageContent::Text(text) => json!({ - "role": role, - "parts": [{ "text": text }] - }), - MessageContent::Array(list) => { - let list: Vec = list - .into_iter() - .map(|item| match item { - MessageContentPart::Text { text } => json!({"text": text}), - MessageContentPart::ImageUrl { image_url: ImageUrl { url } } => { - if let Some((mime_type, data)) = url.strip_prefix("data:").and_then(|v| v.split_once(";base64,")) { - json!({ "inline_data": { "mime_type": mime_type, "data": data } }) - } else { - network_image_urls.push(url.clone()); - json!({ "url": url }) - } - }, - }) - .collect(); - json!({ "role": role, "parts": list }) - } - } - }) - .collect(); - - if !network_image_urls.is_empty() { - bail!( - "The model does not support network images: {:?}", - network_image_urls - ); - } - - let mut body = json!({ - "contents": contents, - }); - - if let Some(temperature) = temperature { - body["generationConfig"] = json!({ - "temperature": temperature, - }); - } - - Ok(body) -} diff --git a/src/client/vertexai.rs b/src/client/vertexai.rs index 890aedd..50e4b21 100644 --- a/src/client/vertexai.rs +++ b/src/client/vertexai.rs @@ -1,28 +1,36 @@ use super::{ - Client, ExtraConfig, VertexAIClient, Model, PromptType, - SendData, TokensCountFactors, + message::*, patch_system_message, Client, ExtraConfig, Model, PromptType, SendData, + TokensCountFactors, VertexAIClient, }; -use super::gemini::{build_body, send_message, send_message_streaming}; use crate::{render::ReplyHandler, utils::PromptKind}; -use anyhow::Result; +use anyhow::{anyhow, bail, Context, Result}; use async_trait::async_trait; +use chrono::{Duration, Utc}; +use futures_util::StreamExt; use reqwest::{Client as ReqwestClient, RequestBuilder}; use serde::Deserialize; +use serde_json::{json, Value}; +use std::path::PathBuf; -const MODELS: [(&str, usize, &str); 2] = [ - ("gemini-pro", 32760, "text"), - ("gemini-pro-vision", 16384, "text,vision"), +const MODELS: [(&str, usize, &str); 5] = [ + ("gemini-1.0-pro", 32760, "text"), + ("gemini.1.0-pro-vision", 16384, "text,vision"), + ("gemini-1.0-ultra", 8192, "text"), + ("gemini.1.0-ultra-vision", 8192, "text,vision"), + ("gemini-1.5-pro", 1000000, "text"), ]; const TOKENS_COUNT_FACTORS: TokensCountFactors = (5, 2); +static mut ACCESS_TOKEN: (String, i64) = (String::new(), 0); // safe under linear operation + #[derive(Debug, Clone, Deserialize, Default)] pub struct VertexAIConfig { pub name: Option, pub api_base: Option, - pub api_key: Option, + pub adc_file: Option, pub extra: Option, } @@ -31,6 +39,7 @@ impl Client for VertexAIClient { client_common_fns!(); async fn send_message_inner(&self, client: &ReqwestClient, data: SendData) -> Result { + self.prepare_access_token().await?; let builder = self.request_builder(client, data)?; send_message(builder).await } @@ -41,6 +50,7 @@ impl Client for VertexAIClient { handler: &mut ReplyHandler, data: SendData, ) -> Result<()> { + self.prepare_access_token().await?; let builder = self.request_builder(client, data)?; send_message_streaming(builder, handler).await } @@ -48,12 +58,9 @@ impl Client for VertexAIClient { impl VertexAIClient { config_get_fn!(api_base, get_api_base); - config_get_fn!(api_key, get_api_key); - pub const PROMPTS: [PromptType<'static>; 2] = [ - ("api_base", "API Base:", true, PromptKind::String), - ("api_key", "API Key:", true, PromptKind::String), - ]; + pub const PROMPTS: [PromptType<'static>; 1] = + [("api_base", "API Base:", true, PromptKind::String)]; pub fn list_models(local_config: &VertexAIConfig) -> Vec { let client_name = Self::name(local_config); @@ -70,7 +77,6 @@ impl VertexAIClient { fn request_builder(&self, client: &ReqwestClient, data: SendData) -> Result { let api_base = self.get_api_base()?; - let api_key = self.get_api_key()?; let func = match data.stream { true => "streamGenerateContent", @@ -85,8 +91,246 @@ impl VertexAIClient { debug!("VertexAI Request: {url} {body}"); - let builder = client.post(url).bearer_auth(api_key).json(&body); + let builder = client + .post(url) + .bearer_auth(unsafe { &ACCESS_TOKEN.0 }) + .json(&body); Ok(builder) } + + async fn prepare_access_token(&self) -> Result<()> { + if unsafe { ACCESS_TOKEN.0.is_empty() || Utc::now().timestamp() > ACCESS_TOKEN.1 } { + let client = self.build_client()?; + let (token, expires_in) = fetch_access_token(&client, &self.config.adc_file) + .await + .with_context(|| "Failed to fetch access token")?; + let expires_at = Utc::now() + Duration::seconds(expires_in); + unsafe { ACCESS_TOKEN = (token, expires_at.timestamp()) }; + } + Ok(()) + } +} + +pub(crate) async fn send_message(builder: RequestBuilder) -> Result { + let res = builder.send().await?; + let status = res.status(); + let data: Value = res.json().await?; + if status != 200 { + check_error(&data)?; + } + let output = data["candidates"][0]["content"]["parts"][0]["text"] + .as_str() + .ok_or_else(|| anyhow!("Invalid response data: {data}"))?; + Ok(output.to_string()) +} + +pub(crate) async fn send_message_streaming( + builder: RequestBuilder, + handler: &mut ReplyHandler, +) -> Result<()> { + let res = builder.send().await?; + if res.status() != 200 { + let data: Value = res.json().await?; + check_error(&data)?; + } else { + let mut buffer = vec![]; + let mut cursor = 0; + let mut start = 0; + let mut balances = vec![]; + let mut quoting = false; + let mut stream = res.bytes_stream(); + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + let chunk = std::str::from_utf8(&chunk)?; + buffer.extend(chunk.chars()); + for i in cursor..buffer.len() { + let ch = buffer[i]; + if quoting { + if ch == '"' && buffer[i - 1] != '\\' { + quoting = false; + } + continue; + } + match ch { + '"' => quoting = true, + '{' => { + if balances.is_empty() { + start = i; + } + balances.push(ch); + } + '[' => { + if start != 0 { + balances.push(ch); + } + } + '}' => { + balances.pop(); + if balances.is_empty() { + let value: String = buffer[start..=i].iter().collect(); + let value: Value = serde_json::from_str(&value)?; + if let Some(text) = + value["candidates"][0]["content"]["parts"][0]["text"].as_str() + { + handler.text(text)?; + } else { + bail!("Invalid response data: {value}") + } + } + } + ']' => { + balances.pop(); + } + _ => {} + } + } + cursor = buffer.len(); + } + } + Ok(()) +} + +fn check_error(data: &Value) -> Result<()> { + if let Some((Some(status), Some(message))) = data[0]["error"].as_object().map(|v| { + ( + v.get("status").and_then(|v| v.as_str()), + v.get("message").and_then(|v| v.as_str()), + ) + }) { + if status == "UNAUTHENTICATED" { + unsafe { ACCESS_TOKEN = (String::new(), 0) } + } + bail!("{status}: {message}") + } else { + bail!("Error {}", data); + } +} + +pub(crate) fn build_body(data: SendData, _model: String) -> Result { + let SendData { + mut messages, + temperature, + .. + } = data; + + patch_system_message(&mut messages); + + let mut network_image_urls = vec![]; + let contents: Vec = messages + .into_iter() + .map(|message| { + let role = match message.role { + MessageRole::User => "user", + _ => "model", + }; + match message.content { + MessageContent::Text(text) => json!({ + "role": role, + "parts": [{ "text": text }] + }), + MessageContent::Array(list) => { + let list: Vec = list + .into_iter() + .map(|item| match item { + MessageContentPart::Text { text } => json!({"text": text}), + MessageContentPart::ImageUrl { image_url: ImageUrl { url } } => { + if let Some((mime_type, data)) = url.strip_prefix("data:").and_then(|v| v.split_once(";base64,")) { + json!({ "inline_data": { "mime_type": mime_type, "data": data } }) + } else { + network_image_urls.push(url.clone()); + json!({ "url": url }) + } + }, + }) + .collect(); + json!({ "role": role, "parts": list }) + } + } + }) + .collect(); + + if !network_image_urls.is_empty() { + bail!( + "The model does not support network images: {:?}", + network_image_urls + ); + } + + let mut body = json!({ + "contents": contents, + }); + + if let Some(temperature) = temperature { + body["generationConfig"] = json!({ + "temperature": temperature, + }); + } + + Ok(body) +} + +async fn fetch_access_token( + client: &reqwest::Client, + file: &Option, +) -> Result<(String, i64)> { + let credentials = load_adc(file).await?; + let value: Value = client + .post("https://oauth2.googleapis.com/token") + .json(&credentials) + .send() + .await? + .json() + .await?; + + if let (Some(access_token), Some(expires_in)) = + (value["access_token"].as_str(), value["expires_in"].as_i64()) + { + Ok((access_token.to_string(), expires_in)) + } else if let Some(err_msg) = value["error_description"].as_str() { + bail!("{err_msg}") + } else { + bail!("Invalid response data") + } +} + +async fn load_adc(file: &Option) -> Result { + let adc_file = file + .as_ref() + .map(PathBuf::from) + .or_else(default_adc_file) + .ok_or_else(|| anyhow!("No application_default_credentials.json"))?; + let data = tokio::fs::read_to_string(adc_file).await?; + let data: Value = serde_json::from_str(&data)?; + if let (Some(client_id), Some(client_secret), Some(refresh_token)) = ( + data["client_id"].as_str(), + data["client_secret"].as_str(), + data["refresh_token"].as_str(), + ) { + Ok(json!({ + "client_id": client_id, + "client_secret": client_secret, + "refresh_token": refresh_token, + "grant_type": "refresh_token", + })) + } else { + bail!("Invalid application_default_credentials.json") + } +} + +#[cfg(not(windows))] +fn default_adc_file() -> Option { + let mut path = dirs::home_dir()?; + path.push(".config"); + path.push("gcloud"); + path.push("application_default_credentials.json"); + Some(path) +} + +#[cfg(windows)] +fn default_adc_file() -> Option { + let mut path = dirs::config_dir()?; + path.push("gcloud"); + path.push("application_default_credentials.json"); + Some(path) }