feat: support ollama (#276)

sigoden committed 6 months ago
parent 6c9d7a679e
commit 64c4edf7c8

@@ -3,7 +3,7 @@
[![CI](https://github.com/sigoden/aichat/actions/workflows/ci.yaml/badge.svg)](https://github.com/sigoden/aichat/actions/workflows/ci.yaml)
[![Crates](https://img.shields.io/crates/v/aichat.svg)](https://crates.io/crates/aichat)
Use GPT-4(V), Gemini, LocalAI and other LLMs in the terminal.
Use GPT-4(V), Gemini, LocalAI, Ollama and other LLMs in the terminal.
AIChat in chat REPL mode:
@@ -49,11 +49,12 @@ Download it from [GitHub Releases](https://github.com/sigoden/aichat/releases),
## Support Models
- [x] OpenAI: gpt-3.5/gpt-4/gpt-4-vision
- [x] Gemini: gemini-pro/gemini-pro-vision/gemini-ultra
- [x] LocalAI: user deployed opensource LLMs
- [x] Ollama: user deployed opensource LLMs
- [x] Azure-OpenAI: user created gpt3.5/gpt4
- [x] Gemini: gemini-pro/gemini-pro-vision/gemini-ultra
- [x] Ernie: ernie-bot-turbo/ernie-bot/ernie-bot-8k/ernie-bot-4
- [x] Qianwen: qwen-turbo/qwen-plus/qwen-max
- [x] Qianwen: qwen-turbo/qwen-plus/qwen-max/qwen-max-longcontext/qwen-vl-plus
## Features

@@ -22,26 +22,35 @@ clients:
api_key: sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
organization_id:
# See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart
- type: azure-openai
api_base: https://RESOURCE.openai.azure.com
api_key: xxx
models:
- name: MyGPT4 # Model deployment name
max_tokens: 8192
# See https://ai.google.dev/docs
- type: gemini
api_key: AIxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# See https://github.com/go-skynet/LocalAI
- type: localai
api_base: http://localhost:8080/v1
api_key: xxx
chat_endpoint: /chat/completions
chat_endpoint: /chat/completions # Optional field
models:
- name: gpt4all-j
max_tokens: 8192
# See https://ai.google.dev/docs
- type: gemini
api_key: AIxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# See https://github.com/jmorganca/ollama
- type: ollama
api_base: http://localhost:11434/api
api_key: Basic xxx
chat_endpoint: /chat # Optional field
models:
- name: llama2
max_tokens: 8192
# See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart
- type: azure-openai
api_base: https://RESOURCE.openai.azure.com
api_key: xxx
models:
- name: MyGPT4 # Model deployment name
max_tokens: 8192
# See https://cloud.baidu.com/doc/WENXINWORKSHOP/index.html
- type: ernie
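
The `ollama` entry above is all that is needed to point AIChat at a locally running Ollama server. To sanity-check the endpoint outside AIChat, you can post a minimal request to the same URL the client builds (`api_base` + `chat_endpoint`); this is a rough sketch using `reqwest` and `tokio`, where the model name `llama2` is a placeholder for whatever model is pulled locally:

```rust
use serde_json::{json, Value};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Same URL the client derives: api_base + chat_endpoint (default "/chat").
    let url = "http://localhost:11434/api/chat";
    let body = json!({
        "model": "llama2", // placeholder: any model pulled into Ollama
        "messages": [{ "role": "user", "content": "Hello" }],
        "stream": false    // ask for a single JSON reply instead of NDJSON
    });
    let data: Value = reqwest::Client::new()
        .post(url)
        .json(&body)
        .send()
        .await?
        .json()
        .await?;
    // The reply text lives under message.content, which is what send_message() reads.
    println!("{}", data["message"]["content"]);
    Ok(())
}
```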

@@ -9,14 +9,15 @@ pub use model::*;
register_client!(
(openai, "openai", OpenAIConfig, OpenAIClient),
(gemini, "gemini", GeminiConfig, GeminiClient),
(localai, "localai", LocalAIConfig, LocalAIClient),
(ollama, "ollama", OllamaConfig, OllamaClient),
(
azure_openai,
"azure-openai",
AzureOpenAIConfig,
AzureOpenAIClient
),
(gemini, "gemini", GeminiConfig, GeminiClient),
(ernie, "ernie", ErnieConfig, ErnieClient),
(qianwen, "qianwen", QianwenConfig, QianwenClient),
);
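
`register_client!` is defined elsewhere in the crate; judging by its arguments, it wires each `(module, name, Config, Client)` tuple into config parsing and client construction. A schematic of the kind of name-to-client dispatch it abstracts away, not its actual expansion:

```rust
// Illustrative only: one variant per registered backend.
enum ClientKind {
    Ollama,
    // ...
}

fn client_name(kind: &ClientKind) -> &'static str {
    match kind {
        // "ollama" matches the string literal passed to register_client!.
        ClientKind::Ollama => "ollama",
    }
}

fn main() {
    assert_eq!(client_name(&ClientKind::Ollama), "ollama");
}
```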

@@ -0,0 +1,209 @@
use super::{
message::*, patch_system_message, Client, ExtraConfig, Model, OllamaClient, PromptType,
SendData, TokensCountFactors,
};
use crate::{config::GlobalConfig, render::ReplyHandler, utils::PromptKind};
use anyhow::{anyhow, bail, Result};
use async_trait::async_trait;
use futures_util::StreamExt;
use log::debug;
use reqwest::{Client as ReqwestClient, RequestBuilder};
use serde::Deserialize;
use serde_json::{json, Value};
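// (per-message overhead, fixed bias) used for local token estimation; presumably mirrors the factors chosen for the other clients.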
const TOKENS_COUNT_FACTORS: TokensCountFactors = (5, 2);
#[derive(Debug, Clone, Deserialize, Default)]
pub struct OllamaConfig {
pub name: Option<String>,
pub api_base: String,
pub api_key: Option<String>,
pub chat_endpoint: Option<String>,
pub models: Vec<OllamaModel>,
pub extra: Option<ExtraConfig>,
}
#[derive(Debug, Clone, Deserialize)]
pub struct OllamaModel {
name: String,
max_tokens: Option<usize>,
}
#[async_trait]
impl Client for OllamaClient {
fn config(&self) -> (&GlobalConfig, &Option<ExtraConfig>) {
(&self.global_config, &self.config.extra)
}
async fn send_message_inner(&self, client: &ReqwestClient, data: SendData) -> Result<String> {
let builder = self.request_builder(client, data)?;
send_message(builder).await
}
async fn send_message_streaming_inner(
&self,
client: &ReqwestClient,
handler: &mut ReplyHandler,
data: SendData,
) -> Result<()> {
let builder = self.request_builder(client, data)?;
send_message_streaming(builder, handler).await
}
}
impl OllamaClient {
config_get_fn!(api_key, get_api_key);
pub const PROMPTS: [PromptType<'static>; 4] = [
("api_base", "API Base:", true, PromptKind::String),
("api_key", "API Key:", false, PromptKind::String),
("models[].name", "Model Name:", true, PromptKind::String),
(
"models[].max_tokens",
"Max Tokens:",
false,
PromptKind::Integer,
),
];
pub fn list_models(local_config: &OllamaConfig) -> Vec<Model> {
let client_name = Self::name(local_config);
local_config
.models
.iter()
.map(|v| {
Model::new(client_name, &v.name)
.set_max_tokens(v.max_tokens)
.set_tokens_count_factors(TOKENS_COUNT_FACTORS)
})
.collect()
}
fn request_builder(&self, client: &ReqwestClient, data: SendData) -> Result<RequestBuilder> {
let api_key = self.get_api_key().ok();
let body = build_body(data, self.model.name.clone())?;
let chat_endpoint = self.config.chat_endpoint.as_deref().unwrap_or("/chat");
let url = format!("{}{chat_endpoint}", self.config.api_base);
debug!("Ollama Request: {url} {body}");
let mut builder = client.post(url).json(&body);
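// The configured api_key is sent verbatim as the Authorization header, so it must carry its own scheme (hence "Basic xxx" in the example config).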
if let Some(api_key) = api_key {
builder = builder.header("Authorization", api_key)
}
Ok(builder)
}
}
async fn send_message(builder: RequestBuilder) -> Result<String> {
let res = builder.send().await?;
let status = res.status();
if status != 200 {
let text = res.text().await?;
bail!("{status}, {text}");
}
let data: Value = res.json().await?;
let output = data["message"]["content"]
.as_str()
.ok_or_else(|| anyhow!("Invalid response data: {data}"))?;
Ok(output.to_string())
}
async fn send_message_streaming(builder: RequestBuilder, handler: &mut ReplyHandler) -> Result<()> {
let res = builder.send().await?;
let status = res.status();
if status != 200 {
let text = res.text().await?;
bail!("{status}, {text}");
} else {
let mut stream = res.bytes_stream();
while let Some(chunk) = stream.next().await {
let chunk = chunk?;
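// Assumes every HTTP chunk is exactly one complete JSON object (one NDJSON line); a partial or coalesced chunk would fail to parse here.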
let data: Value = serde_json::from_slice(&chunk)?;
if data["done"].is_boolean() {
if let Some(text) = data["message"]["content"].as_str() {
handler.text(text)?;
}
} else {
bail!("Invalid response data: {data}")
}
}
}
Ok(())
}
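
For reference, Ollama's chat endpoint streams newline-delimited JSON objects, each carrying a `done` flag; the final object has `done: true` (plus timing stats) and no further text. A sketch of the chunk shapes the loop above expects, with fields abridged:

```rust
use serde_json::json;

fn main() {
    // Intermediate chunk: a fragment of assistant text, done = false.
    let chunk = json!({
        "message": { "role": "assistant", "content": "Hel" },
        "done": false
    });
    // Final chunk: done = true; the real API also appends eval statistics.
    let last = json!({ "done": true });
    assert!(chunk["done"].is_boolean() && last["done"].is_boolean());
}
```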
fn build_body(data: SendData, model: String) -> Result<Value> {
let SendData {
mut messages,
temperature,
stream,
} = data;
patch_system_message(&mut messages);
let mut network_image_urls = vec![];
let messages: Vec<Value> = messages
.into_iter()
.map(|message| {
let role = message.role;
match message.content {
MessageContent::Text(text) => json!({
"role": role,
"content": text,
}),
MessageContent::Array(list) => {
let mut content = vec![];
let mut images = vec![];
for item in list {
match item {
MessageContentPart::Text { text } => {
content.push(text);
}
MessageContentPart::ImageUrl {
image_url: ImageUrl { url },
} => {
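// Ollama takes raw base64 image bytes in "images", so strip the data-URL prefix; remote URLs are collected and rejected below.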
if let Some((_, data)) = url
.strip_prefix("data:")
.and_then(|v| v.split_once(";base64,"))
{
images.push(data.to_string());
} else {
network_image_urls.push(url.clone());
}
}
}
}
let content = content.join("\n\n");
json!({ "role": role, "content": content, "images": images })
}
}
})
.collect();
if !network_image_urls.is_empty() {
bail!(
"The model does not support network images: {:?}",
network_image_urls
);
}
let mut body = json!({
"model": model,
"messages": messages,
"stream": stream,
});
if let Some(temperature) = temperature {
body["options"] = json!({
"temperature": temperature,
});
}
Ok(body)
}
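
Putting it together, a multi-part user message with one inline image produces a body like the sketch below (model name and base64 payload are placeholders; `options` appears only when a temperature is set):

```rust
use serde_json::json;

fn main() {
    // Shape build_body produces for a text + base64-image message.
    let body = json!({
        "model": "llava", // placeholder vision-capable model
        "messages": [{
            "role": "user",
            "content": "What is in this picture?",
            "images": ["iVBORw0KGgo..."] // base64 payload, data-URL prefix stripped
        }],
        "stream": true,
        "options": { "temperature": 0.7 }
    });
    println!("{body}");
}
```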