diff --git a/README.md b/README.md index 7c88b86..067ce0d 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ With sessions, AIChat conducts context-aware conversations. ![aichat-session](https://github.com/sigoden/aichat/assets/4012553/1444c5c9-ea67-4ad2-80df-a76954e8cce0) -### Retrieval-Augmented Generation (RAG) +### RAG Seamlessly integrates document interactions into your chat experience. @@ -92,13 +92,13 @@ Function calling supercharges LLMs by connecting them to external tools and data We have created a new repository [https://github.com/sigoden/llm-functions](https://github.com/sigoden/llm-functions) to help you make the most of this feature. -#### Tool Use +#### Tool Here's a glimpse of How to use the tools. -![aichat-tool-use](https://github.com/sigoden/aichat/assets/4012553/c1b6b136-bbd3-4028-9b01-7d728390c0bf) +![aichat-tool](https://github.com/sigoden/aichat/assets/4012553/f9d7f827-9a2a-4ea1-8b8c-19e1c93178d4) -#### AI Agent +#### Agent Agent = Prompt (Role) + Tools (Function Callings) + Knowndge (RAG). It's also known as OpenAI's GPTs. diff --git a/config.example.yaml b/config.example.yaml index d45a86c..45efb65 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -16,7 +16,7 @@ repl_prelude: null # Overrides the `prelude` setting specifically agent_prelude: null # Set a session to use when starting a agent. (e.g. temp, default) # ---- session ---- -# Controls the persistence of the session, if null, asking the user +# Controls the persistence of the session. 
if true, auto save; if false, not save; if null, asking the user save_session: null # Compress session when token count reaches or exceeds this threshold compress_threshold: 4000 @@ -25,20 +25,9 @@ summarize_prompt: 'Summarize the discussion briefly in 200 words or less to use # Text prompt used for including the summary of the entire session summary_prompt: 'This is a summary of the chat history as a recap: ' -# Define document loaders to control how RAG and `.file`/`--file` load files of specific formats. -document_loaders: - # You can add custom loaders using the following syntax: - # <file-extension>: <command-to-load-the-file> - # Note: Use `$1` for input file and `$2` for output file. If `$2` is omitted, use stdout as output. - pdf: 'pdftotext $1 -' # Load .pdf file, see https://poppler.freedesktop.org - docx: 'pandoc --to plain $1' # Load .docx file - # xlsx: 'ssconvert $1 $2' # Load .xlsx file - # html: 'pandoc --to plain $1' # Load .html file - recursive_url: 'rag-crawler $1 $2' # Load websites, see https://github.com/sigoden/rag-crawler - # ---- function-calling & agent ---- -# Controls the function calling feature. For setup instructions, visit https://github.com/sigoden/llm-functions -function_calling: true +# Visit https://github.com/sigoden/llm-functions for setup instructions +function_calling: true # Enables or disables function calling (Globally). # Regex for seletecting dangerous functions # User confirmation is required when executing these functions # e.g. 'execute_command|execute_js_code' 'execute_.*' @@ -76,6 +65,17 @@ rag_template: | Given the context information, answer the query. Query: __INPUT__ +# Define document loaders to control how RAG and `.file`/`--file` load files of specific formats. +document_loaders: + # You can add custom loaders using the following syntax: + # <file-extension>: <command-to-load-the-file> + # Note: Use `$1` for input file and `$2` for output file. If `$2` is omitted, use stdout as output. 
+ pdf: 'pdftotext $1 -' # Load .pdf file, see https://poppler.freedesktop.org + docx: 'pandoc --to plain $1' # Load .docx file + # xlsx: 'ssconvert $1 $2' # Load .xlsx file + # html: 'pandoc --to plain $1' # Load .html file + recursive_url: 'rag-crawler $1 $2' # Load websites, see https://github.com/sigoden/rag-crawler + # ---- apperence ---- highlight: true # Controls syntax highlighting light_theme: false # Activates a light color theme when true. env: AICHAT_LIGHT_THEME @@ -98,7 +98,7 @@ clients: # - name: xxxx # Embedding model # type: embedding # max_input_tokens: 2048 - # default_chunk_size: 2000 + # default_chunk_size: 1500 # max_batch_size: 100 # - name: xxxx # Reranker model # type: reranker diff --git a/src/rag/loader.rs b/src/rag/loader.rs index 37fa803..7fa1fd6 100644 --- a/src/rag/loader.rs +++ b/src/rag/loader.rs @@ -7,7 +7,7 @@ use std::{collections::HashMap, path::Path}; pub const EXTENSION_METADATA: &str = "__extension__"; pub const PATH_METADATA: &str = "__path__"; -pub async fn load_recrusive_url( +pub async fn load_recursive_url( loaders: &HashMap, path: &str, ) -> Result> { diff --git a/src/rag/mod.rs b/src/rag/mod.rs index e2f6ddd..3e7eda5 100644 --- a/src/rag/mod.rs +++ b/src/rag/mod.rs @@ -276,7 +276,7 @@ impl Rag { println!("Load {path} [{}/{paths_len}]", index + 1); if Self::is_url_path(path) { if let Some(path) = path.strip_suffix("**") { - files.extend(load_recrusive_url(&loaders, path).await?); + files.extend(load_recursive_url(&loaders, path).await?); } else { files.push(load_url(&loaders, path).await?); } @@ -326,11 +326,11 @@ impl Rag { "\npath: {path}\n{metadata}\n\n" )); let document = RagDocument::new(contents); - let splitted_documents = splitter.split_documents(&[document], &split_options); + let split_documents = splitter.split_documents(&[document], &split_options); rag_files.push(RagFile { hash: hash.clone(), path, - documents: splitted_documents, + documents: split_documents, }); }