From 6f116ca5279698bcc80a061cd544b7783f3a882b Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Mon, 9 Sep 2024 21:22:19 -0700 Subject: [PATCH] feat: Add Jina AI integration for web scraping and question search --- cli/cli.go | 43 ++++++++++++++++++++-------- cli/flags.go | 1 + core/fabric.go | 11 +++++++- jina/jina.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 13 deletions(-) create mode 100644 jina/jina.go diff --git a/cli/cli.go b/cli/cli.go index 69dbeca..b826636 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -3,13 +3,13 @@ package cli import ( "fmt" "os" - "os/exec" "path/filepath" "strconv" "strings" "github.com/danielmiessler/fabric/core" "github.com/danielmiessler/fabric/db" + "github.com/danielmiessler/fabric/jina" ) // Cli Controls the cli. It takes in the flags and runs the appropriate functions @@ -96,17 +96,6 @@ func Cli() (message string, err error) { return } - // Check for ScrapeURL flag first - if currentFlags.ScrapeURL != "" { - url := currentFlags.ScrapeURL - curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url) - fmt.Println("Executing command:", curlCommand) // Debug print - if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil { - return "", fmt.Errorf("failed to run curl command: %w", err) - } - os.Exit(0) - } - // if the interactive flag is set, run the interactive function // if currentFlags.Interactive { // interactive.Interactive() @@ -154,6 +143,36 @@ func Cli() (message string, err error) { } } + // Initialize JinaClient + jinaClient := jina.NewJinaClient() + + // Load the configuration for JinaClient, including the API key + if err = jinaClient.Configurable.Configure(); err != nil { + return "", fmt.Errorf("failed to configure JinaClient: %w", err) + } + + // Check if the scrape_url flag is set and call ScrapeURL + if currentFlags.ScrapeURL != "" { + message, err = jinaClient.ScrapeURL(currentFlags.ScrapeURL) + if err != nil { 
+ return "", fmt.Errorf("failed to scrape URL: %w", err) + } + fmt.Println(message) + return message, nil + } + + // Check if the scrape_question flag is set and call ScrapeQuestion + if currentFlags.ScrapeQuestion != "" { + message, err = jinaClient.ScrapeQuestion(currentFlags.ScrapeQuestion) + if err != nil { + return "", fmt.Errorf("failed to scrape question: %w", err) + } + fmt.Println(message) + return message, nil + } + + + var chatter *core.Chatter if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream, currentFlags.DryRun); err != nil { return diff --git a/cli/flags.go b/cli/flags.go index 1216829..7bc4b12 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -40,6 +40,7 @@ type Flags struct { YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"` DryRun bool `long:"dry-run" description:"Show what would be sent to the model without actually sending it"` ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"` + ScrapeQuestion string `short:"q" long:"scrape_question" description:"Search question using Jina AI"` } diff --git a/core/fabric.go b/core/fabric.go index 7616ea5..e0ba89f 100644 --- a/core/fabric.go +++ b/core/fabric.go @@ -10,6 +10,7 @@ import ( "github.com/atotto/clipboard" "github.com/danielmiessler/fabric/common" "github.com/danielmiessler/fabric/db" + "github.com/danielmiessler/fabric/jina" "github.com/danielmiessler/fabric/vendors/anthropic" "github.com/danielmiessler/fabric/vendors/azure" "github.com/danielmiessler/fabric/vendors/dryrun" @@ -45,6 +46,7 @@ func NewFabricBase(db *db.Db) (ret *Fabric) { VendorsAll: NewVendorsManager(), PatternsLoader: NewPatternsLoader(db.Patterns), YouTube: youtube.NewYouTube(), + Jina: jina.NewJinaClient(), } label := "Default" @@ -70,6 +72,7 @@ type Fabric struct { VendorsAll *VendorsManager *PatternsLoader *youtube.YouTube + Jina *jina.JinaClient Db *db.Db @@ -94,6 +97,7 @@ func (o *Fabric) 
SaveEnvFile() (err error) { } o.YouTube.SetupFillEnvFileContent(&envFileContent) + o.Jina.SetupFillEnvFileContent(&envFileContent) err = o.Db.SaveEnv(envFileContent.String()) return @@ -110,6 +114,10 @@ func (o *Fabric) Setup() (err error) { _ = o.YouTube.SetupOrSkip() + if err = o.Jina.SetupOrSkip(); err != nil { + return + } + if err = o.PatternsLoader.Setup(); err != nil { return } @@ -178,8 +186,9 @@ func (o *Fabric) configure() (err error) { return } - //YouTube is not mandatory, so ignore not configured error + //YouTube and Jina are not mandatory, so ignore not configured error _ = o.YouTube.Configure() + _ = o.Jina.Configure() return } diff --git a/jina/jina.go b/jina/jina.go new file mode 100644 index 0000000..125d6d8 --- /dev/null +++ b/jina/jina.go @@ -0,0 +1,76 @@ +package jina + +// see https://jina.ai for more information + +import ( + "fmt" + "io" + "net/http" + + "github.com/danielmiessler/fabric/common" +) + +type JinaClient struct { + *common.Configurable + ApiKey *common.SetupQuestion +} + +func NewJinaClient() *JinaClient { + + label := "Jina AI" + + client := &JinaClient{ + Configurable: &common.Configurable{ + Label: label, + EnvNamePrefix: common.BuildEnvVariablePrefix(label), + }, + } + client.ApiKey = client.AddSetupQuestion("API Key", true) + return client +} + +// return the main content of a webpage in clean, LLM-friendly text. 
+func (jc *JinaClient) ScrapeURL(url string) (string, error) {
+	// Jina Reader takes the target URL appended verbatim to its endpoint.
+	req, err := http.NewRequest(http.MethodGet, "https://r.jina.ai/"+url, nil)
+	if err != nil {
+		return "", fmt.Errorf("error creating request: %w", err)
+	}
+	return jc.fetch(req)
+}
+
+// ScrapeQuestion sends question to the Jina search endpoint (https://s.jina.ai)
+// and returns the response body as text, or an error if the request fails.
+func (jc *JinaClient) ScrapeQuestion(question string) (string, error) {
+	req, err := http.NewRequest(http.MethodGet, "https://s.jina.ai/", nil)
+	if err != nil {
+		return "", fmt.Errorf("error creating request: %w", err)
+	}
+	// Set the question as the decoded path so it is percent-encoded on the wire;
+	// inside a URL string a raw '?' or '#' would cut the question short.
+	req.URL.Path = "/" + question
+	return jc.fetch(req)
+}
+
+// fetch sends req, adding the API key as a bearer token when one is
+// configured, and returns the response body. Non-2xx responses are errors.
+func (jc *JinaClient) fetch(req *http.Request) (string, error) {
+	if jc.ApiKey != nil && jc.ApiKey.Value != "" {
+		req.Header.Set("Authorization", "Bearer "+jc.ApiKey.Value)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("error sending request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("error reading response body: %w", err)
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return "", fmt.Errorf("jina request failed: %s: %s", resp.Status, body)
+	}
+	return string(body), nil
+}
\ No newline at end of file