From c7449c68b7fee44ae351dc3b6b50672cde47fef9 Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:44:57 -0700 Subject: [PATCH 1/5] chore: Add ScrapeURL flag for CLI to scrape website URL to markdown using Jina AI --- cli/cli.go | 16 ++++++++++++++++ cli/flags.go | 46 ++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index 6028e65..83847f2 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -3,6 +3,7 @@ package cli import ( "fmt" "os" + "os/exec" "path/filepath" "strconv" @@ -94,6 +95,21 @@ func Cli() (message string, err error) { return } + // Check for ScrapeURL flag first + if currentFlags.ScrapeURL != "" { + fmt.Println("ScrapeURL flag is set") // Debug print + url := currentFlags.ScrapeURL + curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url) + fmt.Println("Executing command:", curlCommand) // Debug print + if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil { + return "", fmt.Errorf("failed to run curl command: %w", err) + } + fmt.Println("Curl command executed successfully") // Debug print + os.Exit(0) + } else { + fmt.Println("ScrapeURL flag is not set") // Debug print + } + // if the interactive flag is set, run the interactive function // if currentFlags.Interactive { // interactive.Interactive() diff --git a/cli/flags.go b/cli/flags.go index c4fde8d..270ca84 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -34,36 +34,38 @@ type Flags struct { Output string `short:"o" long:"output" description:"Output to file" default:""` LatestPatterns string `short:"n" long:"latest" description:"Number of latest patterns to list" default:"0"` ChangeDefaultModel bool `short:"d" long:"changeDefaultModel" description:"Change default pattern"` + ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"` + } // Init Initialize flags. returns a Flags struct and an error func Init() (ret *Flags, err error) { - var message string + var message string - ret = &Flags{} - parser := flags.NewParser(ret, flags.Default) - var args []string - if args, err = parser.Parse(); err != nil { - return - } + ret = &Flags{} + parser := flags.NewParser(ret, flags.Default) + var args []string + if args, err = parser.Parse(); err != nil { + return + } - info, _ := os.Stdin.Stat() - hasStdin := (info.Mode() & os.ModeCharDevice) == 0 + info, _ := os.Stdin.Stat() + hasStdin := (info.Mode() & os.ModeCharDevice) == 0 - // takes input from stdin if it exists, otherwise takes input from args (the last argument) - if hasStdin { - if message, err = readStdin(); err != nil { - err = errors.New("error: could not read from stdin") - return - } - } else if len(args) > 0 { - message = args[len(args)-1] - } else { - message = "" - } - ret.Message = message + // takes input from stdin if it exists, otherwise takes input from args (the last argument) + if hasStdin { + if message, err = readStdin(); err != nil { + err = errors.New("error: could not read from stdin") + return + } + } else if len(args) > 0 { + message = args[len(args)-1] + } else { + message = "" + } + ret.Message = message - return + return } // readStdin reads from stdin and returns the input as a string or an error From 9b4c20dd19ca9e919b496bf6bd99073f830b9334 Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:54:57 -0700 Subject: [PATCH 2/5] removed debug statements --- cli/cli.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index 83847f2..59ce5d1 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -100,14 +100,10 @@ func Cli() (message string, err error) { fmt.Println("ScrapeURL flag is set") // Debug print url := currentFlags.ScrapeURL curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url) - fmt.Println("Executing command:", curlCommand) // Debug print if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil { return "", fmt.Errorf("failed to run curl command: %w", err) } - fmt.Println("Curl command executed successfully") // Debug print os.Exit(0) - } else { - fmt.Println("ScrapeURL flag is not set") // Debug print } // if the interactive flag is set, run the interactive function From b2be94f2f8d7f1f43248820ca8a91b9d41975d59 Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Wed, 21 Aug 2024 11:03:04 -0700 Subject: [PATCH 3/5] added back some debug statements --- cli/cli.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/cli.go b/cli/cli.go index 59ce5d1..a63d77a 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -97,9 +97,9 @@ func Cli() (message string, err error) { // Check for ScrapeURL flag first if currentFlags.ScrapeURL != "" { - fmt.Println("ScrapeURL flag is set") // Debug print url := currentFlags.ScrapeURL curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url) + fmt.Println("Executing command:", curlCommand) // Debug print if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil { return "", fmt.Errorf("failed to run curl command: %w", err) } From 6f116ca5279698bcc80a061cd544b7783f3a882b Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Mon, 9 Sep 2024 21:22:19 -0700 Subject: [PATCH 4/5] feat: Add Jina AI integration for web scraping and question search --- cli/cli.go | 43 ++++++++++++++++++++-------- cli/flags.go | 1 + core/fabric.go | 11 +++++++- jina/jina.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 13 deletions(-) create mode 100644 jina/jina.go diff --git a/cli/cli.go b/cli/cli.go index 69dbeca..b826636 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -3,13 +3,13 @@ package cli import ( "fmt" "os" - "os/exec" "path/filepath" "strconv" "strings" "github.com/danielmiessler/fabric/core" "github.com/danielmiessler/fabric/db" + "github.com/danielmiessler/fabric/jina" ) // Cli Controls the cli. It takes in the flags and runs the appropriate functions @@ -96,17 +96,6 @@ func Cli() (message string, err error) { return } - // Check for ScrapeURL flag first - if currentFlags.ScrapeURL != "" { - url := currentFlags.ScrapeURL - curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url) - fmt.Println("Executing command:", curlCommand) // Debug print - if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil { - return "", fmt.Errorf("failed to run curl command: %w", err) - } - os.Exit(0) - } - // if the interactive flag is set, run the interactive function // if currentFlags.Interactive { // interactive.Interactive() @@ -154,6 +143,36 @@ func Cli() (message string, err error) { } } + // Initialize JinaClient + jinaClient := jina.NewJinaClient() + + // Load the configuration for JinaClient, including the API key + if err = jinaClient.Configurable.Configure(); err != nil { + return "", fmt.Errorf("failed to configure JinaClient: %w", err) + } + + // Check if the scrape_url flag is set and call ScrapeURL + if currentFlags.ScrapeURL != "" { + message, err = jinaClient.ScrapeURL(currentFlags.ScrapeURL) + if err != nil { + return "", fmt.Errorf("failed to scrape URL: %w", err) + } + fmt.Println(message) + return message, nil + } + + // Check if the scrape_question flag is set and call ScrapeQuestion + if currentFlags.ScrapeQuestion != "" { + message, err = jinaClient.ScrapeQuestion(currentFlags.ScrapeQuestion) + if err != nil { + return "", fmt.Errorf("failed to scrape question: %w", err) + } + fmt.Println(message) + return message, nil + } + + + var chatter *core.Chatter if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream, currentFlags.DryRun); err != nil { return diff --git a/cli/flags.go b/cli/flags.go index 1216829..7bc4b12 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -40,6 +40,7 @@ type Flags struct { YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"` DryRun bool `long:"dry-run" description:"Show what would be sent to the model without actually sending it"` ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"` + ScrapeQuestion string `short:"q" long:"scrape_question" description:"Search question using Jina AI"` } diff --git a/core/fabric.go b/core/fabric.go index 7616ea5..e0ba89f 100644 --- a/core/fabric.go +++ b/core/fabric.go @@ -10,6 +10,7 @@ import ( "github.com/atotto/clipboard" "github.com/danielmiessler/fabric/common" "github.com/danielmiessler/fabric/db" + "github.com/danielmiessler/fabric/jina" "github.com/danielmiessler/fabric/vendors/anthropic" "github.com/danielmiessler/fabric/vendors/azure" "github.com/danielmiessler/fabric/vendors/dryrun" @@ -45,6 +46,7 @@ func NewFabricBase(db *db.Db) (ret *Fabric) { VendorsAll: NewVendorsManager(), PatternsLoader: NewPatternsLoader(db.Patterns), YouTube: youtube.NewYouTube(), + Jina: jina.NewJinaClient(), } label := "Default" @@ -70,6 +72,7 @@ type Fabric struct { VendorsAll *VendorsManager *PatternsLoader *youtube.YouTube + Jina *jina.JinaClient Db *db.Db @@ -94,6 +97,7 @@ func (o *Fabric) SaveEnvFile() (err error) { } o.YouTube.SetupFillEnvFileContent(&envFileContent) + o.Jina.SetupFillEnvFileContent(&envFileContent) err = o.Db.SaveEnv(envFileContent.String()) return @@ -110,6 +114,10 @@ func (o *Fabric) Setup() (err error) { _ = o.YouTube.SetupOrSkip() + if err = o.Jina.SetupOrSkip(); err != nil { + return + } + if err = o.PatternsLoader.Setup(); err != nil { return } @@ -178,8 +186,9 @@ func (o *Fabric) configure() (err error) { return } - //YouTube is not mandatory, so ignore not configured error + //YouTube and Jina are not mandatory, so ignore not configured error _ = o.YouTube.Configure() + _ = o.Jina.Configure() return } diff --git a/jina/jina.go b/jina/jina.go new file mode 100644 index 0000000..125d6d8 --- /dev/null +++ b/jina/jina.go @@ -0,0 +1,76 @@ +package jina + +// see https://jina.ai for more information + +import ( + "fmt" + "io" + "net/http" + + "github.com/danielmiessler/fabric/common" +) + +type JinaClient struct { + *common.Configurable + ApiKey *common.SetupQuestion +} + +func NewJinaClient() *JinaClient { + + label := "Jina AI" + + client := &JinaClient{ + Configurable: &common.Configurable{ + Label: label, + EnvNamePrefix: common.BuildEnvVariablePrefix(label), + }, + } + client.ApiKey = client.AddSetupQuestion("API Key", true) + return client +} + +// return the main content of a webpage in clean, LLM-friendly text. +func (jc *JinaClient) ScrapeURL(url string) (string, error) { + requestURL := "https://r.jina.ai/" + url + req, err := http.NewRequest("GET", requestURL, nil) + if err != nil { + return "", fmt.Errorf("error creating request: %w", err) + } + + apiKey := jc.ApiKey.Value + + // Set the Authorization header with the Bearer token + req.Header.Set("Authorization", "Bearer " + apiKey) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("error sending request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("error reading response body: %w", err) + } + + return string(body), nil +} + +// search engine call that returns top-5 results with their URLs and contents, each in clean, LLM-friendly text. +func (jc *JinaClient) ScrapeQuestion(question string) (string, error) { + url := "https://s.jina.ai/" + question + + resp, err := http.Get(url) + if err != nil { + return "", fmt.Errorf("error making GET request: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("error reading response body: %w", err) + } + + return string(body), nil +} \ No newline at end of file From 4534ef654429f0161b1c06bd2772dcddd012e85a Mon Sep 17 00:00:00 2001 From: Noam Siegel <52804845+noamsiegel@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:44:21 -0700 Subject: [PATCH 5/5] made jina api key optional --- jina/jina.go | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/jina/jina.go b/jina/jina.go index 125d6d8..f8cf95c 100644 --- a/jina/jina.go +++ b/jina/jina.go @@ -25,8 +25,8 @@ func NewJinaClient() *JinaClient { EnvNamePrefix: common.BuildEnvVariablePrefix(label), }, } - client.ApiKey = client.AddSetupQuestion("API Key", true) - return client + client.ApiKey = client.AddSetupQuestion("API Key", false) + return client } // return the main content of a webpage in clean, LLM-friendly text. @@ -37,10 +37,10 @@ func (jc *JinaClient) ScrapeURL(url string) (string, error) { return "", fmt.Errorf("error creating request: %w", err) } - apiKey := jc.ApiKey.Value - - // Set the Authorization header with the Bearer token - req.Header.Set("Authorization", "Bearer " + apiKey) + // if api keys exist, set the header + if apiKey := jc.ApiKey.Value; apiKey != "" { + req.Header.Set("Authorization", "Bearer "+apiKey) + } client := &http.Client{} resp, err := client.Do(req) @@ -57,20 +57,29 @@ func (jc *JinaClient) ScrapeURL(url string) (string, error) { return string(body), nil } -// search engine call that returns top-5 results with their URLs and contents, each in clean, LLM-friendly text. func (jc *JinaClient) ScrapeQuestion(question string) (string, error) { - url := "https://s.jina.ai/" + question + requestURL := "https://s.jina.ai/" + question + req, err := http.NewRequest("GET", requestURL, nil) + if err != nil { + return "", fmt.Errorf("error creating request: %w", err) + } - resp, err := http.Get(url) - if err != nil { - return "", fmt.Errorf("error making GET request: %w", err) - } - defer resp.Body.Close() + // if api keys exist, set the header + if apiKey := jc.ApiKey.Value; apiKey != "" { + req.Header.Set("Authorization", "Bearer "+apiKey) + } - body, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("error reading response body: %w", err) - } + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("error sending request: %w", err) + } + defer resp.Body.Close() - return string(body), nil + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("error reading response body: %w", err) + } + + return string(body), nil } \ No newline at end of file