feat: Add Jina AI integration for web scraping and question search

This commit is contained in:
Noam Siegel 2024-09-09 21:22:19 -07:00
parent bac7d87390
commit 6f116ca527
4 changed files with 118 additions and 13 deletions

View File

@ -3,13 +3,13 @@ package cli
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"github.com/danielmiessler/fabric/core"
"github.com/danielmiessler/fabric/db"
"github.com/danielmiessler/fabric/jina"
)
// Cli Controls the cli. It takes in the flags and runs the appropriate functions
@ -96,17 +96,6 @@ func Cli() (message string, err error) {
return
}
// Check for ScrapeURL flag first
if currentFlags.ScrapeURL != "" {
url := currentFlags.ScrapeURL
curlCommand := fmt.Sprintf("curl https://r.jina.ai/%s", url)
fmt.Println("Executing command:", curlCommand) // Debug print
if err := exec.Command("sh", "-c", curlCommand).Run(); err != nil {
return "", fmt.Errorf("failed to run curl command: %w", err)
}
os.Exit(0)
}
// if the interactive flag is set, run the interactive function
// if currentFlags.Interactive {
// interactive.Interactive()
@ -154,6 +143,36 @@ func Cli() (message string, err error) {
}
}
// Initialize JinaClient
jinaClient := jina.NewJinaClient()
// Load the configuration for JinaClient, including the API key
if err = jinaClient.Configurable.Configure(); err != nil {
return "", fmt.Errorf("failed to configure JinaClient: %w", err)
}
// Check if the scrape_url flag is set and call ScrapeURL
if currentFlags.ScrapeURL != "" {
message, err = jinaClient.ScrapeURL(currentFlags.ScrapeURL)
if err != nil {
return "", fmt.Errorf("failed to scrape URL: %w", err)
}
fmt.Println(message)
return message, nil
}
// Check if the scrape_question flag is set and call ScrapeQuestion
if currentFlags.ScrapeQuestion != "" {
message, err = jinaClient.ScrapeQuestion(currentFlags.ScrapeQuestion)
if err != nil {
return "", fmt.Errorf("failed to scrape question: %w", err)
}
fmt.Println(message)
return message, nil
}
var chatter *core.Chatter
if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream, currentFlags.DryRun); err != nil {
return

View File

@ -40,6 +40,7 @@ type Flags struct {
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
DryRun bool `long:"dry-run" description:"Show what would be sent to the model without actually sending it"`
ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"`
ScrapeQuestion string `short:"q" long:"scrape_question" description:"Search question using Jina AI"`
}

View File

@ -10,6 +10,7 @@ import (
"github.com/atotto/clipboard"
"github.com/danielmiessler/fabric/common"
"github.com/danielmiessler/fabric/db"
"github.com/danielmiessler/fabric/jina"
"github.com/danielmiessler/fabric/vendors/anthropic"
"github.com/danielmiessler/fabric/vendors/azure"
"github.com/danielmiessler/fabric/vendors/dryrun"
@ -45,6 +46,7 @@ func NewFabricBase(db *db.Db) (ret *Fabric) {
VendorsAll: NewVendorsManager(),
PatternsLoader: NewPatternsLoader(db.Patterns),
YouTube: youtube.NewYouTube(),
Jina: jina.NewJinaClient(),
}
label := "Default"
@ -70,6 +72,7 @@ type Fabric struct {
VendorsAll *VendorsManager
*PatternsLoader
*youtube.YouTube
Jina *jina.JinaClient
Db *db.Db
@ -94,6 +97,7 @@ func (o *Fabric) SaveEnvFile() (err error) {
}
o.YouTube.SetupFillEnvFileContent(&envFileContent)
o.Jina.SetupFillEnvFileContent(&envFileContent)
err = o.Db.SaveEnv(envFileContent.String())
return
@ -110,6 +114,10 @@ func (o *Fabric) Setup() (err error) {
_ = o.YouTube.SetupOrSkip()
if err = o.Jina.SetupOrSkip(); err != nil {
return
}
if err = o.PatternsLoader.Setup(); err != nil {
return
}
@ -178,8 +186,9 @@ func (o *Fabric) configure() (err error) {
return
}
//YouTube is not mandatory, so ignore not configured error
//YouTube and Jina are not mandatory, so ignore not configured error
_ = o.YouTube.Configure()
_ = o.Jina.Configure()
return
}

76
jina/jina.go Normal file
View File

@ -0,0 +1,76 @@
package jina
// see https://jina.ai for more information
import (
"fmt"
"io"
"net/http"
"github.com/danielmiessler/fabric/common"
)
// JinaClient groups what this package needs to talk to Jina AI: an embedded
// *common.Configurable plus a handle on the API key setup question.
type JinaClient struct {
	// Configurable is embedded, so its fields and methods are promoted
	// onto JinaClient.
	*common.Configurable

	// ApiKey is the setup question for the Jina AI API key. ScrapeURL reads
	// its Value to build the Bearer token it sends.
	ApiKey *common.SetupQuestion
}
// NewJinaClient builds a JinaClient labelled "Jina AI". The environment
// variable prefix is derived from that label via
// common.BuildEnvVariablePrefix, and a single "API Key" setup question is
// registered and stored in the ApiKey field.
func NewJinaClient() *JinaClient {
	const label = "Jina AI"

	cfg := &common.Configurable{
		Label:         label,
		EnvNamePrefix: common.BuildEnvVariablePrefix(label),
	}

	jc := &JinaClient{Configurable: cfg}
	// NOTE(review): the second argument presumably marks the question as
	// required — confirm against common.AddSetupQuestion.
	jc.ApiKey = jc.AddSetupQuestion("API Key", true)
	return jc
}
// ScrapeURL returns the main content of a webpage in clean, LLM-friendly text.
//
// It sends a GET request to the Jina AI Reader endpoint (https://r.jina.ai/)
// with url appended verbatim to the endpoint, and returns the response body.
// When an API key is configured it is sent as a Bearer token; when no key is
// available the Authorization header is omitted instead of sending an empty
// "Bearer " value.
//
// An error is returned when url is empty, when the request cannot be built
// or sent, when the body cannot be read, or when the service answers with a
// non-2xx status. In the last case the error carries the status and the
// beginning of the response body, so an API error payload is never handed
// back to the caller as if it were page content.
//
// The request is sent through http.DefaultClient, which applies no timeout.
func (jc *JinaClient) ScrapeURL(url string) (string, error) {
	if url == "" {
		return "", fmt.Errorf("error creating request: empty URL")
	}

	req, err := http.NewRequest(http.MethodGet, "https://r.jina.ai/"+url, nil)
	if err != nil {
		return "", fmt.Errorf("error creating request: %w", err)
	}

	// Only authenticate when a key is actually present; the nil checks keep a
	// partially initialised client from panicking.
	if jc != nil && jc.ApiKey != nil && jc.ApiKey.Value != "" {
		req.Header.Set("Authorization", "Bearer "+jc.ApiKey.Value)
	}

	// Reuse the shared default client rather than allocating one per call.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error sending request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		// Best effort: the snippet only enriches the error message, so a
		// failure while reading it is deliberately ignored.
		const maxErrorBody = 512
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return "", fmt.Errorf("unexpected response from Jina reader: %s: %s", resp.Status, snippet)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("error reading response body: %w", err)
	}
	return string(body), nil
}
// ScrapeQuestion is a search engine call that returns top-5 results with
// their URLs and contents, each in clean, LLM-friendly text.
//
// question is raw, unencoded text. It is placed in the path of a GET request
// to the Jina AI search endpoint (https://s.jina.ai/) by assigning the
// request URL's Path field, which lets net/url percent-encode it. Characters
// such as '?', '#' and '%' therefore reach the service as part of the
// question instead of being parsed as URL syntax or rejected as invalid
// escapes. When an API key is configured it is sent as a Bearer token, the
// same way ScrapeURL does.
//
// An error is returned when question is empty, when the request cannot be
// built or sent, when the body cannot be read, or when the service answers
// with a non-2xx status; in the last case the error carries the status and
// the beginning of the response body.
//
// The request is sent through http.DefaultClient, which applies no timeout.
func (jc *JinaClient) ScrapeQuestion(question string) (string, error) {
	if question == "" {
		return "", fmt.Errorf("error creating request: empty question")
	}

	req, err := http.NewRequest(http.MethodGet, "https://s.jina.ai/", nil)
	if err != nil {
		return "", fmt.Errorf("error creating request: %w", err)
	}
	// Path holds the decoded form; the URL type escapes it when the request
	// line is written, so no manual encoding is needed here.
	req.URL.Path = "/" + question

	// Only authenticate when a key is actually present; the nil checks keep a
	// partially initialised client from panicking.
	if jc != nil && jc.ApiKey != nil && jc.ApiKey.Value != "" {
		req.Header.Set("Authorization", "Bearer "+jc.ApiKey.Value)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error making GET request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		// Best effort: the snippet only enriches the error message, so a
		// failure while reading it is deliberately ignored.
		const maxErrorBody = 512
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return "", fmt.Errorf("unexpected response from Jina search: %s: %s", resp.Status, snippet)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("error reading response body: %w", err)
	}
	return string(body), nil
}