Merge pull request #861 from noamsiegel/scrape_url

Scrape url
This commit is contained in:
Eugen Eisler 2024-09-16 21:37:10 +02:00 committed by GitHub
commit a6fc13dbdc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 139 additions and 11 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/danielmiessler/fabric/core"
"github.com/danielmiessler/fabric/db"
"github.com/danielmiessler/fabric/jina"
)
// Cli controls the CLI. It takes in the flags and runs the appropriate functions.
@ -151,6 +152,36 @@ func Cli() (message string, err error) {
}
}
// Initialize JinaClient
jinaClient := jina.NewJinaClient()
// Load the configuration for JinaClient, including the API key
if err = jinaClient.Configurable.Configure(); err != nil {
return "", fmt.Errorf("failed to configure JinaClient: %w", err)
}
// Check if the scrape_url flag is set and call ScrapeURL
if currentFlags.ScrapeURL != "" {
message, err = jinaClient.ScrapeURL(currentFlags.ScrapeURL)
if err != nil {
return "", fmt.Errorf("failed to scrape URL: %w", err)
}
fmt.Println(message)
return message, nil
}
// Check if the scrape_question flag is set and call ScrapeQuestion
if currentFlags.ScrapeQuestion != "" {
message, err = jinaClient.ScrapeQuestion(currentFlags.ScrapeQuestion)
if err != nil {
return "", fmt.Errorf("failed to scrape question: %w", err)
}
fmt.Println(message)
return message, nil
}
var chatter *core.Chatter
if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream, currentFlags.DryRun); err != nil {
return

View File

@ -40,6 +40,9 @@ type Flags struct {
YouTubeTranscript bool `long:"transcript" description:"Grab transcript from YouTube video and send to chat"`
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
DryRun bool `long:"dry-run" description:"Show what would be sent to the model without actually sending it"`
ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"`
ScrapeQuestion string `short:"q" long:"scrape_question" description:"Search question using Jina AI"`
}
// Init initializes the flags. It returns a Flags struct and an error.

View File

@ -14,6 +14,7 @@ import (
"github.com/atotto/clipboard"
"github.com/danielmiessler/fabric/common"
"github.com/danielmiessler/fabric/db"
"github.com/danielmiessler/fabric/jina"
"github.com/danielmiessler/fabric/vendors/anthropic"
"github.com/danielmiessler/fabric/vendors/azure"
"github.com/danielmiessler/fabric/vendors/dryrun"
@ -50,6 +51,7 @@ func NewFabricBase(db *db.Db) (ret *Fabric) {
VendorsAll: NewVendorsManager(),
PatternsLoader: NewPatternsLoader(db.Patterns),
YouTube: youtube.NewYouTube(),
Jina: jina.NewJinaClient(),
}
label := "Default"
@ -75,6 +77,7 @@ type Fabric struct {
VendorsAll *VendorsManager
*PatternsLoader
*youtube.YouTube
Jina *jina.JinaClient
Db *db.Db
@ -99,6 +102,7 @@ func (o *Fabric) SaveEnvFile() (err error) {
}
o.YouTube.SetupFillEnvFileContent(&envFileContent)
o.Jina.SetupFillEnvFileContent(&envFileContent)
err = o.Db.SaveEnv(envFileContent.String())
return
@ -115,6 +119,10 @@ func (o *Fabric) Setup() (err error) {
_ = o.YouTube.SetupOrSkip()
if err = o.Jina.SetupOrSkip(); err != nil {
return
}
if err = o.PatternsLoader.Setup(); err != nil {
return
}
@ -183,8 +191,9 @@ func (o *Fabric) configure() (err error) {
return
}
//YouTube is not mandatory, so ignore not configured error
//YouTube and Jina are not mandatory, so ignore not configured error
_ = o.YouTube.Configure()
_ = o.Jina.Configure()
return
}

85
jina/jina.go Normal file
View File

@ -0,0 +1,85 @@
package jina
// see https://jina.ai for more information
import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"time"

	"github.com/danielmiessler/fabric/common"
)
// JinaClient is the Jina AI (https://jina.ai) integration. It embeds
// *common.Configurable, which supplies its setup/configuration methods, and
// keeps the API key setup question used to authenticate requests.
type JinaClient struct {
*common.Configurable
// ApiKey is the "API Key" setup question registered by NewJinaClient. When
// its Value is non-empty it is sent as a Bearer token in the Authorization
// header; when empty, requests are sent without that header.
ApiKey *common.SetupQuestion
}
// NewJinaClient builds a JinaClient labelled "Jina AI", derives its
// environment-variable prefix from that label, and registers the "API Key"
// setup question on it.
func NewJinaClient() *JinaClient {
	name := "Jina AI"

	configurable := &common.Configurable{
		Label:         name,
		EnvNamePrefix: common.BuildEnvVariablePrefix(name),
	}

	jc := &JinaClient{Configurable: configurable}
	// The second argument is false; presumably this marks the key as
	// optional — confirm against common.Configurable.AddSetupQuestion.
	jc.ApiKey = jc.AddSetupQuestion("API Key", false)
	return jc
}
// jinaRequestTimeout bounds a complete Jina round trip (connecting, following
// redirects and reading the body) so a stalled endpoint cannot hang the caller
// indefinitely.
const jinaRequestTimeout = 2 * time.Minute

// jinaHTTPClient is shared by all Jina requests instead of building a new
// client (and connection pool) on every call.
var jinaHTTPClient = &http.Client{Timeout: jinaRequestTimeout}

// ScrapeURL requests https://r.jina.ai/<url> and returns the response body as
// a string. Per the Jina documentation this endpoint returns the main content
// of the page in clean, LLM-friendly text.
//
// An error is returned when the request cannot be built or sent, when the body
// cannot be read, or when the server answers with a non-2xx status.
func (jc *JinaClient) ScrapeURL(url string) (string, error) {
	// The target is appended verbatim, as the reader endpoint expects the raw
	// page URL after the slash.
	return jc.fetch("https://r.jina.ai/" + url)
}

// ScrapeQuestion requests https://s.jina.ai/<question> (the Jina search
// endpoint) and returns the response body as a string.
//
// The question is path-escaped first so that characters such as '?', '#', '%'
// and '/' are sent as part of the question instead of being interpreted as URL
// syntax. Errors are reported under the same conditions as ScrapeURL.
func (jc *JinaClient) ScrapeQuestion(question string) (string, error) {
	return jc.fetch("https://s.jina.ai/" + url.PathEscape(question))
}

// fetch performs an authenticated GET against requestURL and returns the body
// of a successful (2xx) response.
func (jc *JinaClient) fetch(requestURL string) (string, error) {
	req, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return "", fmt.Errorf("error creating request: %w", err)
	}

	// The API key is optional: only send the header when a key is configured.
	// The nil check protects a JinaClient that was not built by NewJinaClient.
	if jc.ApiKey != nil && jc.ApiKey.Value != "" {
		req.Header.Set("Authorization", "Bearer "+jc.ApiKey.Value)
	}

	resp, err := jinaHTTPClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("error sending request: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("error reading response body: %w", err)
	}

	// Without this check an error page (401, 429, 5xx, ...) would be handed
	// back to the caller as if it were the scraped content.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("unexpected response status %s: %s", resp.Status, body)
	}

	return string(body), nil
}