Merge pull request #867 from danielmiessler/youtube_graber

feat: native integration of yt to fabric
This commit is contained in:
Daniel Miessler 2024-08-22 15:55:36 -07:00 committed by GitHub
commit 9a0444db7e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 274 additions and 0 deletions

View File

@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"github.com/danielmiessler/fabric/core"
"github.com/danielmiessler/fabric/db"
@ -101,6 +102,46 @@ func Cli() (message string, err error) {
// if none of the above currentFlags are set, run the initiate chat function
if currentFlags.YouTube != "" {
if fabric.YouTube.IsConfigured() == false {
err = fmt.Errorf("YouTube is not configured, please run the setup procedure")
return
}
var videoId string
if videoId, err = fabric.YouTube.GetVideoId(currentFlags.YouTube); err != nil {
return
}
if currentFlags.YouTubeTranscript {
var transcript string
if transcript, err = fabric.YouTube.GrabTranscript(videoId); err != nil {
return
}
if currentFlags.Message != "" {
currentFlags.Message = currentFlags.Message + "\n" + transcript
} else {
currentFlags.Message = transcript
}
}
if currentFlags.YouTubeComments {
var comments []string
if comments, err = fabric.YouTube.GrabComments(videoId); err != nil {
return
}
commentsString := strings.Join(comments, "\n")
if currentFlags.Message != "" {
currentFlags.Message = currentFlags.Message + "\n" + commentsString
} else {
currentFlags.Message = commentsString
}
}
}
var chatter *core.Chatter
if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream); err != nil {
return

View File

@ -34,6 +34,9 @@ type Flags struct {
Output string `short:"o" long:"output" description:"Output to file" default:""`
LatestPatterns string `short:"n" long:"latest" description:"Number of latest patterns to list" default:"0"`
ChangeDefaultModel bool `short:"d" long:"changeDefaultModel" description:"Change default pattern"`
YouTube string `short:"y" long:"youtube" description:"YouTube video url to grab transcript, comments from it and send to chat"`
YouTubeTranscript bool `long:"transcript" description:"Grab transcript from YouTube video and send to chat"`
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
}
// Init Initialize flags. returns a Flags struct and an error

1
go.mod
View File

@ -30,6 +30,7 @@ require (
dario.cat/mergo v1.0.0 // indirect
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/ProtonMail/go-crypto v1.0.0 // indirect
github.com/anaskhan96/soup v1.2.5 // indirect
github.com/cloudflare/circl v1.3.7 // indirect
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect

4
go.sum
View File

@ -19,6 +19,8 @@ github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migc
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/ProtonMail/go-crypto v1.0.0 h1:LRuvITjQWX+WIfr930YHG2HNfjR1uOfyf5vE0kC2U78=
github.com/ProtonMail/go-crypto v1.0.0/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0=
github.com/anaskhan96/soup v1.2.5 h1:V/FHiusdTrPrdF4iA1YkVxsOpdNcgvqT1hG+YtcZ5hM=
github.com/anaskhan96/soup v1.2.5/go.mod h1:6YnEp9A2yywlYdM4EgDz9NEHclocMepEtku7wg6Cq3s=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
@ -145,6 +147,7 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
@ -187,6 +190,7 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=

View File

@ -1,7 +1,18 @@
package youtube
import (
"context"
"encoding/json"
"flag"
"fmt"
"github.com/anaskhan96/soup"
"github.com/danielmiessler/fabric/common"
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
"log"
"regexp"
"strconv"
"strings"
)
func NewYouTube() (ret *YouTube) {
@ -22,4 +33,218 @@ func NewYouTube() (ret *YouTube) {
// YouTube bundles the configuration and the API client used by the methods in
// this file to fetch video transcripts, comments and durations.
type YouTube struct {
// Embedded shared configuration behaviour from the common package.
*common.Configurable
// ApiKey is the setup question whose Value is handed to the YouTube Data API
// client as the API key (see initService).
ApiKey *common.SetupQuestion
// service is the YouTube Data API client; it stays nil until initService
// creates it on first use.
service *youtube.Service
}
// initService creates the YouTube Data API client on first use, authenticating
// with the configured API key. When a client already exists it is reused and
// nil is returned. Any error from client construction is returned unchanged.
func (o *YouTube) initService() (err error) {
	if o.service != nil {
		return nil
	}
	o.service, err = youtube.NewService(context.Background(), option.WithAPIKey(o.ApiKey.Value))
	return err
}
// videoIDRegexp captures the 11-character video ID from the URL shapes this
// package accepts (youtube.com watch/embed/v paths and youtu.be short links).
// It is compiled once at package level instead of on every GetVideoId call.
var videoIDRegexp = regexp.MustCompile(`(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})`)

// GetVideoId extracts the video ID from a YouTube URL.
//
// It returns the 11-character ID on success, or an error when the API client
// cannot be created or when url does not contain a recognisable video ID.
func (o *YouTube) GetVideoId(url string) (ret string, err error) {
	// The API client is initialized first, so a client-creation error is
	// reported before the URL is parsed. This is existing behaviour and is kept
	// because later calls on the same receiver may use o.service directly.
	if err = o.initService(); err != nil {
		return
	}

	match := videoIDRegexp.FindStringSubmatch(url)
	if len(match) < 2 {
		err = fmt.Errorf("invalid YouTube URL, can't get video ID")
		return
	}
	ret = match[1]
	return
}
// GrabTranscriptForUrl resolves url to a video ID and returns that video's
// transcript text. An error from either step is returned as-is, with an empty
// transcript.
func (o *YouTube) GrabTranscriptForUrl(url string) (ret string, err error) {
	id, idErr := o.GetVideoId(url)
	if idErr != nil {
		return "", idErr
	}
	ret, err = o.GrabTranscript(id)
	return ret, err
}
// GrabTranscript downloads the caption document for videoId and flattens it
// into plain text.
//
// The raw document comes from GrabTranscriptBase; the text of every <text>
// element is appended in document order, each followed by a single space. If
// the raw transcript cannot be fetched, the returned error wraps the
// underlying cause so callers can still inspect it with errors.Is/As.
func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) {
	var transcript string
	if transcript, err = o.GrabTranscriptBase(videoId); err != nil {
		err = fmt.Errorf("transcript not available. (%w)", err)
		return
	}

	// Parse the XML transcript and collect the content of its <text> tags.
	doc := soup.HTMLParse(transcript)
	var textBuilder strings.Builder
	for _, textTag := range doc.FindAll("text") {
		textBuilder.WriteString(textTag.Text())
		textBuilder.WriteString(" ")
	}

	// Materialize the result once, after all segments have been written.
	ret = textBuilder.String()
	return
}
// captionTracksRegexp pulls the JSON array that follows the "captionTracks"
// key out of an inline <script> on the watch page. It is compiled once at
// package level rather than once per script tag.
var captionTracksRegexp = regexp.MustCompile(`"captionTracks":(\[.*?\])`)

// GrabTranscriptBase returns the raw caption document for videoId.
//
// It fetches the video's watch page, looks through the inline <script> tags
// for a "captionTracks" JSON array, and downloads the document referenced by
// the first track's baseUrl. It returns an error when the API client cannot be
// created, when a download fails, when the caption track JSON cannot be
// decoded, or when no caption track is found ("transcript not found").
func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) {
	if err = o.initService(); err != nil {
		return
	}

	watchURL := "https://www.youtube.com/watch?v=" + videoId
	var page string
	if page, err = soup.Get(watchURL); err != nil {
		return
	}

	for _, scriptTag := range soup.HTMLParse(page).FindAll("script") {
		// Read the script body once; it is needed for both checks below.
		script := scriptTag.Text()
		if !strings.Contains(script, "captionTracks") {
			continue
		}

		match := captionTracksRegexp.FindStringSubmatch(script)
		if len(match) < 2 {
			continue
		}

		var captionTracks []struct {
			BaseURL string `json:"baseUrl"`
		}
		if err = json.Unmarshal([]byte(match[1]), &captionTracks); err != nil {
			err = fmt.Errorf("parsing caption tracks: %w", err)
			return
		}

		// Only the first listed track is used.
		if len(captionTracks) > 0 {
			ret, err = soup.Get(captionTracks[0].BaseURL)
			return
		}
	}

	err = fmt.Errorf("transcript not found")
	return
}
// GrabComments returns the comments of the video identified by videoId as
// plain text.
//
// A single CommentThreads.List request is issued for up to 100 threads; no
// further pages are requested. Each top-level comment is appended as-is and
// each of its replies is appended right after it, prefixed with " - ". On a
// request failure the error is logged and returned.
func (o *YouTube) GrabComments(videoId string) (ret []string, err error) {
	if err = o.initService(); err != nil {
		return
	}

	call := o.service.CommentThreads.List([]string{"snippet", "replies"}).
		VideoId(videoId).
		TextFormat("plainText").
		MaxResults(100)

	var response *youtube.CommentThreadListResponse
	if response, err = call.Do(); err != nil {
		log.Printf("Failed to fetch comments: %v", err)
		return
	}

	for _, item := range response.Items {
		if item == nil {
			continue
		}

		// The API types are pointers throughout; skip anything the response
		// left unset instead of dereferencing nil.
		if s := item.Snippet; s != nil && s.TopLevelComment != nil && s.TopLevelComment.Snippet != nil {
			ret = append(ret, s.TopLevelComment.Snippet.TextDisplay)
		}

		if item.Replies == nil {
			continue
		}
		for _, reply := range item.Replies.Comments {
			if reply == nil || reply.Snippet == nil {
				continue
			}
			ret = append(ret, " - "+reply.Snippet.TextDisplay)
		}
	}
	return
}
// GrabDurationForUrl resolves url to a video ID and returns that video's
// duration as reported by GrabDuration. It makes sure the API client exists
// first; any error along the way is returned with a zero duration.
func (o *YouTube) GrabDurationForUrl(url string) (ret int, err error) {
	if initErr := o.initService(); initErr != nil {
		return 0, initErr
	}

	id, idErr := o.GetVideoId(url)
	if idErr != nil {
		return 0, idErr
	}

	return o.GrabDuration(id)
}
// isoDurationRegexp parses the ISO 8601 duration strings found in a video's
// contentDetails, e.g. "PT4M13S", "PT1H2M3S" or, for videos of a day or more,
// "P1DT2H3M4S". Capture groups: 1=days, 2=hours, 3=minutes, 4=seconds.
// It is compiled once at package level rather than on every call.
var isoDurationRegexp = regexp.MustCompile(`(?i)P(?:(\d+)D)?T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?`)

// GrabDuration returns the length of the video identified by videoId in whole
// minutes (leftover seconds are truncated).
//
// It returns an error when the API client cannot be created, when the API
// request fails, when the API returns no details for the video, or when the
// reported duration string cannot be parsed.
func (o *YouTube) GrabDuration(videoId string) (ret int, err error) {
	// Make the method safe to call on its own: o.service is nil until
	// initService has run.
	if err = o.initService(); err != nil {
		return
	}

	var videoResponse *youtube.VideoListResponse
	if videoResponse, err = o.service.Videos.List([]string{"contentDetails"}).Id(videoId).Do(); err != nil {
		err = fmt.Errorf("error getting video details: %w", err)
		return
	}

	// An unknown or inaccessible video yields an empty item list; report that
	// instead of panicking on Items[0].
	if len(videoResponse.Items) == 0 || videoResponse.Items[0] == nil || videoResponse.Items[0].ContentDetails == nil {
		err = fmt.Errorf("no video details found for video ID %q", videoId)
		return
	}

	durationStr := videoResponse.Items[0].ContentDetails.Duration
	matches := isoDurationRegexp.FindStringSubmatch(durationStr)
	if len(matches) == 0 {
		return 0, fmt.Errorf("invalid duration string: %s", durationStr)
	}

	// Atoi errors are deliberately ignored: a component missing from the
	// duration leaves its capture group empty, which must count as zero.
	days, _ := strconv.Atoi(matches[1])
	hours, _ := strconv.Atoi(matches[2])
	minutes, _ := strconv.Atoi(matches[3])
	seconds, _ := strconv.Atoi(matches[4])

	ret = (days*24+hours)*60 + minutes + seconds/60
	return
}
// Grab collects the pieces of information selected in options for the video
// at url.
//
// The URL is resolved to a video ID first. Then, in this order, the duration,
// the comments and the transcript are fetched if the corresponding option is
// set. A nil options value selects nothing and yields an empty VideoInfo. The
// first failure stops processing; its error is returned together with whatever
// had been collected up to that point.
func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error) {
	var videoId string
	if videoId, err = o.GetVideoId(url); err != nil {
		return
	}

	ret = &VideoInfo{}

	// Nothing requested: avoid dereferencing a nil options pointer.
	if options == nil {
		return
	}

	if options.Duration {
		if ret.Duration, err = o.GrabDuration(videoId); err != nil {
			err = fmt.Errorf("error parsing video duration: %w", err)
			return
		}
	}

	if options.Comments {
		if ret.Comments, err = o.GrabComments(videoId); err != nil {
			err = fmt.Errorf("error getting comments: %w", err)
			return
		}
	}

	if options.Transcript {
		// GrabTranscript already adds its own context to the error.
		if ret.Transcript, err = o.GrabTranscript(videoId); err != nil {
			return
		}
	}
	return
}
// Options selects which pieces of video information Grab should collect.
type Options struct {
// Duration requests the video's duration.
Duration bool
// Transcript requests the video's transcript text.
Transcript bool
// Comments requests the video's comments.
Comments bool
// Lang holds the transcript language set by the "lang" flag in GrabByFlags.
// NOTE(review): none of the code shown here reads it - confirm whether
// language selection is implemented elsewhere.
Lang string
}
// VideoInfo is the result returned by Grab. Only the fields whose option was
// requested are filled in; the others keep their zero value.
type VideoInfo struct {
// Transcript is the flattened transcript text produced by GrabTranscript.
Transcript string `json:"transcript"`
// Duration is the video length in whole minutes, as computed by GrabDuration.
Duration int `json:"duration"`
// Comments lists top-level comments and their replies as produced by
// GrabComments (replies are prefixed with " - ").
Comments []string `json:"comments"`
}
// GrabByFlags reads its options and the video URL from the process command
// line and delegates to Grab.
//
// Recognised flags: -duration, -transcript, -comments and -lang. The first
// positional argument is the video URL. If no URL is given, an error is
// returned so the caller decides how to react, rather than the process being
// terminated from inside this method.
//
// NOTE: the flags are registered on the standard library's package-level flag
// set, which panics when a flag name is registered twice, so this method is
// only suitable for a single call per process.
func (o *YouTube) GrabByFlags() (ret *VideoInfo, err error) {
	options := &Options{}
	flag.BoolVar(&options.Duration, "duration", false, "Output only the duration")
	flag.BoolVar(&options.Transcript, "transcript", false, "Output only the transcript")
	flag.BoolVar(&options.Comments, "comments", false, "Output the comments on the video")
	flag.StringVar(&options.Lang, "lang", "en", "Language for the transcript (default: English)")
	flag.Parse()

	if flag.NArg() == 0 {
		err = fmt.Errorf("no URL provided")
		return
	}

	return o.Grab(flag.Arg(0), options)
}