mirror of
https://github.com/danielmiessler/fabric
synced 2024-11-08 07:11:06 +00:00
Merge pull request #867 from danielmiessler/youtube_graber
feat: native integration of yt to fabric
This commit is contained in:
commit
9a0444db7e
41
cli/cli.go
41
cli/cli.go
@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/danielmiessler/fabric/core"
|
||||
"github.com/danielmiessler/fabric/db"
|
||||
@ -101,6 +102,46 @@ func Cli() (message string, err error) {
|
||||
|
||||
// if none of the above currentFlags are set, run the initiate chat function
|
||||
|
||||
if currentFlags.YouTube != "" {
|
||||
if fabric.YouTube.IsConfigured() == false {
|
||||
err = fmt.Errorf("YouTube is not configured, please run the setup procedure")
|
||||
return
|
||||
}
|
||||
|
||||
var videoId string
|
||||
if videoId, err = fabric.YouTube.GetVideoId(currentFlags.YouTube); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if currentFlags.YouTubeTranscript {
|
||||
var transcript string
|
||||
if transcript, err = fabric.YouTube.GrabTranscript(videoId); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if currentFlags.Message != "" {
|
||||
currentFlags.Message = currentFlags.Message + "\n" + transcript
|
||||
} else {
|
||||
currentFlags.Message = transcript
|
||||
}
|
||||
}
|
||||
|
||||
if currentFlags.YouTubeComments {
|
||||
var comments []string
|
||||
if comments, err = fabric.YouTube.GrabComments(videoId); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
commentsString := strings.Join(comments, "\n")
|
||||
|
||||
if currentFlags.Message != "" {
|
||||
currentFlags.Message = currentFlags.Message + "\n" + commentsString
|
||||
} else {
|
||||
currentFlags.Message = commentsString
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var chatter *core.Chatter
|
||||
if chatter, err = fabric.GetChatter(currentFlags.Model, currentFlags.Stream); err != nil {
|
||||
return
|
||||
|
@ -34,6 +34,9 @@ type Flags struct {
|
||||
Output string `short:"o" long:"output" description:"Output to file" default:""`
|
||||
LatestPatterns string `short:"n" long:"latest" description:"Number of latest patterns to list" default:"0"`
|
||||
ChangeDefaultModel bool `short:"d" long:"changeDefaultModel" description:"Change default pattern"`
|
||||
YouTube string `short:"y" long:"youtube" description:"YouTube video url to grab transcript, comments from it and send to chat"`
|
||||
YouTubeTranscript bool `long:"transcript" description:"Grab transcript from YouTube video and send to chat"`
|
||||
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
|
||||
}
|
||||
|
||||
// Init Initialize flags. returns a Flags struct and an error
|
||||
|
1
go.mod
1
go.mod
@ -30,6 +30,7 @@ require (
|
||||
dario.cat/mergo v1.0.0 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.1 // indirect
|
||||
github.com/ProtonMail/go-crypto v1.0.0 // indirect
|
||||
github.com/anaskhan96/soup v1.2.5 // indirect
|
||||
github.com/cloudflare/circl v1.3.7 // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.2.4 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
|
4
go.sum
4
go.sum
@ -19,6 +19,8 @@ github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migc
|
||||
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
|
||||
github.com/ProtonMail/go-crypto v1.0.0 h1:LRuvITjQWX+WIfr930YHG2HNfjR1uOfyf5vE0kC2U78=
|
||||
github.com/ProtonMail/go-crypto v1.0.0/go.mod h1:EjAoLdwvbIOoOQr3ihjnSoLZRtE8azugULFRteWMNc0=
|
||||
github.com/anaskhan96/soup v1.2.5 h1:V/FHiusdTrPrdF4iA1YkVxsOpdNcgvqT1hG+YtcZ5hM=
|
||||
github.com/anaskhan96/soup v1.2.5/go.mod h1:6YnEp9A2yywlYdM4EgDz9NEHclocMepEtku7wg6Cq3s=
|
||||
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
|
||||
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||
@ -145,6 +147,7 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
@ -187,6 +190,7 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
|
@ -1,7 +1,18 @@
|
||||
package youtube
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/anaskhan96/soup"
|
||||
"github.com/danielmiessler/fabric/common"
|
||||
"google.golang.org/api/option"
|
||||
"google.golang.org/api/youtube/v3"
|
||||
"log"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func NewYouTube() (ret *YouTube) {
|
||||
@ -22,4 +33,218 @@ func NewYouTube() (ret *YouTube) {
|
||||
type YouTube struct {
|
||||
*common.Configurable
|
||||
ApiKey *common.SetupQuestion
|
||||
|
||||
service *youtube.Service
|
||||
}
|
||||
|
||||
func (o *YouTube) initService() (err error) {
|
||||
if o.service == nil {
|
||||
ctx := context.Background()
|
||||
o.service, err = youtube.NewService(ctx, option.WithAPIKey(o.ApiKey.Value))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) GetVideoId(url string) (ret string, err error) {
|
||||
if err = o.initService(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
pattern := `(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})`
|
||||
re := regexp.MustCompile(pattern)
|
||||
match := re.FindStringSubmatch(url)
|
||||
if len(match) > 1 {
|
||||
ret = match[1]
|
||||
} else {
|
||||
err = fmt.Errorf("invalid YouTube URL, can't get video ID")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscriptForUrl(url string) (ret string, err error) {
|
||||
var videoId string
|
||||
if videoId, err = o.GetVideoId(url); err != nil {
|
||||
return
|
||||
}
|
||||
return o.GrabTranscript(videoId)
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscript(videoId string) (ret string, err error) {
|
||||
var transcript string
|
||||
if transcript, err = o.GrabTranscriptBase(videoId); err != nil {
|
||||
err = fmt.Errorf("transcript not available. (%v)", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse the XML transcript
|
||||
doc := soup.HTMLParse(transcript)
|
||||
// Extract the text content from the <text> tags
|
||||
textTags := doc.FindAll("text")
|
||||
var textBuilder strings.Builder
|
||||
for _, textTag := range textTags {
|
||||
textBuilder.WriteString(textTag.Text())
|
||||
textBuilder.WriteString(" ")
|
||||
ret = textBuilder.String()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscriptBase(videoId string) (ret string, err error) {
|
||||
if err = o.initService(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
url := "https://www.youtube.com/watch?v=" + videoId
|
||||
var resp string
|
||||
if resp, err = soup.Get(url); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
doc := soup.HTMLParse(resp)
|
||||
scriptTags := doc.FindAll("script")
|
||||
for _, scriptTag := range scriptTags {
|
||||
if strings.Contains(scriptTag.Text(), "captionTracks") {
|
||||
regex := regexp.MustCompile(`"captionTracks":(\[.*?\])`)
|
||||
match := regex.FindStringSubmatch(scriptTag.Text())
|
||||
if len(match) > 1 {
|
||||
var captionTracks []struct {
|
||||
BaseURL string `json:"baseUrl"`
|
||||
}
|
||||
|
||||
if err = json.Unmarshal([]byte(match[1]), &captionTracks); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if len(captionTracks) > 0 {
|
||||
transcriptURL := captionTracks[0].BaseURL
|
||||
ret, err = soup.Get(transcriptURL)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
err = fmt.Errorf("transcript not found")
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabComments(videoId string) (ret []string, err error) {
|
||||
if err = o.initService(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
call := o.service.CommentThreads.List([]string{"snippet", "replies"}).VideoId(videoId).TextFormat("plainText").MaxResults(100)
|
||||
var response *youtube.CommentThreadListResponse
|
||||
if response, err = call.Do(); err != nil {
|
||||
log.Printf("Failed to fetch comments: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, item := range response.Items {
|
||||
topLevelComment := item.Snippet.TopLevelComment.Snippet.TextDisplay
|
||||
ret = append(ret, topLevelComment)
|
||||
|
||||
if item.Replies != nil {
|
||||
for _, reply := range item.Replies.Comments {
|
||||
replyText := reply.Snippet.TextDisplay
|
||||
ret = append(ret, " - "+replyText)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabDurationForUrl(url string) (ret int, err error) {
|
||||
if err = o.initService(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var videoId string
|
||||
if videoId, err = o.GetVideoId(url); err != nil {
|
||||
return
|
||||
}
|
||||
return o.GrabDuration(videoId)
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabDuration(videoId string) (ret int, err error) {
|
||||
var videoResponse *youtube.VideoListResponse
|
||||
if videoResponse, err = o.service.Videos.List([]string{"contentDetails"}).Id(videoId).Do(); err != nil {
|
||||
err = fmt.Errorf("error getting video details: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
durationStr := videoResponse.Items[0].ContentDetails.Duration
|
||||
|
||||
matches := regexp.MustCompile(`(?i)PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?`).FindStringSubmatch(durationStr)
|
||||
if len(matches) == 0 {
|
||||
return 0, fmt.Errorf("invalid duration string: %s", durationStr)
|
||||
}
|
||||
|
||||
hours, _ := strconv.Atoi(matches[1])
|
||||
minutes, _ := strconv.Atoi(matches[2])
|
||||
seconds, _ := strconv.Atoi(matches[3])
|
||||
|
||||
ret = hours*60 + minutes + seconds/60
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error) {
|
||||
var videoId string
|
||||
if videoId, err = o.GetVideoId(url); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
ret = &VideoInfo{}
|
||||
|
||||
if options.Duration {
|
||||
if ret.Duration, err = o.GrabDuration(videoId); err != nil {
|
||||
err = fmt.Errorf("error parsing video duration: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if options.Comments {
|
||||
if ret.Comments, err = o.GrabComments(videoId); err != nil {
|
||||
err = fmt.Errorf("error getting comments: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if options.Transcript {
|
||||
if ret.Transcript, err = o.GrabTranscript(videoId); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Options selects which pieces of information Grab fetches for a video.
type Options struct {
	// Duration requests the video duration.
	Duration bool
	// Transcript requests the video transcript.
	Transcript bool
	// Comments requests the video comments.
	Comments bool
	// Lang is the transcript language set by the "lang" command-line flag.
	// NOTE(review): no code visible in this file reads it yet.
	Lang string
}
|
||||
|
||||
// VideoInfo is the result of Grab. Only the fields that were requested through
// Options are filled in; the rest keep their zero values.
type VideoInfo struct {
	// Transcript is the plain-text transcript produced by GrabTranscript.
	Transcript string `json:"transcript"`
	// Duration is the video length in whole minutes, as computed by GrabDuration.
	Duration int `json:"duration"`
	// Comments lists top-level comments and their replies as returned by GrabComments.
	Comments []string `json:"comments"`
}
|
||||
|
||||
func (o *YouTube) GrabByFlags() (ret *VideoInfo, err error) {
|
||||
options := &Options{}
|
||||
flag.BoolVar(&options.Duration, "duration", false, "Output only the duration")
|
||||
flag.BoolVar(&options.Transcript, "transcript", false, "Output only the transcript")
|
||||
flag.BoolVar(&options.Comments, "comments", false, "Output the comments on the video")
|
||||
flag.StringVar(&options.Lang, "lang", "en", "Language for the transcript (default: English)")
|
||||
flag.Parse()
|
||||
|
||||
if flag.NArg() == 0 {
|
||||
log.Fatal("Error: No URL provided.")
|
||||
}
|
||||
|
||||
url := flag.Arg(0)
|
||||
ret, err = o.Grab(url, options)
|
||||
return
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user