fabric/installer/client/cli/yt.py
obswork 90ecbde180 feat: add metadata flag to yt cli
Output includes: id, title, channel, and published_at
2024-05-02 13:36:04 -04:00

152 lines
5.6 KiB
Python

import re
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
from datetime import datetime
import os
import json
import isodate
import argparse
import sys
def get_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognised link forms are ``youtube.com/watch?v=<id>`` (``v`` may be any
    query parameter), ``youtu.be/<id>``, ``youtube.com/v/<id>``,
    ``youtube.com/e/<id>``, ``youtube.com/embed/<id>``,
    ``youtube.com/shorts/<id>`` and ``youtube.com/live/<id>``. The scheme and
    the ``www.`` prefix are optional.

    Args:
        url: Text containing a YouTube video link.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is empty, is not a
        string, or contains no recognisable video link.
    """
    # Guard first so a missing/invalid argument yields None instead of a
    # TypeError from re.search().
    if not isinstance(url, str) or not url:
        return None

    pattern = (
        r"(?:https?://)?(?:www\.)?"
        r"(?:youtube\.com/"
        # <segment>/<anything>/ | fixed path prefixes | ...?v= / ...&v=
        r"(?:[^/\n\s]+/\S+/|(?:v|e(?:mbed)?|shorts|live)/|\S*?[?&]v=)"
        r"|youtu\.be/)"
        r"([a-zA-Z0-9_-]{11})"
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None
def get_comments(youtube, video_id):
    """Collect the comments on a video as a flat list of strings.

    Pages through the ``commentThreads`` listing, requesting 100 threads per
    page as plain text. Each top-level comment is appended unchanged; the
    replies delivered with its thread follow it, each prefixed with ``" - "``.

    Args:
        youtube: YouTube Data API client object exposing ``commentThreads()``.
        video_id: ID of the video whose comment threads are listed.

    Returns:
        The list of comment strings in the order they were received. If the
        API raises ``HttpError``, the failure is reported on stderr and the
        comments gathered up to that point are returned.
    """
    comments = []
    try:
        threads = youtube.commentThreads()
        request = threads.list(
            part="snippet,replies",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100,  # threads per page; adjust based on needs
        )
        while request is not None:
            response = request.execute()
            # A page without an "items" key is treated as an empty page.
            for thread in response.get("items", []):
                top_level = thread["snippet"]["topLevelComment"]["snippet"]
                comments.append(top_level["textDisplay"])
                # "replies" is only present on threads that have replies.
                replies = thread.get("replies", {}).get("comments", [])
                comments.extend(
                    " - " + reply["snippet"]["textDisplay"] for reply in replies
                )
            # list_next() yields None once the response has no nextPageToken,
            # which ends the loop.
            request = threads.list_next(
                previous_request=request, previous_response=response
            )
    except HttpError as e:
        # Diagnostics go to stderr so they never mix with the data this tool
        # prints on stdout for piping.
        print(f"Failed to fetch comments: {e}", file=sys.stderr)
    return comments
def _fetch_transcript(video_id, lang):
    """Return the transcript in ``lang`` as one line of text, or a fallback message."""
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
        text = " ".join(segment["text"] for segment in segments)
        return text.replace("\n", " ")
    except Exception as e:
        # Deliberate best-effort: any failure becomes the message below so the
        # rest of the output can still be produced.
        return f"Transcript not available in the selected language ({lang}). ({e})"


def main_function(url, options):
    """Look up one YouTube video and print the view selected by ``options``.

    The API key is read from ``YOUTUBE_API_KEY`` after loading
    ``~/.config/fabric/.env``. Exactly one view is printed to stdout, chosen
    in this order of precedence:

    1. ``options.duration``   - duration in whole minutes
    2. ``options.transcript`` - transcript text
    3. ``options.comments``   - JSON list of comments
    4. ``options.metadata``   - JSON object with id, title, channel,
       published_at
    5. otherwise              - JSON object with transcript, duration,
       comments and metadata

    The transcript and the comments are only downloaded when the selected
    view prints them.

    Args:
        url: YouTube video URL.
        options: Object with boolean attributes ``duration``, ``transcript``,
            ``comments``, ``metadata`` and a string attribute ``lang``.

    Returns:
        None. A missing key, an unrecognised URL, an unknown video or an
        ``HttpError`` is reported on stderr.
    """
    load_dotenv(os.path.expanduser("~/.config/fabric/.env"))
    api_key = os.getenv("YOUTUBE_API_KEY")
    if not api_key:
        print("Error: YOUTUBE_API_KEY not found in ~/.config/fabric/.env", file=sys.stderr)
        return

    video_id = get_video_id(url)
    if not video_id:
        print("Invalid YouTube URL", file=sys.stderr)
        return

    try:
        youtube = build("youtube", "v3", developerKey=api_key)
        video_response = youtube.videos().list(
            id=video_id, part="contentDetails,snippet").execute()

        # An ID that matches no video gives an empty result list; report it
        # instead of failing on the [0] lookup.
        items = video_response.get("items") or []
        if not items:
            print(f"Error: No video found for ID {video_id}", file=sys.stderr)
            return
        video = items[0]
        snippet = video["snippet"]

        # The API reports the duration as an ISO 8601 string; convert it to
        # minutes, rounded to the nearest integer.
        duration_seconds = isodate.parse_duration(
            video["contentDetails"]["duration"]).total_seconds()
        duration_minutes = round(duration_seconds / 60)

        metadata = {
            "id": video["id"],
            "title": snippet["title"],
            "channel": snippet["channelTitle"],
            "published_at": snippet["publishedAt"],
        }

        if options.duration:
            print(duration_minutes)
        elif options.transcript:
            # Print the text as-is: round-tripping it through
            # encode('utf-8') / decode('unicode-escape') garbles every
            # non-ASCII character.
            print(_fetch_transcript(video_id, options.lang))
        elif options.comments:
            print(json.dumps(get_comments(youtube, video_id), indent=2))
        elif options.metadata:
            print(json.dumps(metadata, indent=2))
        else:
            output = {
                "transcript": _fetch_transcript(video_id, options.lang),
                "duration": duration_minutes,
                # Comments are only fetched with the comments flag, which is
                # not set on this branch, so the combined view has none.
                "comments": [],
                "metadata": metadata,
            }
            print(json.dumps(output, indent=2))
    except HttpError as e:
        print(
            f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}",
            file=sys.stderr,
        )
def main():
    """Command-line entry point: parse the arguments and call ``main_function``.

    Accepts a positional video URL, the on/off switches ``--duration``,
    ``--transcript``, ``--comments`` and ``--metadata``, and ``--lang``
    (default ``en``). The parsed namespace is passed on unchanged as the
    options object.
    """
    cli = argparse.ArgumentParser(
        description="yt (video meta) extracts metadata about a video, such as the transcript, the video's duration, and now comments. By Daniel Miessler."
    )
    cli.add_argument("url", help="YouTube video URL")

    # The four output selectors are plain boolean switches; register them in
    # the order they should appear in --help.
    switches = (
        ("--duration", "Output only the duration"),
        ("--transcript", "Output only the transcript"),
        ("--comments", "Output the comments on the video"),
        ("--metadata", "Output the video metadata"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    cli.add_argument("--lang", default="en", help="Language for the transcript (default: English)")

    parsed = cli.parse_args()
    if parsed.url is not None:
        main_function(parsed.url, parsed)
        return
    print("Error: No URL provided.")


# Run the CLI only when this file is executed directly.
if __name__ == "__main__":
    main()