fabric/installer/client/cli/yt.py

import re
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
from datetime import datetime
import os
import json
import isodate
import argparse
import sys


def get_video_id(url):
    # Extract video ID from URL
    pattern = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    match = re.search(pattern, url)
    return match.group(1) if match else None


def get_comments(youtube, video_id):
    comments = []

    try:
        # Fetch top-level comments
        request = youtube.commentThreads().list(
            part="snippet,replies",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100  # Adjust based on needs
        )

        while request:
            response = request.execute()
            for item in response['items']:
                # Top-level comment
                topLevelComment = item['snippet']['topLevelComment']['snippet']['textDisplay']
                comments.append(topLevelComment)

                # Check if there are replies in the thread
                if 'replies' in item:
                    for reply in item['replies']['comments']:
                        replyText = reply['snippet']['textDisplay']
                        # Add incremental spacing and a dash for replies
                        comments.append("    - " + replyText)

            # Prepare the next page of comments, if available
            if 'nextPageToken' in response:
                request = youtube.commentThreads().list_next(
                    previous_request=request, previous_response=response)
            else:
                request = None

    except HttpError as e:
        print(f"Failed to fetch comments: {e}")

    return comments


def main_function(url, options):
    # Load environment variables from .env file
    load_dotenv(os.path.expanduser("~/.config/fabric/.env"))

    # Get YouTube API key from environment variable
    api_key = os.getenv("YOUTUBE_API_KEY")
    if not api_key:
        print("Error: YOUTUBE_API_KEY not found in ~/.config/fabric/.env")
        return

    # Extract video ID from URL
    video_id = get_video_id(url)
    if not video_id:
        print("Invalid YouTube URL")
        return

    try:
        # Initialize the YouTube API client
        youtube = build("youtube", "v3", developerKey=api_key)

        # Get video details
        video_response = youtube.videos().list(
            id=video_id, part="contentDetails,snippet").execute()

        # Extract video duration and convert to minutes
        duration_iso = video_response["items"][0]["contentDetails"]["duration"]
        duration_seconds = isodate.parse_duration(duration_iso).total_seconds()
        duration_minutes = round(duration_seconds / 60)
        # Set up metadata
        metadata = {}
        metadata['id'] = video_response['items'][0]['id']
        metadata['title'] = video_response['items'][0]['snippet']['title']
        metadata['channel'] = video_response['items'][0]['snippet']['channelTitle']
        metadata['published_at'] = video_response['items'][0]['snippet']['publishedAt']

        # Get video transcript
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[options.lang])
            transcript_text = " ".join([item["text"] for item in transcript_list])
            transcript_text = transcript_text.replace("\n", " ")
        except Exception as e:
            transcript_text = f"Transcript not available in the selected language ({options.lang}). ({e})"

        # Get comments if the flag is set
        comments = []
        if options.comments:
            comments = get_comments(youtube, video_id)

        # Output based on options
        if options.duration:
            print(duration_minutes)
        elif options.transcript:
            print(transcript_text.encode('utf-8').decode('unicode-escape'))
        elif options.comments:
            print(json.dumps(comments, indent=2))
        elif options.metadata:
            print(json.dumps(metadata, indent=2))
        else:
            # Create JSON object with all data
            output = {
                "transcript": transcript_text,
                "duration": duration_minutes,
                "comments": comments,
                "metadata": metadata
            }
            # Print JSON object
            print(json.dumps(output, indent=2))
    except HttpError as e:
        print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}")


def main():
    parser = argparse.ArgumentParser(
        description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. By Daniel Miessler.')
    parser.add_argument('url', help='YouTube video URL')
    parser.add_argument('--duration', action='store_true', help='Output only the duration')
    parser.add_argument('--transcript', action='store_true', help='Output only the transcript')
    parser.add_argument('--comments', action='store_true', help='Output the comments on the video')
    parser.add_argument('--metadata', action='store_true', help='Output the video metadata')
    parser.add_argument('--lang', default='en', help='Language for the transcript (default: English)')

    args = parser.parse_args()

    if args.url is None:
        print("Error: No URL provided.")
        return

    main_function(args.url, args)

if __name__ == "__main__":
    main()