import argparse
import json
import os
import re
import sys
from datetime import datetime

import isodate
from dotenv import load_dotenv
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
def get_video_id(url):
    """Extract the 11-character YouTube video ID from *url*.

    Handles watch (?v=), embed/, v/, shorts/, live/ and youtu.be short
    links. Returns the video ID string, or None when no ID is found.
    """
    # One alternation per URL shape; the final capture group is always the
    # 11-character video ID. shorts/ and live/ were added because the first
    # alternation requires a trailing slash after the path segment and so
    # never matched them.
    pattern = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    match = re.search(pattern, url)
    return match.group(1) if match else None
def get_comments(youtube, video_id):
    """Return all comments on a video as a flat list of strings.

    Top-level comments appear verbatim; each reply is prefixed with " - "
    to show threading. Pages through the API until no nextPageToken is
    returned. On an HTTP error, reports it and returns what was collected.
    """
    collected = []
    try:
        # First page of comment threads for the video.
        page = youtube.commentThreads().list(
            part="snippet,replies",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100  # Adjust based on needs
        )
        while page:
            data = page.execute()
            for thread in data['items']:
                # The thread's top-level comment text.
                collected.append(
                    thread['snippet']['topLevelComment']['snippet']['textDisplay'])
                # Replies (when present) get a dash marker for visual nesting.
                if 'replies' in thread:
                    for child in thread['replies']['comments']:
                        collected.append(" - " + child['snippet']['textDisplay'])
            # Advance to the next page, or stop when there is none.
            if 'nextPageToken' in data:
                page = youtube.commentThreads().list_next(
                    previous_request=page, previous_response=data)
            else:
                page = None
    except HttpError as e:
        print(f"Failed to fetch comments: {e}")
    return collected
def main_function(url, options):
    """Fetch data about the YouTube video at *url* and print it.

    options is an argparse.Namespace with boolean flags `duration`,
    `transcript`, `comments`, `metadata` and a string `lang` (transcript
    language code). Exactly one section is printed depending on the flags;
    with no flag set, everything is emitted as a single JSON object.
    """
    # Load environment variables from .env file
    load_dotenv(os.path.expanduser("~/.config/fabric/.env"))

    # Get YouTube API key from environment variable
    api_key = os.getenv("YOUTUBE_API_KEY")
    if not api_key:
        print("Error: YOUTUBE_API_KEY not found in ~/.config/fabric/.env")
        return

    # Extract video ID from URL
    video_id = get_video_id(url)
    if not video_id:
        print("Invalid YouTube URL")
        return

    try:
        # Initialize the YouTube API client
        youtube = build("youtube", "v3", developerKey=api_key)

        # Get video details
        video_response = youtube.videos().list(
            id=video_id, part="contentDetails,snippet").execute()

        # A missing/private/deleted video returns an empty items list;
        # indexing it blindly raised an uncaught IndexError before.
        if not video_response.get("items"):
            print("Error: Video not found or is not accessible.")
            return
        video_item = video_response["items"][0]

        # Extract video duration (ISO 8601) and convert to whole minutes.
        duration_iso = video_item["contentDetails"]["duration"]
        duration_seconds = isodate.parse_duration(duration_iso).total_seconds()
        duration_minutes = round(duration_seconds / 60)

        # Set up metadata
        metadata = {
            'id': video_item['id'],
            'title': video_item['snippet']['title'],
            'channel': video_item['snippet']['channelTitle'],
            'published_at': video_item['snippet']['publishedAt'],
        }

        # Get video transcript; fall back to a human-readable notice when the
        # requested language (or any transcript) is unavailable.
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[options.lang])
            transcript_text = " ".join([item["text"] for item in transcript_list])
            transcript_text = transcript_text.replace("\n", " ")
        except Exception as e:
            transcript_text = f"Transcript not available in the selected language ({options.lang}). ({e})"

        # Get comments if the flag is set
        comments = []
        if options.comments:
            comments = get_comments(youtube, video_id)

        # Output based on options
        if options.duration:
            print(duration_minutes)
        elif options.transcript:
            # Print the transcript as-is. The previous
            # .encode('utf-8').decode('unicode-escape') round-trip corrupted
            # any non-ASCII characters (mojibake), so it was removed.
            print(transcript_text)
        elif options.comments:
            print(json.dumps(comments, indent=2))
        elif options.metadata:
            print(json.dumps(metadata, indent=2))
        else:
            # Default: create a JSON object with all data and print it.
            output = {
                "transcript": transcript_text,
                "duration": duration_minutes,
                "comments": comments,
                "metadata": metadata,
            }
            print(json.dumps(output, indent=2))

    except HttpError as e:
        print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}")
|
|
|
def main():
    """Parse command-line arguments and dispatch to main_function."""
    parser = argparse.ArgumentParser(
        description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. By Daniel Miessler.')
    parser.add_argument('url', help='YouTube video URL')
    parser.add_argument('--duration', action='store_true', help='Output only the duration')
    parser.add_argument('--transcript', action='store_true', help='Output only the transcript')
    parser.add_argument('--comments', action='store_true', help='Output the comments on the video')
    parser.add_argument('--metadata', action='store_true', help='Output the video metadata')
    parser.add_argument('--lang', default='en', help='Language for the transcript (default: English)')

    args = parser.parse_args()

    # `url` is a required positional, so argparse exits with a usage error
    # before reaching this point when it is missing — no None check needed.
    main_function(args.url, args)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()