mirror of https://github.com/danielmiessler/fabric
Merge branch 'main' into single.poetry
commit
f0255d2d6e
@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
import json
|
||||
import isodate
|
||||
import argparse
|
||||
|
||||
def get_video_id(url):
    """Return the 11-character YouTube video ID embedded in ``url``.

    The pattern is searched anywhere inside ``url``. The scheme and the
    ``www.`` prefix are optional. It recognises ``youtu.be/<id>`` links
    and ``youtube.com`` links where the ID follows ``v=`` in the query
    string, a ``/v/``, ``/e/`` or ``/embed/`` path segment, or a nested
    two-segment path.

    Returns ``None`` when no such ID is found.
    """
    video_id_regex = r'(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})'
    found = re.search(video_id_regex, url)
    if found is None:
        return None
    # Group 1 is the only capturing group: the ID itself.
    return found.group(1)
|
||||
|
||||
def main(url, options):
    """Print the transcript and/or duration of a YouTube video.

    Args:
        url: YouTube video URL; the video ID is extracted from it with
            ``get_video_id``.
        options: Object exposing boolean ``duration`` and ``transcript``
            attributes (the parsed argparse namespace). With ``duration``
            set only the duration in whole minutes is printed; otherwise
            with ``transcript`` set only the transcript text is printed;
            with neither, a JSON object holding both is printed.

    Returns:
        None. Results and error messages are all written to stdout.
    """
    # Load environment variables from .env file
    load_dotenv(os.path.expanduser('~/.config/fabric/.env'))

    # Get YouTube API key from environment variable
    api_key = os.getenv('YOUTUBE_API_KEY')
    if not api_key:
        print("Error: YOUTUBE_API_KEY not found in ~/.config/fabric/.env")
        return

    # Extract video ID from URL
    video_id = get_video_id(url)
    if not video_id:
        print("Invalid YouTube URL")
        return

    # Only the API calls can raise HttpError, so only they sit in the try.
    try:
        # Initialize the YouTube API client
        youtube = build('youtube', 'v3', developerKey=api_key)

        # Get video details
        video_response = youtube.videos().list(
            id=video_id,
            part='contentDetails'
        ).execute()
    except HttpError:
        print("Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid.")
        return

    # A syntactically valid ID that matches no accessible video yields an
    # empty "items" list; report that instead of failing with IndexError.
    items = video_response.get('items') or []
    if not items:
        print(f"Error: No video found for ID {video_id}")
        return

    # Extract video duration (ISO 8601, e.g. "PT4M13S") and convert to minutes
    duration_iso = items[0]['contentDetails']['duration']
    duration_seconds = isodate.parse_duration(duration_iso).total_seconds()
    duration_minutes = round(duration_seconds / 60)

    if options.duration:
        # The transcript is never printed in this mode, so skip fetching it.
        print(duration_minutes)
        return

    # Get video transcript. This is deliberately best-effort: any failure
    # is reported as a placeholder string, not an error.
    # NOTE(review): newer youtube_transcript_api releases appear to have
    # changed this entry point; if so the broad except would hide it --
    # confirm against the pinned version.
    try:
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = ' '.join(item['text'] for item in transcript_list)
        transcript_text = transcript_text.replace('\n', ' ')
    except Exception:
        transcript_text = "Transcript not available."

    if options.transcript:
        print(transcript_text)
        return

    # Default: emit both values as a single JSON object
    output = {
        "transcript": transcript_text,
        "duration": duration_minutes
    }
    print(json.dumps(output))
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: one optional positional URL plus two
    # flags that restrict the output to a single field.
    cli = argparse.ArgumentParser(
        description='vm (video meta) extracts metadata about a video, such as the transcript and the video\'s duration. By Daniel Miessler.'
    )
    cli.add_argument('url', nargs='?', help='YouTube video URL')
    cli.add_argument('--duration', action='store_true', help='Output only the duration')
    cli.add_argument('--transcript', action='store_true', help='Output only the transcript')
    parsed = cli.parse_args()

    if not parsed.url:
        # No URL supplied: show usage instead of running.
        cli.print_help()
    else:
        # The whole namespace is passed so main() can read the flags.
        main(parsed.url, parsed)
|
||||
|
@ -0,0 +1,3 @@
|
||||
# Credit
|
||||
|
||||
Co-created by Daniel Miessler and Jason Haddix based on influences from Claude Shannon's Information Theory and Mr. Beast's insanely viral content techniques.
|
@ -0,0 +1,45 @@
|
||||
# IDENTITY and PURPOSE
|
||||
|
||||
You are an expert parser and rater of value in content. Your goal is to determine how much value a reader/listener is being provided in a given piece of content as measured by a new metric called Value Per Minute (VPM).
|
||||
|
||||
Take a deep breath and think step-by-step about how best to achieve the best outcome using the STEPS below.
|
||||
|
||||
# STEPS
|
||||
|
||||
- Fully read and understand the content and what it's trying to communicate and accomplish.
|
||||
|
||||
- Estimate the duration of the content if it were to be consumed naturally, using the algorithm below:
|
||||
|
||||
1. Count the total number of words in the provided transcript.
|
||||
2. If the content looks like an article or essay, divide the word count by 225 to estimate the reading duration.
|
||||
3. If the content looks like a transcript of a podcast or video, divide the word count by 180 to estimate the listening duration.
|
||||
4. Round the calculated duration to the nearest minute.
|
||||
5. Store that value as estimated-content-minutes.
|
||||
|
||||
- Extract all Instances Of Value being provided within the content. Instances Of Value are defined as:
|
||||
|
||||
-- Highly surprising ideas or revelations.
|
||||
-- A giveaway of something useful or valuable to the audience.
|
||||
-- Untold and interesting stories with valuable takeaways.
|
||||
-- Sharing of an uncommonly valuable resource.
|
||||
-- Sharing of secret knowledge.
|
||||
-- Exclusive content that's never been revealed before.
|
||||
-- Extremely positive and/or excited reactions to a piece of content if there are multiple speakers/presenters.
|
||||
|
||||
- Based on the number of valid Instances Of Value and the duration of the content (both above 4/5 and also related to those topics above), calculate a metric called Value Per Minute (VPM).
|
||||
|
||||
# OUTPUT INSTRUCTIONS
|
||||
|
||||
- Output a valid JSON file with the following fields for the input provided.
|
||||
|
||||
{
|
||||
estimated-content-minutes: "(estimated-content-minutes)",
|
||||
value-instances: "(list of valid value instances)",
|
||||
vpm: "(the calculated VPM score.)",
|
||||
vpm-explanation: "(A one-sentence summary of less than 20 words on how you calculated the VPM for the content.)"
|
||||
}
|
||||
|
||||
|
||||
# INPUT:
|
||||
|
||||
INPUT:
|
Loading…
Reference in New Issue