Merge pull request #264 from raisindetre/local-changes

PR - Added YT user comments retrieval to yt.py helper
Daniel Miessler, 2024-03-18 09:05:00 -07:00, committed by GitHub
commit 1fa85d9275
2 changed files with 72 additions and 22 deletions

Changed file 1 of 2: the helper-tools README

@@ -4,7 +4,7 @@ These are helper tools to work with Fabric. Examples include things like getting
## yt (YouTube)
`yt` is a command that uses the YouTube API to pull transcripts, pull user comments, get video duration, and perform other functions. Its primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.
```bash
usage: yt [-h] [--duration] [--transcript] [--comments] [url]
@@ -15,9 +15,10 @@ positional arguments:
  url           YouTube video URL

options:
  -h, --help    Show this help message and exit
  --duration    Output only the duration
  --transcript  Output only the transcript
  --comments    Output only the user comments
```
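
As an example of the intended pipeline, either output can be piped straight into a Pattern. This is a sketch that assumes the `fabric` CLI is installed with a pattern named `summarize`; the URL is illustrative:

```bash
# Summarize a video's transcript (sketch; assumes a `summarize` pattern exists)
yt --transcript "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | fabric --pattern summarize

# The new --comments flag slots into the same pipeline
yt --comments "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | fabric --pattern summarize
```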
## ts (Audio transcriptions)
@@ -49,7 +50,7 @@ positional arguments:
options:
  -h, --help  show this help message and exit
```
## save
`save` is a "tee-like" utility to pipeline saving of content while keeping the output stream intact. It can optionally generate "frontmatter" for PKM utilities like Obsidian via the
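
A minimal sketch of where `save` could sit at the end of a `yt` pipeline (the note name is hypothetical and the `summarize` pattern is assumed as above; check `save -h` for the actual options):

```bash
# Keep the summary on stdout while also saving it ("video-notes" is a
# hypothetical target; frontmatter options are not shown here)
yt --transcript "https://www.youtube.com/watch?v=dQw4w9WgXcQ" \
  | fabric --pattern summarize \
  | save "video-notes"
```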

Changed file 2 of 2: yt.py

@@ -17,6 +17,46 @@ def get_video_id(url):
    return match.group(1) if match else None

def get_comments(youtube, video_id):
    comments = []

    try:
        # Fetch top-level comments
        request = youtube.commentThreads().list(
            part="snippet,replies",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100  # Adjust based on needs
        )

        while request:
            response = request.execute()
            for item in response['items']:
                # Top-level comment
                topLevelComment = item['snippet']['topLevelComment']['snippet']['textDisplay']
                comments.append(topLevelComment)

                # Check if there are replies in the thread
                if 'replies' in item:
                    for reply in item['replies']['comments']:
                        replyText = reply['snippet']['textDisplay']
                        # Add incremental spacing and a dash for replies
                        comments.append("    - " + replyText)

            # Prepare the next page of comments, if available
            if 'nextPageToken' in response:
                request = youtube.commentThreads().list_next(
                    previous_request=request, previous_response=response)
            else:
                request = None

    except HttpError as e:
        print(f"Failed to fetch comments: {e}")

    return comments

def main_function(url, options):
    # Load environment variables from .env file
    load_dotenv(os.path.expanduser("~/.config/fabric/.env"))

@@ -38,9 +78,8 @@ def main_function(url, options):
        youtube = build("youtube", "v3", developerKey=api_key)

        # Get video details
        video_response = youtube.videos().list(
            id=video_id, part="contentDetails").execute()

        # Extract video duration and convert to minutes
        duration_iso = video_response["items"][0]["contentDetails"]["duration"]
@@ -50,41 +89,51 @@ def main_function(url, options):
        # Get video transcript
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join([item["text"] for item in transcript_list])
            transcript_text = transcript_text.replace("\n", " ")
        except Exception as e:
            transcript_text = f"Transcript not available. ({e})"

        # Get comments if the flag is set
        comments = []
        if options.comments:
            comments = get_comments(youtube, video_id)

        # Output based on options
        if options.duration:
            print(duration_minutes)
        elif options.transcript:
            print(transcript_text)
        elif options.comments:
            print(json.dumps(comments, indent=2))
        else:
            # Create JSON object with all data
            output = {
                "transcript": transcript_text,
                "duration": duration_minutes,
                "comments": comments
            }

            # Print JSON object
            print(json.dumps(output, indent=2))
    except HttpError as e:
        print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}")

def main():
    parser = argparse.ArgumentParser(
        description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. By Daniel Miessler.')
    parser.add_argument('url', help='YouTube video URL')
    parser.add_argument('--duration', action='store_true', help='Output only the duration')
    parser.add_argument('--transcript', action='store_true', help='Output only the transcript')
    parser.add_argument('--comments', action='store_true', help='Output the comments on the video')

    args = parser.parse_args()

    if args.url is None:
        print("Error: No URL provided.")
        return

    main_function(args.url, args)


if __name__ == "__main__":
    main()
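
For reference, a sketch of exercising the new flag once this is merged (the URL and comment text are illustrative, not real output):

```bash
export YOUTUBE_API_KEY="your-api-key"   # required by yt (normally read from ~/.config/fabric/.env)

# Comments only: printed as a JSON array; replies follow their parent
# comment with a leading dash
yt --comments "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# [
#   "Great walkthrough!",
#   "    - Agreed, very clear.",
#   "Does this work with playlists?"
# ]

# Default output bundles transcript, duration, and comments into one JSON
# object; note that "comments" stays [] here, since the list is only
# populated when --comments is passed, and that flag prints comments alone
yt "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
```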