Merge pull request #264 from raisindetre/local-changes

PR - Added YT user comments retrieval to yt.py helper
Daniel Miessler, 2024-03-18 09:05:00 -07:00, committed by GitHub
commit 1fa85d9275
2 changed files with 72 additions and 22 deletions

Changed file 1 of 2: the helper-tools README

@@ -4,7 +4,7 @@ These are helper tools to work with Fabric. Examples include things like getting
## yt (YouTube)
`yt` is a command that uses the YouTube API to pull transcripts, pull user comments, get video duration, and perform other functions. Its primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.
```bash
usage: yt [-h] [--duration] [--transcript] [--comments] [url]
@@ -15,9 +15,10 @@ positional arguments:
  url           YouTube video URL

options:
  -h, --help    Show this help message and exit
  --duration    Output only the duration
  --transcript  Output only the transcript
  --comments    Output only the user comments
```
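
As an example of the intended pipeline, either output can be piped straight into a Pattern. This is a sketch that assumes the `fabric` CLI is installed with a pattern named `summarize`; the URL is illustrative:

```bash
# Summarize a video's transcript (sketch; assumes a `summarize` pattern exists)
yt --transcript "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | fabric --pattern summarize

# The new --comments flag slots into the same pipeline
yt --comments "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | fabric --pattern summarize
```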
## ts (Audio transcriptions)
@@ -49,7 +50,7 @@ positional arguments:
options:
  -h, --help  show this help message and exit
```
## save
`save` is a "tee-like" utility to pipeline saving of content while keeping the output stream intact. It can optionally generate "frontmatter" for PKM utilities like Obsidian via the
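
A minimal sketch of where `save` could sit at the end of a `yt` pipeline (the note name is hypothetical and the `summarize` pattern is assumed as above; check `save -h` for the actual options):

```bash
# Keep the summary on stdout while also saving it ("video-notes" is a
# hypothetical target; frontmatter options are not shown here)
yt --transcript "https://www.youtube.com/watch?v=dQw4w9WgXcQ" \
  | fabric --pattern summarize \
  | save "video-notes"
```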

Changed file 2 of 2: yt.py

@@ -17,6 +17,46 @@ def get_video_id(url):
    return match.group(1) if match else None

def get_comments(youtube, video_id):
    comments = []

    try:
        # Fetch top-level comments
        request = youtube.commentThreads().list(
            part="snippet,replies",
            videoId=video_id,
            textFormat="plainText",
            maxResults=100  # Adjust based on needs
        )

        while request:
            response = request.execute()
            for item in response['items']:
                # Top-level comment
                topLevelComment = item['snippet']['topLevelComment']['snippet']['textDisplay']
                comments.append(topLevelComment)

                # Check if there are replies in the thread
                if 'replies' in item:
                    for reply in item['replies']['comments']:
                        replyText = reply['snippet']['textDisplay']
                        # Add incremental spacing and a dash for replies
                        comments.append("    - " + replyText)

            # Prepare the next page of comments, if available
            if 'nextPageToken' in response:
                request = youtube.commentThreads().list_next(
                    previous_request=request, previous_response=response)
            else:
                request = None

    except HttpError as e:
        print(f"Failed to fetch comments: {e}")

    return comments

def main_function(url, options):
    # Load environment variables from .env file
    load_dotenv(os.path.expanduser("~/.config/fabric/.env"))

@@ -38,9 +78,8 @@ def main_function(url, options):
        youtube = build("youtube", "v3", developerKey=api_key)

        # Get video details
        video_response = youtube.videos().list(
            id=video_id, part="contentDetails").execute()

        # Extract video duration and convert to minutes
        duration_iso = video_response["items"][0]["contentDetails"]["duration"]
@@ -50,41 +89,51 @@ def main_function(url, options):
        # Get video transcript
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join([item["text"] for item in transcript_list])
            transcript_text = transcript_text.replace("\n", " ")
        except Exception as e:
            transcript_text = f"Transcript not available. ({e})"

        # Get comments if the flag is set
        comments = []
        if options.comments:
            comments = get_comments(youtube, video_id)

        # Output based on options
        if options.duration:
            print(duration_minutes)
        elif options.transcript:
            print(transcript_text)
        elif options.comments:
            print(json.dumps(comments, indent=2))
        else:
            # Create JSON object with all data
            output = {
                "transcript": transcript_text,
                "duration": duration_minutes,
                "comments": comments
            }

            # Print JSON object
            print(json.dumps(output, indent=2))
    except HttpError as e:
        print(f"Error: Failed to access YouTube API. Please check your YOUTUBE_API_KEY and ensure it is valid: {e}")

def main():
    parser = argparse.ArgumentParser(
        description='yt (video meta) extracts metadata about a video, such as the transcript, the video\'s duration, and now comments. By Daniel Miessler.')
    parser.add_argument('url', help='YouTube video URL')
    parser.add_argument('--duration', action='store_true', help='Output only the duration')
    parser.add_argument('--transcript', action='store_true', help='Output only the transcript')
    parser.add_argument('--comments', action='store_true', help='Output the comments on the video')

    args = parser.parse_args()

    if args.url is None:
        print("Error: No URL provided.")
        return

    main_function(args.url, args)


if __name__ == "__main__":
    main()
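
For reference, a sketch of exercising the new flag once this is merged (the URL and comment text are illustrative, not real output):

```bash
export YOUTUBE_API_KEY="your-api-key"   # required by yt (normally read from ~/.config/fabric/.env)

# Comments only: printed as a JSON array; replies follow their parent
# comment with a leading dash
yt --comments "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
# [
#   "Great walkthrough!",
#   "    - Agreed, very clear.",
#   "Does this work with playlists?"
# ]

# Default output bundles transcript, duration, and comments into one JSON
# object; note that "comments" stays [] here, since the list is only
# populated when --comments is passed, and that flag prints comments alone
yt "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
```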