added transcription

2024-11-08 07:11:06 +00:00 · 2024-03-03 08:42:40 -05:00 · 2024-03-03 08:42:40 -05:00 · 0ab8052c69
commit 0ab8052c69
parent 70356b34c6
4 changed files with 153 additions and 1 deletions
--- a/helpers/README.md
+++ b/helpers/README.md
@ -6,6 +6,23 @@ These are helper tools to work with Fabric. Examples include things like getting

 `yt` is a command that uses the YouTube API to pull transcripts, get video duration, and other functions. Its primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.

+## ts (Audio transcriptions)
+
+`ts` is a command that uses the OpenAI Whisper API to transcribe audio files. Due to the context window, this tool uses pydub to split the files into 10-minute segments. For more information on pydub, please refer to https://github.com/jiaaro/pydub
+
+### Installation
+
+```bash
+mac:
+brew install ffmpeg
+
+linux:
+apt install ffmpeg
+
+windows:
+download instructions https://www.ffmpeg.org/download.html
+```
+
 ```bash
 usage: yt [-h] [--duration] [--transcript] [url]

@ -19,3 +36,16 @@ options:
  --duration    Output only the duration
  --transcript  Output only the transcript
 ```
+
+```bash
+ts -h
+usage: ts [-h] audio_file
+
+Transcribe an audio file.
+
+positional arguments:
+  audio_file  The path to the audio file to be transcribed.
+
+options:
+  -h, --help  show this help message and exit
+```
--- a/helpers/ts
+++ b/helpers/ts
@ -0,0 +1,110 @@
+from dotenv import load_dotenv
+from pydub import AudioSegment
+from openai import OpenAI
+import os
+import argparse
+
+
+class Whisper:
+    def __init__(self):
+        env_file = os.path.expanduser("~/.config/fabric/.env")
+        load_dotenv(env_file)
+        try:
+            apikey = os.environ["OPENAI_API_KEY"]
+            self.client = OpenAI()
+            self.client.api_key = apikey
+        except KeyError:
+            print("OPENAI_API_KEY not found in environment variables.")
+
+        except FileNotFoundError:
+            print("No API key found. Use the --apikey option to set the key")
+        self.whole_response = []
+
+    def split_audio(self, file_path):
+        """
+        Splits the audio file into segments of the given length.
+
+        Args:
+        - file_path: The path to the audio file.
+        - segment_length_ms: Length of each segment in milliseconds.
+
+        Returns:
+        - A list of audio segments.
+        """
+        audio = AudioSegment.from_file(file_path)
+        segments = []
+        segment_length_ms = 10 * 60 * 1000  # 10 minutes in milliseconds
+        for start_ms in range(0, len(audio), segment_length_ms):
+            end_ms = start_ms + segment_length_ms
+            segment = audio[start_ms:end_ms]
+            segments.append(segment)
+
+        return segments
+
+    def process_segment(self, segment):
+        """        Transcribe an audio file and print the transcript.
+
+        Args:
+            audio_file (str): The path to the audio file to be transcribed.
+
+        Returns:
+            None
+        """
+
+        try:
+            # if audio_file.startswith("http"):
+            #     response = requests.get(audio_file)
+            #     response.raise_for_status()
+            #     with tempfile.NamedTemporaryFile(delete=False) as f:
+            #         f.write(response.content)
+            #         audio_file = f.name
+            audio_file = open(segment, "rb")
+            response = self.client.audio.transcriptions.create(
+                model="whisper-1",
+                file=audio_file
+            )
+            self.whole_response.append(response.text)
+
+        except Exception as e:
+            print(f"Error: {e}")
+
+    def process_file(self, audio_file):
+        """        Transcribe an audio file and print the transcript.
+
+        Args:
+            audio_file (str): The path to the audio file to be transcribed.
+
+        Returns:
+            None
+        """
+
+        try:
+            # if audio_file.startswith("http"):
+            #     response = requests.get(audio_file)
+            #     response.raise_for_status()
+            #     with tempfile.NamedTemporaryFile(delete=False) as f:
+            #         f.write(response.content)
+            #         audio_file = f.name
+
+            segments = self.split_audio(audio_file)
+            for i, segment in enumerate(segments):
+                segment_file_path = f"segment_{i}.mp3"
+                segment.export(segment_file_path, format="mp3")
+                self.process_segment(segment_file_path)
+            print(' '.join(self.whole_response))
+
+        except Exception as e:
+            print(f"Error: {e}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Transcribe an audio file.")
+    parser.add_argument(
+        "audio_file", help="The path to the audio file to be transcribed.")
+    args = parser.parse_args()
+    whisper = Whisper()
+    whisper.process_file(args.audio_file)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/poetry.lock
+++ b/poetry.lock
@ -2173,6 +2173,17 @@ files = [
 [package.dependencies]
 typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"

+[[package]]
+name = "pydub"
+version = "0.25.1"
+description = "Manipulate audio with an simple and easy high level interface"
+optional = false
+python-versions = "*"
+files = [
+    {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
+    {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
+]
+
 [[package]]
 name = "pygments"
 version = "2.17.2"
@ -3418,4 +3429,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "d271a65f7140d66396aeee1e518f27c4fa9c557ce59176cb16b39a592e69c9d8"
+content-hash = "0fabc492abefedf556f97db35a35c83a8292f86b437b60dbd442010cb1e3b9a0"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -21,6 +21,7 @@ langchain-community = "^0.0.24"
 google-api-python-client = "^2.120.0"
 isodate = "^0.6.1"
 youtube-transcript-api = "^0.6.2"
+pydub = "^0.25.1"

 [tool.poetry.group.cli.dependencies]
 pyyaml = "^6.0.1"