fabric/helpers/ts.py

from dotenv import load_dotenv
from pydub import AudioSegment
from openai import OpenAI
import os
import argparse


class Whisper:
    def __init__(self):
        env_file = os.path.expanduser("~/.config/fabric/.env")
        load_dotenv(env_file)
        try:
            apikey = os.environ["OPENAI_API_KEY"]
            self.client = OpenAI()
            self.client.api_key = apikey
        except KeyError:
            print("OPENAI_API_KEY not found in environment variables.")

        except FileNotFoundError:
            print("No API key found. Use the --apikey option to set the key")
        self.whole_response = []

    def split_audio(self, file_path):
        """
        Splits the audio file into segments of the given length.

        Args:
        - file_path: The path to the audio file.
        - segment_length_ms: Length of each segment in milliseconds.

        Returns:
        - A list of audio segments.
        """
        audio = AudioSegment.from_file(file_path)
        segments = []
        segment_length_ms = 10 * 60 * 1000  # 10 minutes in milliseconds
        for start_ms in range(0, len(audio), segment_length_ms):
            end_ms = start_ms + segment_length_ms
            segment = audio[start_ms:end_ms]
            segments.append(segment)

        return segments

    def process_segment(self, segment):
        """        Transcribe an audio file and print the transcript.

        Args:
            audio_file (str): The path to the audio file to be transcribed.

        Returns:
            None
        """

        try:
            # if audio_file.startswith("http"):
            #     response = requests.get(audio_file)
            #     response.raise_for_status()
            #     with tempfile.NamedTemporaryFile(delete=False) as f:
            #         f.write(response.content)
            #         audio_file = f.name
            audio_file = open(segment, "rb")
            response = self.client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
            self.whole_response.append(response.text)

        except Exception as e:
            print(f"Error: {e}")

    def process_file(self, audio_file):
        """        Transcribe an audio file and print the transcript.

        Args:
            audio_file (str): The path to the audio file to be transcribed.

        Returns:
            None
        """

        try:
            # if audio_file.startswith("http"):
            #     response = requests.get(audio_file)
            #     response.raise_for_status()
            #     with tempfile.NamedTemporaryFile(delete=False) as f:
            #         f.write(response.content)
            #         audio_file = f.name

            segments = self.split_audio(audio_file)
            for i, segment in enumerate(segments):
                segment_file_path = f"segment_{i}.mp3"
                segment.export(segment_file_path, format="mp3")
                self.process_segment(segment_file_path)
            print(' '.join(self.whole_response))

        except Exception as e:
            print(f"Error: {e}")


def main():
    parser = argparse.ArgumentParser(description="Transcribe an audio file.")
    parser.add_argument(
        "audio_file", help="The path to the audio file to be transcribed.")
    args = parser.parse_args()
    whisper = Whisper()
    whisper.process_file(args.audio_file)


if __name__ == "__main__":
    main()