added transcription

This commit is contained in:
jad2121 2024-03-03 08:42:40 -05:00
parent 70356b34c6
commit 0ab8052c69
4 changed files with 153 additions and 1 deletions

View File

@ -6,6 +6,23 @@ These are helper tools to work with Fabric. Examples include things like getting
`yt` is a command that uses the YouTube API to pull transcripts, get video duration, and other functions. It's primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.
## ts (Audio transcriptions)
'ts' is a command that uses the OpenApi Whisper API to transcribe audio files. Due to the context window, this tool uses pydub to split the files into 10 minute segments. for more information on pydub, please refer https://github.com/jiaaro/pydub
### installation
```bash
mac:
brew install ffmpeg
linux:
apt install ffmpeg
windows:
download instructions https://www.ffmpeg.org/download.html
```
```bash
usage: yt [-h] [--duration] [--transcript] [url]
@ -19,3 +36,16 @@ options:
--duration Output only the duration
--transcript Output only the transcript
```
```bash
ts -h
usage: ts [-h] audio_file
Transcribe an audio file.
positional arguments:
audio_file The path to the audio file to be transcribed.
options:
-h, --help show this help message and exit
```

110
helpers/ts Normal file
View File

@ -0,0 +1,110 @@
from dotenv import load_dotenv
from pydub import AudioSegment
from openai import OpenAI
import os
import argparse
class Whisper:
def __init__(self):
env_file = os.path.expanduser("~/.config/fabric/.env")
load_dotenv(env_file)
try:
apikey = os.environ["OPENAI_API_KEY"]
self.client = OpenAI()
self.client.api_key = apikey
except KeyError:
print("OPENAI_API_KEY not found in environment variables.")
except FileNotFoundError:
print("No API key found. Use the --apikey option to set the key")
self.whole_response = []
def split_audio(self, file_path):
"""
Splits the audio file into segments of the given length.
Args:
- file_path: The path to the audio file.
- segment_length_ms: Length of each segment in milliseconds.
Returns:
- A list of audio segments.
"""
audio = AudioSegment.from_file(file_path)
segments = []
segment_length_ms = 10 * 60 * 1000 # 10 minutes in milliseconds
for start_ms in range(0, len(audio), segment_length_ms):
end_ms = start_ms + segment_length_ms
segment = audio[start_ms:end_ms]
segments.append(segment)
return segments
def process_segment(self, segment):
""" Transcribe an audio file and print the transcript.
Args:
audio_file (str): The path to the audio file to be transcribed.
Returns:
None
"""
try:
# if audio_file.startswith("http"):
# response = requests.get(audio_file)
# response.raise_for_status()
# with tempfile.NamedTemporaryFile(delete=False) as f:
# f.write(response.content)
# audio_file = f.name
audio_file = open(segment, "rb")
response = self.client.audio.transcriptions.create(
model="whisper-1",
file=audio_file
)
self.whole_response.append(response.text)
except Exception as e:
print(f"Error: {e}")
def process_file(self, audio_file):
""" Transcribe an audio file and print the transcript.
Args:
audio_file (str): The path to the audio file to be transcribed.
Returns:
None
"""
try:
# if audio_file.startswith("http"):
# response = requests.get(audio_file)
# response.raise_for_status()
# with tempfile.NamedTemporaryFile(delete=False) as f:
# f.write(response.content)
# audio_file = f.name
segments = self.split_audio(audio_file)
for i, segment in enumerate(segments):
segment_file_path = f"segment_{i}.mp3"
segment.export(segment_file_path, format="mp3")
self.process_segment(segment_file_path)
print(' '.join(self.whole_response))
except Exception as e:
print(f"Error: {e}")
def main():
parser = argparse.ArgumentParser(description="Transcribe an audio file.")
parser.add_argument(
"audio_file", help="The path to the audio file to be transcribed.")
args = parser.parse_args()
whisper = Whisper()
whisper.process_file(args.audio_file)
if __name__ == "__main__":
main()

13
poetry.lock generated
View File

@ -2173,6 +2173,17 @@ files = [
[package.dependencies]
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pydub"
version = "0.25.1"
description = "Manipulate audio with an simple and easy high level interface"
optional = false
python-versions = "*"
files = [
{file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
{file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
]
[[package]]
name = "pygments"
version = "2.17.2"
@ -3418,4 +3429,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "d271a65f7140d66396aeee1e518f27c4fa9c557ce59176cb16b39a592e69c9d8"
content-hash = "0fabc492abefedf556f97db35a35c83a8292f86b437b60dbd442010cb1e3b9a0"

View File

@ -21,6 +21,7 @@ langchain-community = "^0.0.24"
google-api-python-client = "^2.120.0"
isodate = "^0.6.1"
youtube-transcript-api = "^0.6.2"
pydub = "^0.25.1"
[tool.poetry.group.cli.dependencies]
pyyaml = "^6.0.1"