mirror of
https://github.com/danielmiessler/fabric
synced 2024-11-08 07:11:06 +00:00
added transcription
This commit is contained in:
parent
70356b34c6
commit
0ab8052c69
@ -6,6 +6,23 @@ These are helper tools to work with Fabric. Examples include things like getting
|
||||
|
||||
`yt` is a command that uses the YouTube API to pull transcripts, get video duration, and other functions. It's primary function is to get a transcript from a video that can then be stitched (piped) into other Fabric Patterns.
|
||||
|
||||
## ts (Audio transcriptions)
|
||||
|
||||
'ts' is a command that uses the OpenApi Whisper API to transcribe audio files. Due to the context window, this tool uses pydub to split the files into 10 minute segments. for more information on pydub, please refer https://github.com/jiaaro/pydub
|
||||
|
||||
### installation
|
||||
|
||||
```bash
|
||||
mac:
|
||||
brew install ffmpeg
|
||||
|
||||
linux:
|
||||
apt install ffmpeg
|
||||
|
||||
windows:
|
||||
download instructions https://www.ffmpeg.org/download.html
|
||||
```
|
||||
|
||||
```bash
|
||||
usage: yt [-h] [--duration] [--transcript] [url]
|
||||
|
||||
@ -19,3 +36,16 @@ options:
|
||||
--duration Output only the duration
|
||||
--transcript Output only the transcript
|
||||
```
|
||||
|
||||
```bash
|
||||
ts -h
|
||||
usage: ts [-h] audio_file
|
||||
|
||||
Transcribe an audio file.
|
||||
|
||||
positional arguments:
|
||||
audio_file The path to the audio file to be transcribed.
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
```
|
||||
|
110
helpers/ts
Normal file
110
helpers/ts
Normal file
@ -0,0 +1,110 @@
|
||||
from dotenv import load_dotenv
|
||||
from pydub import AudioSegment
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import argparse
|
||||
|
||||
|
||||
class Whisper:
|
||||
def __init__(self):
|
||||
env_file = os.path.expanduser("~/.config/fabric/.env")
|
||||
load_dotenv(env_file)
|
||||
try:
|
||||
apikey = os.environ["OPENAI_API_KEY"]
|
||||
self.client = OpenAI()
|
||||
self.client.api_key = apikey
|
||||
except KeyError:
|
||||
print("OPENAI_API_KEY not found in environment variables.")
|
||||
|
||||
except FileNotFoundError:
|
||||
print("No API key found. Use the --apikey option to set the key")
|
||||
self.whole_response = []
|
||||
|
||||
def split_audio(self, file_path):
|
||||
"""
|
||||
Splits the audio file into segments of the given length.
|
||||
|
||||
Args:
|
||||
- file_path: The path to the audio file.
|
||||
- segment_length_ms: Length of each segment in milliseconds.
|
||||
|
||||
Returns:
|
||||
- A list of audio segments.
|
||||
"""
|
||||
audio = AudioSegment.from_file(file_path)
|
||||
segments = []
|
||||
segment_length_ms = 10 * 60 * 1000 # 10 minutes in milliseconds
|
||||
for start_ms in range(0, len(audio), segment_length_ms):
|
||||
end_ms = start_ms + segment_length_ms
|
||||
segment = audio[start_ms:end_ms]
|
||||
segments.append(segment)
|
||||
|
||||
return segments
|
||||
|
||||
def process_segment(self, segment):
|
||||
""" Transcribe an audio file and print the transcript.
|
||||
|
||||
Args:
|
||||
audio_file (str): The path to the audio file to be transcribed.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
|
||||
try:
|
||||
# if audio_file.startswith("http"):
|
||||
# response = requests.get(audio_file)
|
||||
# response.raise_for_status()
|
||||
# with tempfile.NamedTemporaryFile(delete=False) as f:
|
||||
# f.write(response.content)
|
||||
# audio_file = f.name
|
||||
audio_file = open(segment, "rb")
|
||||
response = self.client.audio.transcriptions.create(
|
||||
model="whisper-1",
|
||||
file=audio_file
|
||||
)
|
||||
self.whole_response.append(response.text)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
def process_file(self, audio_file):
|
||||
""" Transcribe an audio file and print the transcript.
|
||||
|
||||
Args:
|
||||
audio_file (str): The path to the audio file to be transcribed.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
|
||||
try:
|
||||
# if audio_file.startswith("http"):
|
||||
# response = requests.get(audio_file)
|
||||
# response.raise_for_status()
|
||||
# with tempfile.NamedTemporaryFile(delete=False) as f:
|
||||
# f.write(response.content)
|
||||
# audio_file = f.name
|
||||
|
||||
segments = self.split_audio(audio_file)
|
||||
for i, segment in enumerate(segments):
|
||||
segment_file_path = f"segment_{i}.mp3"
|
||||
segment.export(segment_file_path, format="mp3")
|
||||
self.process_segment(segment_file_path)
|
||||
print(' '.join(self.whole_response))
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Transcribe an audio file.")
|
||||
parser.add_argument(
|
||||
"audio_file", help="The path to the audio file to be transcribed.")
|
||||
args = parser.parse_args()
|
||||
whisper = Whisper()
|
||||
whisper.process_file(args.audio_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
13
poetry.lock
generated
13
poetry.lock
generated
@ -2173,6 +2173,17 @@ files = [
|
||||
[package.dependencies]
|
||||
typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
|
||||
|
||||
[[package]]
|
||||
name = "pydub"
|
||||
version = "0.25.1"
|
||||
description = "Manipulate audio with an simple and easy high level interface"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
|
||||
{file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pygments"
|
||||
version = "2.17.2"
|
||||
@ -3418,4 +3429,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "d271a65f7140d66396aeee1e518f27c4fa9c557ce59176cb16b39a592e69c9d8"
|
||||
content-hash = "0fabc492abefedf556f97db35a35c83a8292f86b437b60dbd442010cb1e3b9a0"
|
||||
|
@ -21,6 +21,7 @@ langchain-community = "^0.0.24"
|
||||
google-api-python-client = "^2.120.0"
|
||||
isodate = "^0.6.1"
|
||||
youtube-transcript-api = "^0.6.2"
|
||||
pydub = "^0.25.1"
|
||||
|
||||
[tool.poetry.group.cli.dependencies]
|
||||
pyyaml = "^6.0.1"
|
||||
|
Loading…
Reference in New Issue
Block a user