@ -1,4 +1,5 @@
import logging
import logging
import os
import time
import time
from typing import Dict , Iterator , Optional , Tuple
from typing import Dict , Iterator , Optional , Tuple
@ -25,10 +26,17 @@ class OpenAIWhisperParser(BaseBlobParser):
"""
"""
def __init__(
    self,
    api_key: Optional[str] = None,
    *,
    chunk_duration_threshold: float = 0.1,
    base_url: Optional[str] = None,
) -> None:
    """Store the credentials and endpoint settings used by the parser.

    Args:
        api_key: API key to use. Stored exactly as given, including
            ``None``.
        chunk_duration_threshold: Stored unchanged on the instance.
        base_url: Base URL of the API endpoint. When ``None``, the value
            of the ``OPENAI_API_BASE`` environment variable is used
            instead; if that variable is also unset, ``base_url`` stays
            ``None``.
    """
    self.api_key = api_key
    self.chunk_duration_threshold = chunk_duration_threshold
    # An explicit argument always wins (even an empty string); the
    # environment variable is consulted only when nothing was passed.
    self.base_url = (
        base_url if base_url is not None else os.environ.get("OPENAI_API_BASE")
    )
def lazy_parse ( self , blob : Blob ) - > Iterator [ Document ] :
def lazy_parse ( self , blob : Blob ) - > Iterator [ Document ] :
""" Lazily parse the blob. """
""" Lazily parse the blob. """
@ -51,11 +59,13 @@ class OpenAIWhisperParser(BaseBlobParser):
if is_openai_v1 ( ) :
if is_openai_v1 ( ) :
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
# api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
client = openai . OpenAI ( api_key = self . api_key )
client = openai . OpenAI ( api_key = self . api_key , base_url = self . base_url )
else :
else :
# Set the API key if provided
# Set the API key if provided
if self . api_key :
if self . api_key :
openai . api_key = self . api_key
openai . api_key = self . api_key
if self . base_url :
openai . base_url = self . base_url
# Audio file from disk
# Audio file from disk
audio = AudioSegment . from_file ( blob . path )
audio = AudioSegment . from_file ( blob . path )