diff --git a/libs/community/langchain_community/document_loaders/parsers/audio.py b/libs/community/langchain_community/document_loaders/parsers/audio.py
index 77d1b2b8e1..4afe339938 100644
--- a/libs/community/langchain_community/document_loaders/parsers/audio.py
+++ b/libs/community/langchain_community/document_loaders/parsers/audio.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import time
 from typing import Dict, Iterator, Optional, Tuple
 
@@ -25,10 +26,17 @@ class OpenAIWhisperParser(BaseBlobParser):
     """
 
     def __init__(
-        self, api_key: Optional[str] = None, *, chunk_duration_threshold: float = 0.1
+        self,
+        api_key: Optional[str] = None,
+        *,
+        chunk_duration_threshold: float = 0.1,
+        base_url: Optional[str] = None,
     ):
         self.api_key = api_key
         self.chunk_duration_threshold = chunk_duration_threshold
+        self.base_url = (
+            base_url if base_url is not None else os.environ.get("OPENAI_API_BASE")
+        )
 
     def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""
@@ -51,11 +59,14 @@ class OpenAIWhisperParser(BaseBlobParser):
 
         if is_openai_v1():
             # api_key optional, defaults to `os.environ['OPENAI_API_KEY']`
-            client = openai.OpenAI(api_key=self.api_key)
+            client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
         else:
             # Set the API key if provided
             if self.api_key:
                 openai.api_key = self.api_key
+            if self.base_url:
+                # openai<1.0 reads its endpoint from `api_base`, not `base_url`
+                openai.api_base = self.base_url
 
         # Audio file from disk
         audio = AudioSegment.from_file(blob.path)