@@ -80,7 +80,12 @@ def _streaming_response_template() -> Dict[str, Any]:
     }
 
 
-def _create_retry_decorator(llm: Union[BaseOpenAI, OpenAIChat]) -> Callable[[Any], Any]:
+def _create_retry_decorator(
+    llm: Union[BaseOpenAI, OpenAIChat],
+    run_manager: Optional[
+        Union[AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun]
+    ] = None,
+) -> Callable[[Any], Any]:
     import openai
 
     errors = [
@@ -90,12 +95,18 @@ def _create_retry_decorator(llm: Union[BaseOpenAI, OpenAIChat]) -> Callable[[Any
         openai.error.RateLimitError,
         openai.error.ServiceUnavailableError,
     ]
-    return create_base_retry_decorator(error_types=errors, max_retries=llm.max_retries)
+    return create_base_retry_decorator(
+        error_types=errors, max_retries=llm.max_retries, run_manager=run_manager
+    )
 
 
-def completion_with_retry(llm: Union[BaseOpenAI, OpenAIChat], **kwargs: Any) -> Any:
+def completion_with_retry(
+    llm: Union[BaseOpenAI, OpenAIChat],
+    run_manager: Optional[CallbackManagerForLLMRun] = None,
+    **kwargs: Any,
+) -> Any:
     """Use tenacity to retry the completion call."""
-    retry_decorator = _create_retry_decorator(llm)
+    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
 
     @retry_decorator
     def _completion_with_retry(**kwargs: Any) -> Any:
@@ -105,10 +116,12 @@ def completion_with_retry(llm: Union[BaseOpenAI, OpenAIChat], **kwargs: Any) ->
 
 
 async def acompletion_with_retry(
-    llm: Union[BaseOpenAI, OpenAIChat], **kwargs: Any
+    llm: Union[BaseOpenAI, OpenAIChat],
+    run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+    **kwargs: Any,
 ) -> Any:
     """Use tenacity to retry the async completion call."""
-    retry_decorator = _create_retry_decorator(llm)
+    retry_decorator = _create_retry_decorator(llm, run_manager=run_manager)
 
     @retry_decorator
     async def _completion_with_retry(**kwargs: Any) -> Any:
@@ -291,8 +304,10 @@ class BaseOpenAI(BaseLLM):
         **kwargs: Any,
     ) -> Iterator[GenerationChunk]:
         params = {**self._invocation_params, **kwargs, "stream": True}
-        self.get_sub_prompts(params, [prompt], stop)  # this mutate params
-        for stream_resp in completion_with_retry(self, prompt=prompt, **params):
+        self.get_sub_prompts(params, [prompt], stop)  # this mutates params
+        for stream_resp in completion_with_retry(
+            self, prompt=prompt, run_manager=run_manager, **params
+        ):
             chunk = _stream_response_to_generation_chunk(stream_resp)
             yield chunk
             if run_manager:
@@ -314,7 +329,7 @@ class BaseOpenAI(BaseLLM):
         params = {**self._invocation_params, **kwargs, "stream": True}
         self.get_sub_prompts(params, [prompt], stop)  # this mutate params
         async for stream_resp in await acompletion_with_retry(
-            self, prompt=prompt, **params
+            self, prompt=prompt, run_manager=run_manager, **params
         ):
             chunk = _stream_response_to_generation_chunk(stream_resp)
             yield chunk
@@ -381,7 +396,9 @@ class BaseOpenAI(BaseLLM):
                     }
                 )
             else:
-                response = completion_with_retry(self, prompt=_prompts, **params)
+                response = completion_with_retry(
+                    self, prompt=_prompts, run_manager=run_manager, **params
+                )
                 choices.extend(response["choices"])
                 update_token_usage(_keys, response, token_usage)
         return self.create_llm_result(choices, prompts, token_usage)
@@ -428,7 +445,9 @@ class BaseOpenAI(BaseLLM):
                     }
                 )
             else:
-                response = await acompletion_with_retry(self, prompt=_prompts, **params)
+                response = await acompletion_with_retry(
+                    self, prompt=_prompts, run_manager=run_manager, **params
+                )
                 choices.extend(response["choices"])
                 update_token_usage(_keys, response, token_usage)
         return self.create_llm_result(choices, prompts, token_usage)
@@ -818,7 +837,9 @@ class OpenAIChat(BaseLLM):
     ) -> Iterator[GenerationChunk]:
         messages, params = self._get_chat_params([prompt], stop)
         params = {**params, **kwargs, "stream": True}
-        for stream_resp in completion_with_retry(self, messages=messages, **params):
+        for stream_resp in completion_with_retry(
+            self, messages=messages, run_manager=run_manager, **params
+        ):
             token = stream_resp["choices"][0]["delta"].get("content", "")
             yield GenerationChunk(text=token)
             if run_manager:
@@ -834,7 +855,7 @@ class OpenAIChat(BaseLLM):
         messages, params = self._get_chat_params([prompt], stop)
         params = {**params, **kwargs, "stream": True}
         async for stream_resp in await acompletion_with_retry(
-            self, messages=messages, **params
+            self, messages=messages, run_manager=run_manager, **params
         ):
             token = stream_resp["choices"][0]["delta"].get("content", "")
             yield GenerationChunk(text=token)
@@ -860,7 +881,9 @@ class OpenAIChat(BaseLLM):
 
         messages, params = self._get_chat_params(prompts, stop)
         params = {**params, **kwargs}
-        full_response = completion_with_retry(self, messages=messages, **params)
+        full_response = completion_with_retry(
+            self, messages=messages, run_manager=run_manager, **params
+        )
         llm_output = {
             "token_usage": full_response["usage"],
             "model_name": self.model_name,
@@ -891,7 +914,9 @@ class OpenAIChat(BaseLLM):
 
         messages, params = self._get_chat_params(prompts, stop)
         params = {**params, **kwargs}
-        full_response = await acompletion_with_retry(self, messages=messages, **params)
+        full_response = await acompletion_with_retry(
+            self, messages=messages, run_manager=run_manager, **params
+        )
         llm_output = {
             "token_usage": full_response["usage"],
             "model_name": self.model_name,
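Taken together, these hunks thread an optional callback run manager from `_generate`/`_stream` (and their async counterparts) through `completion_with_retry`/`acompletion_with_retry` and into `_create_retry_decorator`, so retry attempts can be surfaced through the callback system instead of happening silently. Below is a minimal, illustrative sketch of that pattern built directly on tenacity; the `make_retry_decorator` name and the duck-typed `on_retry` hook are assumptions for illustration, not the actual `create_base_retry_decorator` implementation.

from typing import Any, Callable, List, Optional, Type

from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)


def make_retry_decorator(
    error_types: List[Type[BaseException]],
    max_retries: int,
    run_manager: Optional[Any] = None,  # stand-in for a callback run manager
) -> Callable[[Any], Any]:
    """Build a tenacity retry decorator that reports retries to run_manager."""

    def _before_sleep(retry_state: Any) -> None:
        # tenacity calls this before each backoff sleep; forward the event to
        # the run manager if one was supplied (hypothetical on_retry hook).
        if run_manager is not None and hasattr(run_manager, "on_retry"):
            run_manager.on_retry(retry_state)

    return retry(
        reraise=True,
        stop=stop_after_attempt(max_retries),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type(tuple(error_types)),
        before_sleep=_before_sleep,
    )

With that shape, a caller only has to forward the manager it already receives, e.g. `decorator = make_retry_decorator(errors, llm.max_retries, run_manager=run_manager)`, which mirrors how the diff passes `run_manager=run_manager` at each call site.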