# Mirror of https://github.com/hwchase17/langchain
# Synced 2024-11-06 03:20:49 +00:00
# 403 lines, 14 KiB, Python
|
import asyncio
|
||
|
import logging
|
||
|
from concurrent.futures import ThreadPoolExecutor
|
||
|
from typing import Any, Dict, List, Mapping, Optional, Sequence, TypedDict
|
||
|
|
||
|
import aiohttp
|
||
|
import requests
|
||
|
from langchain_core.callbacks import (
|
||
|
AsyncCallbackManagerForLLMRun,
|
||
|
CallbackManagerForLLMRun,
|
||
|
)
|
||
|
from langchain_core.language_models.llms import BaseLLM
|
||
|
from langchain_core.outputs import Generation, LLMResult
|
||
|
from langchain_core.pydantic_v1 import Extra, Field, root_validator
|
||
|
from langchain_core.utils import get_from_dict_or_env
|
||
|
|
||
|
from langchain_community.llms.utils import enforce_stop_tokens
|
||
|
|
||
|
|
||
|
class TrainResult(TypedDict):
    """Typed dictionary describing the outcome of one fine-tuning call."""

    # Training loss reported for the submitted samples.
    loss: float
|
||
|
|
||
|
|
||
|
class GradientLLM(BaseLLM):
    """Gradient.ai LLM Endpoints.

    GradientLLM is a class to interact with LLMs on gradient.ai

    To use, set the environment variable ``GRADIENT_ACCESS_TOKEN`` with your
    API token and ``GRADIENT_WORKSPACE_ID`` for your gradient workspace,
    or alternatively provide them as keywords to the constructor of this class.

    Example:
        .. code-block:: python

            from langchain_community.llms import GradientLLM
            GradientLLM(
                model="99148c6d-c2a0-4fbe-a4a7-e7c05bdb8a09_base_ml_model",
                model_kwargs={
                    "max_generated_token_count": 128,
                    "temperature": 0.75,
                    "top_p": 0.95,
                    "top_k": 20,
                    "stop": [],
                },
                gradient_workspace_id="12345614fc0_workspace",
                gradient_access_token="gradientai-access_token",
            )

    """

    model_id: str = Field(alias="model", min_length=2)
    "Underlying gradient.ai model id (base or fine-tuned)."

    gradient_workspace_id: Optional[str] = None
    "Underlying gradient.ai workspace_id."

    gradient_access_token: Optional[str] = None
    """gradient.ai API Token, which can be generated by going to
        https://auth.gradient.ai/select-workspace
        and selecting "Access tokens" under the profile drop-down.
    """

    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""

    gradient_api_url: str = "https://api.gradient.ai/api"
    """Endpoint URL to use."""

    aiosession: Optional[aiohttp.ClientSession] = None  #: :meta private:
    """ClientSession, private, subject to change in upcoming releases."""

    # LLM call kwargs
    class Config:
        """Configuration for this pydantic object."""

        allow_population_by_field_name = True
        extra = Extra.forbid

    @root_validator(allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate credentials, sampling parameters and the API URL.

        The access token, workspace id and API URL are each resolved with
        ``get_from_dict_or_env`` using the field name and the matching
        ``GRADIENT_*`` environment variable name.

        Args:
            values: Field values collected by pydantic for this model.

        Returns:
            The same mapping with the resolved credentials and URL written back.

        Raises:
            ValueError: If the access token is missing or shorter than 10
                characters, the workspace id is missing or shorter than 3
                characters, or one of ``temperature``, ``top_p``, ``top_k``,
                ``max_generated_token_count`` in ``model_kwargs`` is out of
                range.
        """
        values["gradient_access_token"] = get_from_dict_or_env(
            values, "gradient_access_token", "GRADIENT_ACCESS_TOKEN"
        )
        values["gradient_workspace_id"] = get_from_dict_or_env(
            values, "gradient_workspace_id", "GRADIENT_WORKSPACE_ID"
        )

        access_token = values["gradient_access_token"]
        if access_token is None or len(access_token) < 10:
            raise ValueError("env variable `GRADIENT_ACCESS_TOKEN` must be set")

        # The length check must look at the workspace id itself; checking the
        # access token here would let any workspace id through.
        workspace_id = values["gradient_workspace_id"]
        if workspace_id is None or len(workspace_id) < 3:
            raise ValueError("env variable `GRADIENT_WORKSPACE_ID` must be set")

        # `.get` because the key may be absent when the field itself failed
        # validation before this root validator ran.
        kw = values.get("model_kwargs")
        if kw:
            if not 0 <= kw.get("temperature", 0.5) <= 1:
                raise ValueError("`temperature` must be in the range [0.0, 1.0]")

            if not 0 <= kw.get("top_p", 0.5) <= 1:
                raise ValueError("`top_p` must be in the range [0.0, 1.0]")

            if 0 >= kw.get("top_k", 0.5):
                raise ValueError("`top_k` must be positive")

            if 0 >= kw.get("max_generated_token_count", 1):
                raise ValueError("`max_generated_token_count` must be positive")

        values["gradient_api_url"] = get_from_dict_or_env(
            values, "gradient_api_url", "GRADIENT_API_URL"
        )

        # The `gradientai` package is optional here: its absence only produces
        # a warning, and any other import-time failure must not block
        # construction of the model.
        try:
            import gradientai  # noqa
        except ImportError:
            logging.warning(
                "DeprecationWarning: `GradientLLM` will use "
                "`pip install gradientai` in future releases of langchain."
            )
        except Exception:
            logging.debug("Importing optional `gradientai` failed", exc_info=True)

        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "gradient_api_url": self.gradient_api_url,
            "model_kwargs": self.model_kwargs or {},
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "gradient"

    def _request_headers(self) -> Dict[str, str]:
        """Build the HTTP headers shared by every Gradient request."""
        return {
            "authorization": f"Bearer {self.gradient_access_token}",
            "x-gradient-workspace-id": f"{self.gradient_workspace_id}",
            "accept": "application/json",
            "content-type": "application/json",
        }

    def _kwargs_post_fine_tune_request(
        self, inputs: Sequence[str], kwargs: Mapping[str, Any]
    ) -> Mapping[str, Any]:
        """Build the kwargs for the fine-tune POST request (sync and async).

        Args:
            inputs (Sequence[str]): training samples to send.
            kwargs (dict): call-time parameters; merged over ``model_kwargs``.
                An optional ``multipliers`` entry supplies one multiplier per
                sample.

        Returns:
            Mapping[str, Any]: ``url``, ``headers`` and ``json`` keyword
            arguments for the HTTP ``post`` call.
        """
        _model_kwargs = self.model_kwargs or {}
        _params = {**_model_kwargs, **kwargs}

        multipliers = _params.get("multipliers", None)

        if multipliers is None:
            samples = tuple({"inputs": sample} for sample in inputs)
        else:
            # NOTE: zip() stops at the shorter sequence, so samples without a
            # matching multiplier are not sent.
            samples = tuple(
                {
                    "inputs": sample,
                    "fineTuningParameters": {
                        "multiplier": multiplier,
                    },
                }
                for sample, multiplier in zip(inputs, multipliers)
            )

        return dict(
            url=f"{self.gradient_api_url}/models/{self.model_id}/fine-tune",
            headers=self._request_headers(),
            json=dict(samples=samples),
        )

    def _kwargs_post_request(
        self, prompt: str, kwargs: Mapping[str, Any]
    ) -> Mapping[str, Any]:
        """Build the kwargs for the completion POST request (sync and async).

        Args:
            prompt (str): prompt used in query
            kwargs (dict): call-time parameters; merged over ``model_kwargs``.

        Returns:
            Mapping[str, Any]: ``url``, ``headers`` and ``json`` keyword
            arguments for the HTTP ``post`` call. Sampling parameters that
            were not supplied are sent as ``None``.
        """
        _model_kwargs = self.model_kwargs or {}
        _params = {**_model_kwargs, **kwargs}

        return dict(
            url=f"{self.gradient_api_url}/models/{self.model_id}/complete",
            headers=self._request_headers(),
            json=dict(
                query=prompt,
                maxGeneratedTokenCount=_params.get("max_generated_token_count", None),
                temperature=_params.get("temperature", None),
                topK=_params.get("top_k", None),
                topP=_params.get("top_p", None),
            ),
        )

    def _post_json(self, request_kwargs: Mapping[str, Any]) -> Any:
        """POST synchronously with ``requests`` and return the decoded JSON body.

        Raises:
            Exception: If the request itself fails (chained to the underlying
                ``RequestException``) or the status code is not 200.
        """
        try:
            response = requests.post(**request_kwargs)
        except requests.exceptions.RequestException as e:
            raise Exception(
                f"RequestException while calling Gradient Endpoint: {e}"
            ) from e

        if response.status_code != 200:
            raise Exception(
                f"Gradient returned an unexpected response with status "
                f"{response.status_code}: {response.text}"
            )
        return response.json()

    async def _apost_json(self, request_kwargs: Mapping[str, Any]) -> Any:
        """POST asynchronously and return the decoded JSON body.

        Uses ``self.aiosession`` when one is set; otherwise opens a
        ``ClientSession`` for this single request.

        Raises:
            Exception: If the status code is not 200.
        """

        async def _send(session: aiohttp.ClientSession) -> Any:
            async with session.post(**request_kwargs) as response:
                if response.status != 200:
                    # `text` is a coroutine method on aiohttp responses and
                    # has to be awaited to obtain the body.
                    raise Exception(
                        f"Gradient returned an unexpected response with status "
                        f"{response.status}: {await response.text()}"
                    )
                return await response.json()

        if self.aiosession:
            return await _send(self.aiosession)
        async with aiohttp.ClientSession() as session:
            return await _send(session)

    @staticmethod
    def _loss_from_response(response_json: Mapping[str, Any]) -> float:
        """Compute ``sumLoss / numberOfTrainableTokens`` from a fine-tune response."""
        return response_json["sumLoss"] / response_json["numberOfTrainableTokens"]

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call to Gradients API `model/{id}/complete`.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Call-time parameters merged over ``model_kwargs``.

        Returns:
            The string generated by the model.

        Raises:
            Exception: If the request fails or the status code is not 200.
        """
        payload = self._post_json(self._kwargs_post_request(prompt, kwargs))
        text = payload["generatedOutput"]

        if stop is not None:
            # Apply stop tokens when making calls to Gradient
            text = enforce_stop_tokens(text, stop)

        return text

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Async Call to Gradients API `model/{id}/complete`.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Call-time parameters merged over ``model_kwargs``.

        Returns:
            The string generated by the model.

        Raises:
            Exception: If the status code is not 200.
        """
        payload = await self._apost_json(
            self._kwargs_post_request(prompt=prompt, kwargs=kwargs)
        )
        text = payload["generatedOutput"]

        if stop is not None:
            # Apply stop tokens when making calls to Gradient
            text = enforce_stop_tokens(text, stop)

        return text

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts.

        A single prompt is processed inline; several prompts are fanned out
        over a thread pool of at most 8 workers, one ``_call`` per prompt.

        Args:
            prompts: Prompts to complete.
            stop: Optional list of stop words to use when generating.
            run_manager: Forwarded to every ``_call``.
            **kwargs: Forwarded to every ``_call``.

        Returns:
            An ``LLMResult`` with one single-element generation list per prompt.
        """

        # same thing with threading
        def _inner_generate(prompt: str) -> List[Generation]:
            return [
                Generation(
                    text=self._call(
                        prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
                    )
                )
            ]

        if len(prompts) <= 1:
            generations = list(map(_inner_generate, prompts))
        else:
            with ThreadPoolExecutor(min(8, len(prompts))) as p:
                generations = list(p.map(_inner_generate, prompts))

        return LLMResult(generations=generations)

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts concurrently.

        Args:
            prompts: Prompts to complete.
            stop: Optional list of stop words to use when generating.
            run_manager: Forwarded to every ``_acall``.
            **kwargs: Forwarded to every ``_acall``.

        Returns:
            An ``LLMResult`` with one single-element generation list per prompt,
            in the same order as ``prompts``.
        """
        # gather() takes the awaitables as positional arguments and must itself
        # be awaited; it returns results in the order they were passed.
        texts = await asyncio.gather(
            *(
                self._acall(prompt, stop=stop, run_manager=run_manager, **kwargs)
                for prompt in prompts
            )
        )
        return LLMResult(generations=[[Generation(text=text)] for text in texts])

    def train_unsupervised(
        self,
        inputs: Sequence[str],
        **kwargs: Any,
    ) -> TrainResult:
        """Fine-tune the model on ``inputs`` via `model/{id}/fine-tune`.

        Args:
            inputs: Training samples to send.
            **kwargs: Call-time parameters merged over ``model_kwargs``
                (for example ``multipliers``).

        Returns:
            A ``TrainResult`` whose ``loss`` is ``sumLoss`` divided by
            ``numberOfTrainableTokens`` from the response.

        Raises:
            Exception: If the request fails or the status code is not 200.
        """
        response_json = self._post_json(
            self._kwargs_post_fine_tune_request(inputs, kwargs)
        )
        return TrainResult(loss=self._loss_from_response(response_json))

    async def atrain_unsupervised(
        self,
        inputs: Sequence[str],
        **kwargs: Any,
    ) -> TrainResult:
        """Async fine-tune of the model on ``inputs`` via `model/{id}/fine-tune`.

        Args:
            inputs: Training samples to send.
            **kwargs: Call-time parameters merged over ``model_kwargs``
                (for example ``multipliers``).

        Returns:
            A ``TrainResult`` whose ``loss`` is ``sumLoss`` divided by
            ``numberOfTrainableTokens`` from the response.

        Raises:
            Exception: If the status code is not 200.
        """
        response_json = await self._apost_json(
            self._kwargs_post_fine_tune_request(inputs, kwargs)
        )
        return TrainResult(loss=self._loss_from_response(response_json))
|