# langchain/libs/community/langchain_community/llms/octoai_endpoint.py

from typing import Any, Dict, List, Mapping, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Extra, root_validator
from langchain_core.utils import get_from_dict_or_env

from langchain_community.llms.utils import enforce_stop_tokens


class OctoAIEndpoint(LLM):
    """OctoAI LLM Endpoints.

    OctoAIEndpoint is a class to interact with OctoAI
    Compute Service large language model endpoints.

    To use, you should have the ``octoai`` python package installed, and the
    environment variable ``OCTOAI_API_TOKEN`` set with your API token, or pass
    it as a named parameter to the constructor.

    Two payload shapes are supported, selected by the contents of
    ``model_kwargs``:

    * If ``model_kwargs`` contains a ``"model"`` key, the request is sent in
      chat-completions form: ``model_kwargs`` is the payload and the prompt is
      added as a trailing ``{"role": "user", ...}`` entry of ``"messages"``.
    * Otherwise the request is ``{"inputs": prompt, "parameters": model_kwargs}``.

    Example:
        .. code-block:: python

            from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
            OctoAIEndpoint(
                octoai_api_token="octoai-api-key",
                endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",
                model_kwargs={
                    "max_new_tokens": 200,
                    "temperature": 0.75,
                    "top_p": 0.95,
                    "repetition_penalty": 1,
                    "seed": None,
                    "stop": [],
                },
            )

            from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
            OctoAIEndpoint(
                octoai_api_token="octoai-api-key",
                endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",
                model_kwargs={
                    "model": "llama-2-7b-chat",
                    "messages": [
                        {
                            "role": "system",
                            "content": (
                                "Below is an instruction that describes a task. "
                                "Write a response that completes the request."
                            ),
                        }
                    ],
                    "stream": False,
                    "max_tokens": 256,
                },
            )

    """

    endpoint_url: Optional[str] = None
    """Endpoint URL to use."""

    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""

    octoai_api_token: Optional[str] = None
    """OCTOAI API Token"""

    streaming: bool = False
    """Whether to generate a stream of tokens asynchronously"""
    # NOTE(review): nothing in this class reads ``streaming``; confirm whether
    # it is consumed elsewhere before relying on it.

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator(allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the API token and endpoint URL for this instance.

        Each setting is looked up with ``get_from_dict_or_env``: the
        ``octoai_api_token`` field against the ``OCTOAI_API_TOKEN`` variable
        and the ``endpoint_url`` field against the ``ENDPOINT_URL`` variable.
        The presence of the ``octoai`` package is *not* checked here; it is
        imported lazily inside ``_call``.

        Args:
            values: The field values collected by pydantic.

        Returns:
            The same mapping with ``octoai_api_token`` and ``endpoint_url``
            replaced by their resolved values.
        """
        values["octoai_api_token"] = get_from_dict_or_env(
            values, "octoai_api_token", "OCTOAI_API_TOKEN"
        )
        values["endpoint_url"] = get_from_dict_or_env(
            values, "endpoint_url", "ENDPOINT_URL"
        )
        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "endpoint_url": self.endpoint_url,
            "model_kwargs": self.model_kwargs or {},
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "octoai_endpoint"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to OctoAI's inference endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words; when given, the generated text
                is post-processed with ``enforce_stop_tokens``.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The string generated by the model.

        Raises:
            ValueError: If importing the ``octoai`` client, sending the
                request, or reading the response fails. The original
                exception is chained as the cause.
        """
        _model_kwargs = self.model_kwargs or {}

        try:
            # Imported lazily so the module can be loaded without ``octoai``.
            from octoai import client

            octoai_client = client.Client(token=self.octoai_api_token)

            if "model" in _model_kwargs:
                # Chat-completions form. Build a fresh payload and a fresh
                # message list for every call: appending to the configured
                # ``messages`` list in place would permanently grow
                # ``self.model_kwargs`` and resend earlier prompts on each
                # subsequent request.
                configured_messages = _model_kwargs.get("messages") or []
                parameter_payload = {
                    **_model_kwargs,
                    "messages": [
                        *configured_messages,
                        {"role": "user", "content": prompt},
                    ],
                }
                output = octoai_client.infer(self.endpoint_url, parameter_payload)
                text = output.get("choices")[0].get("message").get("content")
            else:
                # Plain generation form.
                parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}
                resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)
                text = resp_json["generated_text"]

        except Exception as e:
            # Surface every failure on this path as a single error type.
            raise ValueError(f"Error raised by the inference endpoint: {e}") from e

        if stop is not None:
            # Apply stop tokens when making calls to OctoAI
            text = enforce_stop_tokens(text, stop)

        return text
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`from typing import Any, Dict, List, Mapping, Optional`

			`from langchain_core.callbacks import CallbackManagerForLLMRun`
			`from langchain_core.language_models.llms import LLM`
			`from langchain_core.pydantic_v1 import Extra, root_validator`
			`from langchain_core.utils import get_from_dict_or_env`

			`from langchain_community.llms.utils import enforce_stop_tokens`


			`class OctoAIEndpoint(LLM):`
			`"""OctoAI LLM Endpoints.`

			`OctoAIEndpoint is a class to interact with OctoAI`
			`Compute Service large language model endpoints.`

			To use, you should have the ``octoai`` python package installed, and the
			environment variable ``OCTOAI_API_TOKEN`` set with your API token, or pass
			`it as a named parameter to the constructor.`

			`Example:`
			`.. code-block:: python`

			`from langchain_community.llms.octoai_endpoint import OctoAIEndpoint`
			`OctoAIEndpoint(`
			`octoai_api_token="octoai-api-key",`
			`endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",`
			`model_kwargs={`
			`"max_new_tokens": 200,`
			`"temperature": 0.75,`
			`"top_p": 0.95,`
			`"repetition_penalty": 1,`
			`"seed": None,`
			`"stop": [],`
			`},`
			`)`

			`from langchain_community.llms.octoai_endpoint import OctoAIEndpoint`
			`OctoAIEndpoint(`
			`octoai_api_token="octoai-api-key",`
			`endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",`
			`model_kwargs={`
			`"model": "llama-2-7b-chat",`
			`"messages": [`
			`{`
			`"role": "system",`
			`"content": "Below is an instruction that describes a task.`
			`Write a response that completes the request."`
			`}`
			`],`
			`"stream": False,`
			`"max_tokens": 256`
			`}`
			`)`

			`"""`

			`endpoint_url: Optional[str] = None`
			`"""Endpoint URL to use."""`

			`model_kwargs: Optional[dict] = None`
			`"""Keyword arguments to pass to the model."""`

			`octoai_api_token: Optional[str] = None`
			`"""OCTOAI API Token"""`

			`streaming: bool = False`
			`"""Whether to generate a stream of tokens asynchronously"""`

			`class Config:`
			`"""Configuration for this pydantic object."""`

			`extra = Extra.forbid`

			`@root_validator(allow_reuse=True)`
			`def validate_environment(cls, values: Dict) -> Dict:`
			`"""Validate that api key and python package exists in environment."""`
			`octoai_api_token = get_from_dict_or_env(`
			`values, "octoai_api_token", "OCTOAI_API_TOKEN"`
			`)`
			`values["endpoint_url"] = get_from_dict_or_env(`
			`values, "endpoint_url", "ENDPOINT_URL"`
			`)`

			`values["octoai_api_token"] = octoai_api_token`
			`return values`

			`@property`
			`def _identifying_params(self) -> Mapping[str, Any]:`
			`"""Get the identifying parameters."""`
			`_model_kwargs = self.model_kwargs or {}`
			`return {`
			`**{"endpoint_url": self.endpoint_url},`
			`**{"model_kwargs": _model_kwargs},`
			`}`

			`@property`
			`def _llm_type(self) -> str:`
			`"""Return type of llm."""`
			`return "octoai_endpoint"`

			`def _call(`
			`self,`
			`prompt: str,`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[CallbackManagerForLLMRun] = None,`
			`**kwargs: Any,`
			`) -> str:`
			`"""Call out to OctoAI's inference endpoint.`

			`Args:`
			`prompt: The prompt to pass into the model.`
			`stop: Optional list of stop words to use when generating.`

			`Returns:`
			`The string generated by the model.`

			`"""`
			`_model_kwargs = self.model_kwargs or {}`

			`try:`
			`# Initialize the OctoAI client`
			`from octoai import client`

			`octoai_client = client.Client(token=self.octoai_api_token)`

			`if "model" in _model_kwargs:`
			`parameter_payload = _model_kwargs`
			`parameter_payload["messages"].append(`
			`{"role": "user", "content": prompt}`
			`)`
			`# Send the request using the OctoAI client`
			`output = octoai_client.infer(self.endpoint_url, parameter_payload)`
			`text = output.get("choices")[0].get("message").get("content")`
			`else:`
			`# Prepare the payload JSON`
			`parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}`

			`# Send the request using the OctoAI client`
			`resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)`
			`text = resp_json["generated_text"]`

			`except Exception as e:`
			`# Handle any errors raised by the inference endpoint`
			`raise ValueError(f"Error raised by the inference endpoint: {e}") from e`

			`if stop is not None:`
			`# Apply stop tokens when making calls to OctoAI`
			`text = enforce_stop_tokens(text, stop)`

			`return text`