community[minor]: Update OctoAI LLM, Embedding and documentation (#16710)

This PR updates the OctoAI integrations:

- The LLM class was updated to fix a bug that occurred when making multiple sequential calls.
- The Embedding class was updated to support the new GTE-Large endpoint recently released on OctoAI.
- The documentation Jupyter notebook was updated to reflect the new LLM SDK.

Thank you!
2024-11-16 06:13:16 +00:00 · 2024-01-29 13:57:17 -08:00 · 2024-01-29 13:57:17 -08:00 · 85e93e05ed
commit 85e93e05ed
parent 6d6226d96d
3 changed files with 75 additions and 46 deletions
--- a/docs/docs/integrations/llms/octoai.ipynb
+++ b/docs/docs/integrations/llms/octoai.ipynb
@ -26,19 +26,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OCTOAI_API_TOKEN\"] = \"OCTOAI_API_TOKEN\"\n",
-    "os.environ[\"ENDPOINT_URL\"] = \"https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate\""
+    "os.environ[\"ENDPOINT_URL\"] = \"https://text.octoai.run/v1/chat/completions\""
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -56,7 +56,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@ -66,36 +66,40 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OctoAIEndpoint(\n",
    "    model_kwargs={\n",
-    "        \"max_new_tokens\": 200,\n",
+    "        \"model\": \"llama-2-13b-chat-fp16\",\n",
-    "        \"temperature\": 0.75,\n",
+    "        \"max_tokens\": 128,\n",
-    "        \"top_p\": 0.95,\n",
+    "        \"presence_penalty\": 0,\n",
-    "        \"repetition_penalty\": 1,\n",
+    "        \"temperature\": 0.1,\n",
-    "        \"seed\": None,\n",
+    "        \"top_p\": 0.9,\n",
-    "        \"stop\": [],\n",
+    "        \"messages\": [\n",
    "            {\n",
    "                \"role\": \"system\",\n",
    "                \"content\": \"You are a helpful assistant. Keep your responses limited to one short paragraph if possible.\",\n",
    "            },\n",
    "        ],\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
-     "data": {
+     "name": "stdout",
-      "text/plain": [
+     "output_type": "stream",
-       "'\\nLeonardo da Vinci was an Italian polymath and painter regarded by many as one of the greatest painters of all time. He is best known for his masterpieces including Mona Lisa, The Last Supper, and The Virgin of the Rocks. He was a draftsman, sculptor, architect, and one of the most important figures in the history of science. Da Vinci flew gliders, experimented with water turbines and windmills, and invented the catapult and a joystick-type human-powered aircraft control. He may have pioneered helicopters. As a scholar, he was interested in anatomy, geology, botany, engineering, mathematics, and astronomy.\\nOther painters and patrons claimed to be more talented, but Leonardo da Vinci was an incredibly productive artist, sculptor, engineer, anatomist, and scientist.'"
+     "text": [
-      ]
+      "  Sure thing! Here's my response:\n",
-     },
+      "\n",
-     "execution_count": 31,
+      "Leonardo da Vinci was a true Renaissance man - an Italian polymath who excelled in various fields, including painting, sculpture, engineering, mathematics, anatomy, and geology. He is widely considered one of the greatest painters of all time, and his inventive and innovative works continue to inspire and influence artists and thinkers to this day. Some of his most famous works include the Mona Lisa, The Last Supper, and Vitruvian Man. \n"
-     "metadata": {},
+     ]
     "output_type": "execute_result"
    }
   ],
   "source": [
@ -103,7 +107,7 @@
    "\n",
    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
    "\n",
-    "llm_chain.run(question)"
+    "print(llm_chain.run(question))"
   ]
  }
 ],
@ -123,7 +127,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.7"
  },
  "vscode": {
   "interpreter": {
--- a/libs/community/langchain_community/embeddings/octoai_embeddings.py
+++ b/libs/community/langchain_community/embeddings/octoai_embeddings.py
@ -41,7 +41,7 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
            values, "octoai_api_token", "OCTOAI_API_TOKEN"
        )
        values["endpoint_url"] = get_from_dict_or_env(
-            values, "endpoint_url", "ENDPOINT_URL"
+            values, "endpoint_url", "https://text.octoai.run/v1/embeddings"
        )
        return values
@ -59,19 +59,29 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
        """Compute embeddings using an OctoAI instruct model."""
        from octoai import client
        embedding = []
        embeddings = []
        octoai_client = client.Client(token=self.octoai_api_token)
        for text in texts:
            parameter_payload = {
-                "sentence": str([text]),  # for item in text]),
+                "sentence": str([text]),
-                "instruction": str([instruction]),  # for item in text]),
+                "input": str([text]),
                "instruction": str([instruction]),
                "model": "thenlper/gte-large",
                "parameters": self.model_kwargs or {},
            }
            try:
                resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)
-                embedding = resp_json["embeddings"]
+                if "embeddings" in resp_json:
                    embedding = resp_json["embeddings"]
                elif "data" in resp_json:
                    json_data = resp_json["data"]
                    for item in json_data:
                        if "embedding" in item:
                            embedding.append(item["embedding"])
            except Exception as e:
                raise ValueError(f"Error raised by the inference endpoint: {e}") from e
--- a/libs/community/langchain_community/llms/octoai_endpoint.py
+++ b/libs/community/langchain_community/llms/octoai_endpoint.py
@ -24,23 +24,9 @@ class OctoAIEndpoint(LLM):
            from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
            OctoAIEndpoint(
                octoai_api_token="octoai-api-key",
-                endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",
+                endpoint_url="https://text.octoai.run/v1/chat/completions",
                model_kwargs={
-                    "max_new_tokens": 200,
+                    "model": "llama-2-13b-chat-fp16",
                    "temperature": 0.75,
                    "top_p": 0.95,
                    "repetition_penalty": 1,
                    "seed": None,
                    "stop": [],
                },
            )
            from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
            OctoAIEndpoint(
                octoai_api_token="octoai-api-key",
                endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",
                model_kwargs={
                    "model": "llama-2-7b-chat",
                    "messages": [
                        {
                            "role": "system",
@ -49,7 +35,10 @@ class OctoAIEndpoint(LLM):
                        }
                    ],
                    "stream": False,
-                    "max_tokens": 256
+                    "max_tokens": 256,
                    "presence_penalty": 0,
                    "temperature": 0.1,
                    "top_p": 0.9
                }
            )
@ -119,19 +108,45 @@ class OctoAIEndpoint(LLM):
        _model_kwargs = self.model_kwargs or {}
        try:
            # Initialize the OctoAI client
            from octoai import client
            # Initialize the OctoAI client
            octoai_client = client.Client(token=self.octoai_api_token)
            if "model" in _model_kwargs:
                parameter_payload = _model_kwargs
                sys_msg = None
                if "messages" in parameter_payload:
                    msgs = parameter_payload.get("messages", [])
                    for msg in msgs:
                        if msg.get("role") == "system":
                            sys_msg = msg.get("content")
                # Reset messages list
                parameter_payload["messages"] = []
                # Append system message if exists
                if sys_msg:
                    parameter_payload["messages"].append(
                        {"role": "system", "content": sys_msg}
                    )
                # Append user message
                parameter_payload["messages"].append(
                    {"role": "user", "content": prompt}
                )
                # Send the request using the OctoAI client
-                output = octoai_client.infer(self.endpoint_url, parameter_payload)
+                try:
-                text = output.get("choices")[0].get("message").get("content")
+                    output = octoai_client.infer(self.endpoint_url, parameter_payload)
                    if output and "choices" in output and len(output["choices"]) > 0:
                        text = output["choices"][0].get("message", {}).get("content")
                    else:
                        text = "Error: Invalid response format or empty choices."
                except Exception as e:
                    text = f"Error during API call: {str(e)}"
            else:
                # Prepare the payload JSON
                parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}