community[minor]: Update OctoAI LLM, Embedding and documentation (#16710)

This PR includes updates for the OctoAI integrations: (1) the LLM class was updated to fix a bug that occurs with multiple sequential calls; (2) the Embedding class was updated to support the new GTE-Large endpoint recently released on OctoAI; (3) the documentation Jupyter notebook was updated to reflect using the new LLM SDK. Thank you!
5 months ago · 85e93e05ed
parent 6d6226d96d
commit 85e93e05ed
3 changed files with 75 additions and 46 deletions
--- a/docs/docs/integrations/llms/octoai.ipynb
+++ b/docs/docs/integrations/llms/octoai.ipynb
@ -26,19 +26,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OCTOAI_API_TOKEN\"] = \"OCTOAI_API_TOKEN\"\n",
-    "os.environ[\"ENDPOINT_URL\"] = \"https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate\""
+    "os.environ[\"ENDPOINT_URL\"] = \"https://text.octoai.run/v1/chat/completions\""
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -56,7 +56,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@ -66,36 +66,40 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OctoAIEndpoint(\n",
    "    model_kwargs={\n",
-    "        \"max_new_tokens\": 200,\n",
-    "        \"temperature\": 0.75,\n",
-    "        \"top_p\": 0.95,\n",
-    "        \"repetition_penalty\": 1,\n",
-    "        \"seed\": None,\n",
-    "        \"stop\": [],\n",
+    "        \"model\": \"llama-2-13b-chat-fp16\",\n",
+    "        \"max_tokens\": 128,\n",
+    "        \"presence_penalty\": 0,\n",
+    "        \"temperature\": 0.1,\n",
+    "        \"top_p\": 0.9,\n",
+    "        \"messages\": [\n",
+    "            {\n",
+    "                \"role\": \"system\",\n",
+    "                \"content\": \"You are a helpful assistant. Keep your responses limited to one short paragraph if possible.\",\n",
+    "            },\n",
+    "        ],\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
-     "data": {
-      "text/plain": [
-       "'\\nLeonardo da Vinci was an Italian polymath and painter regarded by many as one of the greatest painters of all time. He is best known for his masterpieces including Mona Lisa, The Last Supper, and The Virgin of the Rocks. He was a draftsman, sculptor, architect, and one of the most important figures in the history of science. Da Vinci flew gliders, experimented with water turbines and windmills, and invented the catapult and a joystick-type human-powered aircraft control. He may have pioneered helicopters. As a scholar, he was interested in anatomy, geology, botany, engineering, mathematics, and astronomy.\\nOther painters and patrons claimed to be more talented, but Leonardo da Vinci was an incredibly productive artist, sculptor, engineer, anatomist, and scientist.'"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  Sure thing! Here's my response:\n",
+      "\n",
+      "Leonardo da Vinci was a true Renaissance man - an Italian polymath who excelled in various fields, including painting, sculpture, engineering, mathematics, anatomy, and geology. He is widely considered one of the greatest painters of all time, and his inventive and innovative works continue to inspire and influence artists and thinkers to this day. Some of his most famous works include the Mona Lisa, The Last Supper, and Vitruvian Man. \n"
+     ]
    }
   ],
   "source": [
@ -103,7 +107,7 @@
    "\n",
    "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
    "\n",
-    "llm_chain.run(question)"
+    "print(llm_chain.run(question))"
   ]
  }
 ],
@ -123,7 +127,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.7"
  },
  "vscode": {
   "interpreter": {
--- a/libs/community/langchain_community/embeddings/octoai_embeddings.py
+++ b/libs/community/langchain_community/embeddings/octoai_embeddings.py
@ -41,7 +41,7 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
            values, "octoai_api_token", "OCTOAI_API_TOKEN"
        )
        values["endpoint_url"] = get_from_dict_or_env(
-            values, "endpoint_url", "ENDPOINT_URL"
+            values, "endpoint_url", "https://text.octoai.run/v1/embeddings"
        )
        return values

@ -59,19 +59,29 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
        """Compute embeddings using an OctoAI instruct model."""
        from octoai import client

+        embedding = []
        embeddings = []
        octoai_client = client.Client(token=self.octoai_api_token)

        for text in texts:
            parameter_payload = {
-                "sentence": str([text]),  # for item in text]),
-                "instruction": str([instruction]),  # for item in text]),
+                "sentence": str([text]),
+                "input": str([text]),
+                "instruction": str([instruction]),
+                "model": "thenlper/gte-large",
                "parameters": self.model_kwargs or {},
            }

            try:
                resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)
-                embedding = resp_json["embeddings"]
+                if "embeddings" in resp_json:
+                    embedding = resp_json["embeddings"]
+                elif "data" in resp_json:
+                    json_data = resp_json["data"]
+                    for item in json_data:
+                        if "embedding" in item:
+                            embedding.append(item["embedding"])
+
            except Exception as e:
                raise ValueError(f"Error raised by the inference endpoint: {e}") from e

--- a/libs/community/langchain_community/llms/octoai_endpoint.py
+++ b/libs/community/langchain_community/llms/octoai_endpoint.py
@ -24,23 +24,9 @@ class OctoAIEndpoint(LLM):
            from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
            OctoAIEndpoint(
                octoai_api_token="octoai-api-key",
-                endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",
+                endpoint_url="https://text.octoai.run/v1/chat/completions",
                model_kwargs={
-                    "max_new_tokens": 200,
-                    "temperature": 0.75,
-                    "top_p": 0.95,
-                    "repetition_penalty": 1,
-                    "seed": None,
-                    "stop": [],
-                },
-            )
-
-            from langchain_community.llms.octoai_endpoint  import OctoAIEndpoint
-            OctoAIEndpoint(
-                octoai_api_token="octoai-api-key",
-                endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",
-                model_kwargs={
-                    "model": "llama-2-7b-chat",
+                    "model": "llama-2-13b-chat-fp16",
                    "messages": [
                        {
                            "role": "system",
@ -49,7 +35,10 @@ class OctoAIEndpoint(LLM):
                        }
                    ],
                    "stream": False,
-                    "max_tokens": 256
+                    "max_tokens": 256,
+                    "presence_penalty": 0,
+                    "temperature": 0.1,
+                    "top_p": 0.9
                }
            )

@ -119,19 +108,45 @@ class OctoAIEndpoint(LLM):
        _model_kwargs = self.model_kwargs or {}

        try:
-            # Initialize the OctoAI client
            from octoai import client

+            # Initialize the OctoAI client
            octoai_client = client.Client(token=self.octoai_api_token)

            if "model" in _model_kwargs:
                parameter_payload = _model_kwargs
+
+                sys_msg = None
+                if "messages" in parameter_payload:
+                    msgs = parameter_payload.get("messages", [])
+                    for msg in msgs:
+                        if msg.get("role") == "system":
+                            sys_msg = msg.get("content")
+
+                # Reset messages list
+                parameter_payload["messages"] = []
+
+                # Append system message if exists
+                if sys_msg:
+                    parameter_payload["messages"].append(
+                        {"role": "system", "content": sys_msg}
+                    )
+
+                # Append user message
                parameter_payload["messages"].append(
                    {"role": "user", "content": prompt}
                )
+
                # Send the request using the OctoAI client
-                output = octoai_client.infer(self.endpoint_url, parameter_payload)
-                text = output.get("choices")[0].get("message").get("content")
+                try:
+                    output = octoai_client.infer(self.endpoint_url, parameter_payload)
+                    if output and "choices" in output and len(output["choices"]) > 0:
+                        text = output["choices"][0].get("message", {}).get("content")
+                    else:
+                        text = "Error: Invalid response format or empty choices."
+                except Exception as e:
+                    text = f"Error during API call: {str(e)}"
+
            else:
                # Prepare the payload JSON
                parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}