community[minor]: Update OctoAI LLM, Embedding and documentation (#16710)

This PR includes updates for OctoAI integrations:
- The LLM class was updated to fix a bug that occurs with multiple
sequential calls
- The Embedding class was updated to support the new GTE-Large endpoint
recently released on OctoAI
- The documentation Jupyter notebook was updated to reflect usage of the
new LLM SDK
Thank you!
pull/16747/head
Bassem Yacoube 5 months ago committed by GitHub
parent 6d6226d96d
commit 85e93e05ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -26,19 +26,19 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OCTOAI_API_TOKEN\"] = \"OCTOAI_API_TOKEN\"\n",
"os.environ[\"ENDPOINT_URL\"] = \"https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate\""
"os.environ[\"ENDPOINT_URL\"] = \"https://text.octoai.run/v1/chat/completions\""
]
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@ -56,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@ -66,36 +66,40 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"llm = OctoAIEndpoint(\n",
" model_kwargs={\n",
" \"max_new_tokens\": 200,\n",
" \"temperature\": 0.75,\n",
" \"top_p\": 0.95,\n",
" \"repetition_penalty\": 1,\n",
" \"seed\": None,\n",
" \"stop\": [],\n",
" \"model\": \"llama-2-13b-chat-fp16\",\n",
" \"max_tokens\": 128,\n",
" \"presence_penalty\": 0,\n",
" \"temperature\": 0.1,\n",
" \"top_p\": 0.9,\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are a helpful assistant. Keep your responses limited to one short paragraph if possible.\",\n",
" },\n",
" ],\n",
" },\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nLeonardo da Vinci was an Italian polymath and painter regarded by many as one of the greatest painters of all time. He is best known for his masterpieces including Mona Lisa, The Last Supper, and The Virgin of the Rocks. He was a draftsman, sculptor, architect, and one of the most important figures in the history of science. Da Vinci flew gliders, experimented with water turbines and windmills, and invented the catapult and a joystick-type human-powered aircraft control. He may have pioneered helicopters. As a scholar, he was interested in anatomy, geology, botany, engineering, mathematics, and astronomy.\\nOther painters and patrons claimed to be more talented, but Leonardo da Vinci was an incredibly productive artist, sculptor, engineer, anatomist, and scientist.'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
" Sure thing! Here's my response:\n",
"\n",
"Leonardo da Vinci was a true Renaissance man - an Italian polymath who excelled in various fields, including painting, sculpture, engineering, mathematics, anatomy, and geology. He is widely considered one of the greatest painters of all time, and his inventive and innovative works continue to inspire and influence artists and thinkers to this day. Some of his most famous works include the Mona Lisa, The Last Supper, and Vitruvian Man. \n"
]
}
],
"source": [
@ -103,7 +107,7 @@
"\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"llm_chain.run(question)"
"print(llm_chain.run(question))"
]
}
],
@ -123,7 +127,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.11.7"
},
"vscode": {
"interpreter": {

@ -41,7 +41,7 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
values, "octoai_api_token", "OCTOAI_API_TOKEN"
)
values["endpoint_url"] = get_from_dict_or_env(
values, "endpoint_url", "ENDPOINT_URL"
values, "endpoint_url", "https://text.octoai.run/v1/embeddings"
)
return values
@ -59,19 +59,29 @@ class OctoAIEmbeddings(BaseModel, Embeddings):
"""Compute embeddings using an OctoAI instruct model."""
from octoai import client
embedding = []
embeddings = []
octoai_client = client.Client(token=self.octoai_api_token)
for text in texts:
parameter_payload = {
"sentence": str([text]), # for item in text]),
"instruction": str([instruction]), # for item in text]),
"sentence": str([text]),
"input": str([text]),
"instruction": str([instruction]),
"model": "thenlper/gte-large",
"parameters": self.model_kwargs or {},
}
try:
resp_json = octoai_client.infer(self.endpoint_url, parameter_payload)
embedding = resp_json["embeddings"]
if "embeddings" in resp_json:
embedding = resp_json["embeddings"]
elif "data" in resp_json:
json_data = resp_json["data"]
for item in json_data:
if "embedding" in item:
embedding.append(item["embedding"])
except Exception as e:
raise ValueError(f"Error raised by the inference endpoint: {e}") from e

@ -24,23 +24,9 @@ class OctoAIEndpoint(LLM):
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
OctoAIEndpoint(
octoai_api_token="octoai-api-key",
endpoint_url="https://mpt-7b-demo-f1kzsig6xes9.octoai.run/generate",
endpoint_url="https://text.octoai.run/v1/chat/completions",
model_kwargs={
"max_new_tokens": 200,
"temperature": 0.75,
"top_p": 0.95,
"repetition_penalty": 1,
"seed": None,
"stop": [],
},
)
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
OctoAIEndpoint(
octoai_api_token="octoai-api-key",
endpoint_url="https://llama-2-7b-chat-demo-kk0powt97tmb.octoai.run/v1/chat/completions",
model_kwargs={
"model": "llama-2-7b-chat",
"model": "llama-2-13b-chat-fp16",
"messages": [
{
"role": "system",
@ -49,7 +35,10 @@ class OctoAIEndpoint(LLM):
}
],
"stream": False,
"max_tokens": 256
"max_tokens": 256,
"presence_penalty": 0,
"temperature": 0.1,
"top_p": 0.9
}
)
@ -119,19 +108,45 @@ class OctoAIEndpoint(LLM):
_model_kwargs = self.model_kwargs or {}
try:
# Initialize the OctoAI client
from octoai import client
# Initialize the OctoAI client
octoai_client = client.Client(token=self.octoai_api_token)
if "model" in _model_kwargs:
parameter_payload = _model_kwargs
sys_msg = None
if "messages" in parameter_payload:
msgs = parameter_payload.get("messages", [])
for msg in msgs:
if msg.get("role") == "system":
sys_msg = msg.get("content")
# Reset messages list
parameter_payload["messages"] = []
# Append system message if exists
if sys_msg:
parameter_payload["messages"].append(
{"role": "system", "content": sys_msg}
)
# Append user message
parameter_payload["messages"].append(
{"role": "user", "content": prompt}
)
# Send the request using the OctoAI client
output = octoai_client.infer(self.endpoint_url, parameter_payload)
text = output.get("choices")[0].get("message").get("content")
try:
output = octoai_client.infer(self.endpoint_url, parameter_payload)
if output and "choices" in output and len(output["choices"]) > 0:
text = output["choices"][0].get("message", {}).get("content")
else:
text = "Error: Invalid response format or empty choices."
except Exception as e:
text = f"Error during API call: {str(e)}"
else:
# Prepare the payload JSON
parameter_payload = {"inputs": prompt, "parameters": _model_kwargs}

Loading…
Cancel
Save