Merge branch 'master' into fork/feature_audio_loader_auzre_speech

pull/18284/head
Eugene Yurtsev 3 months ago committed by GitHub
commit a59739e1d2

@ -50,7 +50,7 @@ lint lint_package lint_tests:
poetry run ruff docs templates cookbook
poetry run ruff format docs templates cookbook --diff
poetry run ruff --select I docs templates cookbook
git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
git grep 'from langchain import' docs/docs templates cookbook | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
format format_diff:
poetry run ruff format docs templates cookbook

@ -14,9 +14,9 @@
"\n",
"This notebook shows you how to use LangChain's standard chat features while passing the chat messages back and forth via Apache Kafka.\n",
"\n",
"This goal is to simulate an architecture where the chat front end and the LLM are running as separate services that need to communicate with one another over an internal nework.\n",
"This goal is to simulate an architecture where the chat front end and the LLM are running as separate services that need to communicate with one another over an internal network.\n",
"\n",
"It's an alternative to typical pattern of requesting a reponse from the model via a REST API (there's more info on why you would want to do this at the end of the notebook)."
"It's an alternative to typical pattern of requesting a response from the model via a REST API (there's more info on why you would want to do this at the end of the notebook)."
]
},
{
@ -261,7 +261,7 @@
"\n",
"Load Llama 2 and set the conversation buffer to 300 tokens using `ConversationTokenBufferMemory`. This value was used for running Llama in a CPU only container, so you can raise it if running in Google Colab. It prevents the container that is hosting the model from running out of memory.\n",
"\n",
"Here, we're overiding the default system persona so that the chatbot has the personality of Marvin The Paranoid Android from the Hitchhiker's Guide to the Galaxy."
"Here, we're overriding the default system persona so that the chatbot has the personality of Marvin The Paranoid Android from the Hitchhiker's Guide to the Galaxy."
]
},
{
@ -272,7 +272,7 @@
},
"outputs": [],
"source": [
"# Load the model with the apporiate parameters:\n",
"# Load the model with the appropriate parameters:\n",
"llm = LlamaCpp(\n",
" model_path=model_path,\n",
" max_tokens=250,\n",
@ -551,7 +551,7 @@
"\n",
" * **Scalability**: Apache Kafka is designed with parallel processing in mind, so many teams prefer to use it to more effectively distribute work to available workers (in this case the \"worker\" is a container running an LLM).\n",
"\n",
" * **Durability**: Kafka is designed to allow services to pick up where another service left off in the case where that service experienced a memory issue or went offline. This prevents data loss in highly complex, distribuited architectures where multiple systems are communicating with one another (LLMs being just one of many interdependent systems that also include vector databases and traditional databases).\n",
" * **Durability**: Kafka is designed to allow services to pick up where another service left off in the case where that service experienced a memory issue or went offline. This prevents data loss in highly complex, distributed architectures where multiple systems are communicating with one another (LLMs being just one of many interdependent systems that also include vector databases and traditional databases).\n",
"\n",
"For more background on why event streaming is a good fit for Gen AI application architecture, see Kai Waehner's article [\"Apache Kafka + Vector Database + LLM = Real-Time GenAI\"](https://www.kai-waehner.de/blog/2023/11/08/apache-kafka-flink-vector-database-llm-real-time-genai/)."
]
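Only the markdown cells of this Kafka notebook appear in the hunk above. As a hedged illustration of the pattern it describes (a chat front end and an LLM service exchanging messages over topics), here is a minimal sketch; the broker address, topic names, message schema, and the `kafka-python` client are assumptions for illustration, not taken from the notebook.

```python
# Hedged sketch of chat-over-Kafka: front end publishes requests, LLM service replies.
# Broker, topics, and payload format are illustrative assumptions.
import json

from kafka import KafkaConsumer, KafkaProducer

BROKER = "localhost:9092"

producer = KafkaProducer(
    bootstrap_servers=BROKER,
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)

# Front-end side: publish the user's message to a request topic.
producer.send("chat-requests", {"session_id": "abc123", "text": "Hello, Marvin"})
producer.flush()


def generate_reply(text: str) -> str:
    # Placeholder for the LangChain conversation chain built later in the notebook.
    return f"Marvin sighs and answers: {text}"


# LLM-service side: consume requests, run the model, publish replies.
consumer = KafkaConsumer(
    "chat-requests",
    bootstrap_servers=BROKER,
    auto_offset_reset="earliest",
    value_deserializer=lambda v: json.loads(v.decode("utf-8")),
)
for record in consumer:
    request = record.value
    producer.send(
        "chat-responses",
        {"session_id": request["session_id"], "text": generate_reply(request["text"])},
    )
```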

@ -1,4 +1,5 @@
"""Configuration file for the Sphinx documentation builder."""
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
@ -174,3 +175,6 @@ myst_enable_extensions = ["colon_fence"]
# generate autosummary even if no references
autosummary_generate = True
html_copy_source = False
html_show_sourcelink = False

@ -224,7 +224,7 @@ After that, we can import and use WebBaseLoader.
```python
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.smith.langchain.com")
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
docs = loader.load()
```
@ -543,7 +543,7 @@ from langchain_core.messages import BaseMessage
from langserve import add_routes
# 1. Load Retriever
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
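The hunk above cuts off right after the documents are split. For readers following along, here is a hedged sketch of how the `# 1. Load Retriever` step is typically finished; the FAISS vector store and OpenAI embeddings below are illustrative assumptions, not confirmed by this diff.

```python
# Hedged continuation of the retriever setup shown above; FAISS and
# OpenAIEmbeddings are illustrative choices, not taken from this hunk.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
docs = loader.load()
documents = RecursiveCharacterTextSplitter().split_documents(docs)

# Embed the chunks, index them, and expose the index as a retriever.
vector = FAISS.from_documents(documents, OpenAIEmbeddings())
retriever = vector.as_retriever()
```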

@ -9,7 +9,7 @@
"\n",
"## Use case\n",
"\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the demand to run LLMs locally (on your own device).\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), [GPT4All](https://github.com/nomic-ai/gpt4all), and [llamafile](https://github.com/Mozilla-Ocho/llamafile) underscore the demand to run LLMs locally (on your own device).\n",
"\n",
"This has at least two important benefits:\n",
"\n",
@ -46,7 +46,8 @@
"\n",
"1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n",
"2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM \n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM\n",
"4. [`llamafile`](https://github.com/Mozilla-Ocho/llamafile): Bundles model weights and everything needed to run the model in a single file, allowing you to run the LLM locally from this file without any additional installation steps\n",
"\n",
"In general, these frameworks will do a few things:\n",
"\n",
@ -157,7 +158,7 @@
"\n",
"### Running Apple silicon GPU\n",
"\n",
"`Ollama` will automatically utilize the GPU on Apple devices.\n",
"`Ollama` and [`llamafile`](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#gpu-support) will automatically utilize the GPU on Apple devices.\n",
" \n",
"Other frameworks require the user to set up the environment to utilize the Apple GPU.\n",
"\n",
@ -191,7 +192,7 @@
"\n",
"There are various ways to gain access to quantized model weights.\n",
"\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp)\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp). You can also download models in [`llamafile` format](https://huggingface.co/models?other=llamafile) from HuggingFace.\n",
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
"3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
"\n",
@ -428,6 +429,62 @@
"llm(\"The first man on the moon was ... Let's think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "056854e2-5e4b-4a03-be7e-03192e5c4e1e",
"metadata": {},
"source": [
"### llamafile\n",
"\n",
"One of the simplest ways to run an LLM locally is using a [llamafile](https://github.com/Mozilla-Ocho/llamafile). All you need to do is:\n",
"\n",
"1) Download a llamafile from [HuggingFace](https://huggingface.co/models?other=llamafile)\n",
"2) Make the file executable\n",
"3) Run the file\n",
"\n",
"llamafiles bundle model weights and a [specially-compiled](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#technical-details) version of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) into a single file that can run on most computers any additional dependencies. They also come with an embedded inference server that provides an [API](https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints) for interacting with your model. \n",
"\n",
"Here's a simple bash script that shows all 3 setup steps:\n",
"\n",
"```bash\n",
"# Download a llamafile from HuggingFace\n",
"wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Make the file executable. On Windows, instead just rename the file to end in \".exe\".\n",
"chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Start the model server. Listens at http://localhost:8080 by default.\n",
"./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser\n",
"```\n",
"\n",
"After you run the above setup steps, you can use LangChain to interact with your model:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "002e655c-ba18-4db3-ac7b-f33e825d14b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\nFirstly, let's imagine the scene where Neil Armstrong stepped onto the moon. This happened in 1969. The first man on the moon was Neil Armstrong. We already know that.\\n2nd, let's take a step back. Neil Armstrong didn't have any special powers. He had to land his spacecraft safely on the moon without injuring anyone or causing any damage. If he failed to do this, he would have been killed along with all those people who were on board the spacecraft.\\n3rd, let's imagine that Neil Armstrong successfully landed his spacecraft on the moon and made it back to Earth safely. The next step was for him to be hailed as a hero by his people back home. It took years before Neil Armstrong became an American hero.\\n4th, let's take another step back. Let's imagine that Neil Armstrong wasn't hailed as a hero, and instead, he was just forgotten. This happened in the 1970s. Neil Armstrong wasn't recognized for his remarkable achievement on the moon until after he died.\\n5th, let's take another step back. Let's imagine that Neil Armstrong didn't die in the 1970s and instead, lived to be a hundred years old. This happened in 2036. In the year 2036, Neil Armstrong would have been a centenarian.\\nNow, let's think about the present. Neil Armstrong is still alive. He turned 95 years old on July 20th, 2018. If he were to die now, his achievement of becoming the first human being to set foot on the moon would remain an unforgettable moment in history.\\nI hope this helps you understand the significance and importance of Neil Armstrong's achievement on the moon!\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms.llamafile import Llamafile\n",
"\n",
"llm = Llamafile()\n",
"\n",
"llm.invoke(\"The first man on the moon was ... Let's think step by step.\")"
]
},
{
"cell_type": "markdown",
"id": "6b84e543",
@ -611,7 +668,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.7"
}
},
"nbformat": 4,

@ -0,0 +1,201 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a href=\"https://colab.research.google.com/github/langchain-ai/langchain/docs/docs/integrations/chat/maritalk.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
"\n",
"# Maritalk\n",
"\n",
"## Introduction\n",
"\n",
"MariTalk is an assistant developed by the Brazilian company [Maritaca AI](www.maritaca.ai).\n",
"MariTalk is based on language models that have been specially trained to understand Portuguese well.\n",
"\n",
"This notebook demonstrates how to use MariTalk with LangChain through two examples:\n",
"\n",
"1. A simple example of how to use MariTalk to perform a task.\n",
"2. LLM + RAG: The second example shows how to answer a question whose answer is found in a long document that does not fit within the token limit of MariTalk. For this, we will use a simple searcher (BM25) to first search the document for the most relevant sections and then feed them to MariTalk for answering."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installation\n",
"First, install the LangChain library (and all its dependencies) using the following command:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install langchain-core langchain-community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API Key\n",
"You will need an API key that can be obtained from chat.maritaca.ai (\"Chaves da API\" section)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"### Example 1 - Pet Name Suggestions\n",
"\n",
"Let's define our language model, ChatMaritalk, and configure it with your API key."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts.chat import ChatPromptTemplate\n",
"from langchain_community.chat_models import ChatMaritalk\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"\n",
"llm = ChatMaritalk(\n",
" api_key=\"\", # Insert your API key here\n",
" temperature=0.7,\n",
" max_tokens=100,\n",
")\n",
"\n",
"output_parser = StrOutputParser()\n",
"\n",
"chat_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are an assistant specialized in suggesting pet names. Given the animal, you must suggest 4 names.\",\n",
" ),\n",
" (\"human\", \"I have a {animal}\"),\n",
" ]\n",
")\n",
"\n",
"chain = chat_prompt | llm | output_parser\n",
"\n",
"response = chain.invoke({\"animal\": \"dog\"})\n",
"print(response) # should answer something like \"1. Max\\n2. Bella\\n3. Charlie\\n4. Rocky\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Example 2 - RAG + LLM: UNICAMP 2024 Entrance Exam Question Answering System\n",
"For this example, we need to install some extra libraries:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install unstructured rank_bm25 pdf2image pdfminer-six pikepdf pypdf unstructured_inference fastapi kaleido uvicorn \"pillow<10.1.0\" pillow_heif -q"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Loading the database\n",
"\n",
"The first step is to create a database with the information from the notice. For this, we will download the notice from the COMVEST website and segment the extracted text into 500-character windows."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import OnlinePDFLoader\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"\n",
"# Loading the COMVEST 2024 notice\n",
"loader = OnlinePDFLoader(\n",
" \"https://www.comvest.unicamp.br/wp-content/uploads/2023/10/31-2023-Dispoe-sobre-o-Vestibular-Unicamp-2024_com-retificacao.pdf\"\n",
")\n",
"data = loader.load()\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size=500, chunk_overlap=100, separators=[\"\\n\", \" \", \"\"]\n",
")\n",
"texts = text_splitter.split_documents(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Creating a Searcher\n",
"Now that we have our database, we need a searcher. For this example, we will use a simple BM25 as a search system, but this could be replaced by any other searcher (such as search via embeddings)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import BM25Retriever\n",
"\n",
"retriever = BM25Retriever.from_documents(texts)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Combining Search System + LLM\n",
"Now that we have our searcher, we just need to implement a prompt specifying the task and invoke the chain."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.question_answering import load_qa_chain\n",
"\n",
"prompt = \"\"\"Baseado nos seguintes documentos, responda a pergunta abaixo.\n",
"\n",
"{context}\n",
"\n",
"Pergunta: {query}\n",
"\"\"\"\n",
"\n",
"qa_prompt = ChatPromptTemplate.from_messages([(\"human\", prompt)])\n",
"\n",
"chain = load_qa_chain(llm, chain_type=\"stuff\", verbose=True, prompt=qa_prompt)\n",
"\n",
"query = \"Qual o tempo máximo para realização da prova?\"\n",
"\n",
"docs = retriever.get_relevant_documents(query)\n",
"\n",
"chain.invoke(\n",
" {\"input_documents\": docs, \"query\": query}\n",
") # Should output something like: \"O tempo máximo para realização da prova é de 5 horas.\""
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -29,12 +29,12 @@
},
"outputs": [],
"source": [
"from langchain.prompts.chat import (\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain_core.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain_openai import ChatOpenAI"
]
},
@ -91,7 +91,7 @@
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
"chat(messages)"
"chat.invoke(messages)"
]
},
{
@ -144,7 +144,7 @@
")\n",
"\n",
"# get a chat completion from the formatted messages\n",
"chat(\n",
"chat.invoke(\n",
" chat_prompt.format_prompt(\n",
" input_language=\"English\", output_language=\"French\", text=\"I love programming.\"\n",
" ).to_messages()\n",

@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "raw",
"id": "a016701c",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Perplexity\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "bf733a38-db84-4363-89e2-de6735c37230",
"metadata": {},
"source": [
"# ChatPerplexity\n",
"\n",
"This notebook covers how to get started with Perplexity chat models."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:00.590587Z",
"start_time": "2024-01-19T11:25:00.127293Z"
},
"tags": []
},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatPerplexity\n",
"from langchain_core.prompts import ChatPromptTemplate"
]
},
{
"cell_type": "markdown",
"id": "97a8ce3a",
"metadata": {},
"source": [
"The code provided assumes that your PPLX_API_KEY is set in your environment variables. If you would like to manually specify your API key and also choose a different model, you can use the following code:\n",
"\n",
"```python\n",
"chat = ChatPerplexity(temperature=0, pplx_api_key=\"YOUR_API_KEY\", model=\"pplx-70b-online\")\n",
"```\n",
"\n",
"You can check a list of available models [here](https://docs.perplexity.ai/docs/model-cards). For reproducibility, we can set the API key dynamically by taking it as an input in this notebook."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d3e49d78",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"PPLX_API_KEY = getpass()\n",
"os.environ[\"PPLX_API_KEY\"] = PPLX_API_KEY"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:04.349676Z",
"start_time": "2024-01-19T11:25:03.964930Z"
},
"tags": []
},
"outputs": [],
"source": [
"chat = ChatPerplexity(temperature=0, model=\"pplx-70b-online\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:07.274418Z",
"start_time": "2024-01-19T11:25:05.898031Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'The Higgs Boson is an elementary subatomic particle that plays a crucial role in the Standard Model of particle physics, which accounts for three of the four fundamental forces governing the behavior of our universe: the strong and weak nuclear forces, electromagnetism, and gravity. The Higgs Boson is important for several reasons:\\n\\n1. **Final Elementary Particle**: The Higgs Boson is the last elementary particle waiting to be discovered under the Standard Model. Its detection helps complete the Standard Model and further our understanding of the fundamental forces in the universe.\\n\\n2. **Mass Generation**: The Higgs Boson is responsible for giving mass to other particles, a process that occurs through its interaction with the Higgs field. This mass generation is essential for the formation of atoms, molecules, and the visible matter we observe in the universe.\\n\\n3. **Implications for New Physics**: While the detection of the Higgs Boson has confirmed many aspects of the Standard Model, it also opens up new possibilities for discoveries beyond the Standard Model. Further research on the Higgs Boson could reveal insights into the nature of dark matter, supersymmetry, and other exotic phenomena.\\n\\n4. **Advancements in Technology**: The search for the Higgs Boson has led to significant advancements in technology, such as the development of artificial intelligence and machine learning algorithms used in particle accelerators like the Large Hadron Collider (LHC). These advancements have not only contributed to the discovery of the Higgs Boson but also have potential applications in various other fields.\\n\\nIn summary, the Higgs Boson is important because it completes the Standard Model, plays a crucial role in mass generation, hints at new physics phenomena beyond the Standard Model, and drives advancements in technology.\\n'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"system = \"You are a helpful assistant.\"\n",
"human = \"{input}\"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
"\n",
"chain = prompt | chat\n",
"response = chain.invoke({\"input\": \"Why is the Higgs Boson important?\"})\n",
"response.content"
]
},
{
"cell_type": "markdown",
"id": "de6d8d5a",
"metadata": {},
"source": [
"You can format and structure the prompts like you would typically. In the following example, we ask the model to tell us a joke about cats."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:10.448733Z",
"start_time": "2024-01-19T11:25:08.866277Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Here\\'s a joke about cats:\\n\\nWhy did the cat want math lessons from a mermaid?\\n\\nBecause it couldn\\'t find its \"core purpose\" in life!\\n\\nRemember, cats are unique and fascinating creatures, and each one has its own special traits and abilities. While some may see them as mysterious or even a bit aloof, they are still beloved pets that bring joy and companionship to their owners. So, if your cat ever seeks guidance from a mermaid, just remember that they are on their own journey to self-discovery!\\n'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat = ChatPerplexity(temperature=0, model=\"pplx-70b-online\")\n",
"prompt = ChatPromptTemplate.from_messages([(\"human\", \"Tell me a joke about {topic}\")])\n",
"chain = prompt | chat\n",
"response = chain.invoke({\"topic\": \"cats\"})\n",
"response.content"
]
},
{
"cell_type": "markdown",
"id": "13d93dc4",
"metadata": {},
"source": [
"## `ChatPerplexity` also supports streaming functionality:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:24.438696Z",
"start_time": "2024-01-19T11:25:14.687480Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Here is a list of some famous tourist attractions in Pakistan:\n",
"\n",
"1. **Minar-e-Pakistan**: A 62-meter high minaret in Lahore that represents the history of Pakistan.\n",
"2. **Badshahi Mosque**: A historic mosque in Lahore with a capacity of 10,000 worshippers.\n",
"3. **Shalimar Gardens**: A beautiful garden in Lahore with landscaped grounds and a series of cascading pools.\n",
"4. **Pakistan Monument**: A national monument in Islamabad representing the four provinces and three districts of Pakistan.\n",
"5. **National Museum of Pakistan**: A museum in Karachi showcasing the country's cultural history.\n",
"6. **Faisal Mosque**: A large mosque in Islamabad that can accommodate up to 300,000 worshippers.\n",
"7. **Clifton Beach**: A popular beach in Karachi offering water activities and recreational facilities.\n",
"8. **Kartarpur Corridor**: A visa-free border crossing and religious corridor connecting Gurdwara Darbar Sahib in Pakistan to Gurudwara Sri Kartarpur Sahib in India.\n",
"9. **Mohenjo-daro**: An ancient Indus Valley civilization site in Sindh, Pakistan, dating back to around 2500 BCE.\n",
"10. **Hunza Valley**: A picturesque valley in Gilgit-Baltistan known for its stunning mountain scenery and unique culture.\n",
"\n",
"These attractions showcase the rich history, diverse culture, and natural beauty of Pakistan, making them popular destinations for both local and international tourists.\n"
]
}
],
"source": [
"chat = ChatPerplexity(temperature=0.7, model=\"pplx-70b-online\")\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"human\", \"Give me a list of famous tourist attractions in Pakistan\")]\n",
")\n",
"chain = prompt | chat\n",
"for chunk in chain.stream({}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -15,21 +15,19 @@
"You can configure the `openai` package to use Azure OpenAI using environment variables. The following is for `bash`:\n",
"\n",
"```bash\n",
"# Set this to `azure`\n",
"export OPENAI_API_TYPE=azure\n",
"# The API version you want to use: set this to `2023-05-15` for the released version.\n",
"export OPENAI_API_VERSION=2023-05-15\n",
"# The API version you want to use: set this to `2023-12-01-preview` for the released version.\n",
"export OPENAI_API_VERSION=2023-12-01-preview\n",
"# The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.\n",
"export OPENAI_API_BASE=https://your-resource-name.openai.azure.com\n",
"export AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com\n",
"# The API key for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.\n",
"export OPENAI_API_KEY=<your Azure OpenAI API key>\n",
"export AZURE_OPENAI_API_KEY=<your Azure OpenAI API key>\n",
"```\n",
"\n",
"Alternatively, you can configure the API right within your running Python environment:\n",
"\n",
"```python\n",
"import os\n",
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-12-01-preview\"\n",
"```\n",
"\n",
"## Azure Active Directory Authentication\n",
@ -76,15 +74,18 @@
"\n",
"_**Note**: These docs are for the Azure text completion models. Models like GPT-4 are chat models. They have a slightly different interface, and can be accessed via the `AzureChatOpenAI` class. For docs on Azure chat see [Azure Chat OpenAI documentation](/docs/integrations/chat/azure_chat_openai)._\n",
"\n",
"Let's say your deployment name is `text-davinci-002-prod`. In the `openai` Python API, you can specify this deployment with the `engine` parameter. For example:\n",
"Let's say your deployment name is `gpt-35-turbo-instruct-prod`. In the `openai` Python API, you can specify this deployment with the `engine` parameter. For example:\n",
"\n",
"```python\n",
"import openai\n",
"\n",
"response = openai.Completion.create(\n",
" engine=\"text-davinci-002-prod\",\n",
" prompt=\"This is a test\",\n",
" max_tokens=5\n",
"client = AzureOpenAI(\n",
" api_version=\"2023-12-01-preview\",\n",
")\n",
"\n",
"response = client.completions.create(\n",
" model=\"gpt-35-turbo-instruct-prod\",\n",
" prompt=\"Test prompt\"\n",
")\n",
"```\n"
]
@ -103,22 +104,21 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"id": "faacfa54",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-05-15\"\n",
"os.environ[\"OPENAI_API_BASE\"] = \"...\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-12-01-preview\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"...\"\n",
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"...\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"id": "8fad2a6e",
"metadata": {},
"outputs": [],
@ -129,7 +129,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "8c80213a",
"metadata": {},
"outputs": [],
@ -137,31 +137,28 @@
"# Create an instance of Azure OpenAI\n",
"# Replace the deployment name with your own\n",
"llm = AzureOpenAI(\n",
" deployment_name=\"td2\",\n",
" model_name=\"gpt-3.5-turbo-instruct\",\n",
" deployment_name=\"gpt-35-turbo-instruct-0914\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "592dc404",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
]
"text/plain": "\" Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\""
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run the LLM\n",
"llm(\"Tell me a joke\")"
"llm.invoke(\"Tell me a joke\")"
]
},
{
@ -174,7 +171,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "9c33fa19",
"metadata": {},
"outputs": [
@ -182,8 +179,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1mAzureOpenAI\u001b[0m\n",
"Params: {'deployment_name': 'text-davinci-002', 'model_name': 'text-davinci-002', 'temperature': 0.7, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n"
"\u001B[1mAzureOpenAI\u001B[0m\n",
"Params: {'deployment_name': 'gpt-35-turbo-instruct-0914', 'model_name': 'gpt-3.5-turbo-instruct', 'temperature': 0.7, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 256}\n"
]
}
],

@ -55,7 +55,7 @@
"output_type": "stream",
"text": [
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: trying to refresh access_token\n",
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: sucessfully refresh access_token\n",
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: successfully refresh access_token\n",
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
]
},

@ -42,7 +42,7 @@
"output_type": "stream",
"text": [
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: trying to refresh access_token\n",
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: sucessfully refresh access_token\n",
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: successfully refresh access_token\n",
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: requesting llm api endpoint: /embeddings/embedding-v1\n",
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: async requesting llm api endpoint: /embeddings/embedding-v1\n",
"[INFO] [09-15 20:01:35] logging.py:55 [t:140292313159488]: async requesting llm api endpoint: /embeddings/embedding-v1\n"

@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "278b6c63",
"metadata": {},
"source": [
"# llamafile\n",
"\n",
"Let's load the [llamafile](https://github.com/Mozilla-Ocho/llamafile) Embeddings class.\n",
"\n",
"## Setup\n",
"\n",
"First, the are 3 setup steps:\n",
"\n",
"1. Download a llamafile. In this notebook, we use `TinyLlama-1.1B-Chat-v1.0.Q5_K_M` but there are many others available on [HuggingFace](https://huggingface.co/models?other=llamafile).\n",
"2. Make the llamafile executable.\n",
"3. Start the llamafile in server mode.\n",
"\n",
"You can run the following bash script to do all this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43ef6dfa-9cc4-4552-8a53-5df523afae7c",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# llamafile setup\n",
"\n",
"# Step 1: Download a llamafile. The download may take several minutes.\n",
"wget -nv -nc https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Step 2: Make the llamafile executable. Note: if you're on Windows, just append '.exe' to the filename.\n",
"chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Step 3: Start llamafile server in background. All the server logs will be written to 'tinyllama.log'.\n",
"# Alternatively, you can just open a separate terminal outside this notebook and run: \n",
"# ./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser --embedding\n",
"./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser --embedding > tinyllama.log 2>&1 &\n",
"pid=$!\n",
"echo \"${pid}\" > .llamafile_pid # write the process pid to a file so we can terminate the server later"
]
},
{
"cell_type": "markdown",
"id": "3188b22f-879f-47b3-9a27-24412f6fad5f",
"metadata": {},
"source": [
"## Embedding texts using LlamafileEmbeddings\n",
"\n",
"Now, we can use the `LlamafileEmbeddings` class to interact with the llamafile server that's currently serving our TinyLlama model at http://localhost:8080."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0be1af71",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings import LlamafileEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c66e5da",
"metadata": {},
"outputs": [],
"source": [
"embedder = LlamafileEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "01370375",
"metadata": {},
"outputs": [],
"source": [
"text = \"This is a test document.\""
]
},
{
"cell_type": "markdown",
"id": "a42e4035",
"metadata": {},
"source": [
"To generate embeddings, you can either query an invidivual text, or you can query a list of texts."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91bc875d-829b-4c3d-8e6f-fc2dda30a3bd",
"metadata": {},
"outputs": [],
"source": [
"query_result = embedder.embed_query(text)\n",
"query_result[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4b0d49e-0c73-44b6-aed5-5b426564e085",
"metadata": {},
"outputs": [],
"source": [
"doc_result = embedder.embed_documents([text])\n",
"doc_result[0][:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ccc78fc-03ae-411d-ae73-74a4ee91c725",
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# cleanup: kill the llamafile server process\n",
"kill $(cat .llamafile_pid)\n",
"rm .llamafile_pid"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
},
"vscode": {
"interpreter": {
"hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -0,0 +1,239 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c94240f5",
"metadata": {},
"source": [
"# Gremlin (with CosmosDB) QA chain\n",
"\n",
"This notebook shows how to use LLMs to provide a natural language interface to a graph database you can query with the Gremlin query language."
]
},
{
"cell_type": "markdown",
"id": "dbc0ee68",
"metadata": {},
"source": [
"You will need to have a Azure CosmosDB Graph database instance. One option is to create a [free CosmosDB Graph database instance in Azure](https://learn.microsoft.com/en-us/azure/cosmos-db/free-tier). \n",
"\n",
"When you create your Cosmos DB account and Graph, use /type as partition key."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62812aad",
"metadata": {},
"outputs": [],
"source": [
"import nest_asyncio\n",
"from langchain.chains.graph_qa import GremlinQAChain\n",
"from langchain.schema import Document\n",
"from langchain_community.graphs import GremlinGraph\n",
"from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship\n",
"from langchain_openai import AzureChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0928915d",
"metadata": {},
"outputs": [],
"source": [
"cosmosdb_name = \"mycosmosdb\"\n",
"cosmosdb_db_id = \"graphtesting\"\n",
"cosmosdb_db_graph_id = \"mygraph\"\n",
"cosmosdb_access_Key = \"longstring==\"\n",
"\n",
"graph = GremlinGraph(\n",
" url=f\"=wss://{cosmosdb_name}.gremlin.cosmos.azure.com:443/\",\n",
" username=f\"/dbs/{cosmosdb_db_id}/colls/{cosmosdb_db_graph_id}\",\n",
" password=cosmosdb_access_Key,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "995ea9b9",
"metadata": {},
"source": [
"## Seeding the database\n",
"\n",
"Assuming your database is empty, you can populate it using the GraphDocuments\n",
"\n",
"For Gremlin, always add property called 'label' for each Node.\n",
"If no label is set, Node.type is used as a label.\n",
"For cosmos using natural id's make sense, as they are visible in the graph explorer."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fedd26b9",
"metadata": {},
"outputs": [],
"source": [
"source_doc = Document(\n",
" page_content=\"Matrix is a movie where Keanu Reeves, Laurence Fishburne and Carrie-Anne Moss acted.\"\n",
")\n",
"movie = Node(id=\"The Matrix\", properties={\"label\": \"movie\", \"title\": \"The Matrix\"})\n",
"actor1 = Node(id=\"Keanu Reeves\", properties={\"label\": \"actor\", \"name\": \"Keanu Reeves\"})\n",
"actor2 = Node(\n",
" id=\"Laurence Fishburne\", properties={\"label\": \"actor\", \"name\": \"Laurence Fishburne\"}\n",
")\n",
"actor3 = Node(\n",
" id=\"Carrie-Anne Moss\", properties={\"label\": \"actor\", \"name\": \"Carrie-Anne Moss\"}\n",
")\n",
"rel1 = Relationship(\n",
" id=5, type=\"ActedIn\", source=actor1, target=movie, properties={\"label\": \"ActedIn\"}\n",
")\n",
"rel2 = Relationship(\n",
" id=6, type=\"ActedIn\", source=actor2, target=movie, properties={\"label\": \"ActedIn\"}\n",
")\n",
"rel3 = Relationship(\n",
" id=7, type=\"ActedIn\", source=actor3, target=movie, properties={\"label\": \"ActedIn\"}\n",
")\n",
"rel4 = Relationship(\n",
" id=8,\n",
" type=\"Starring\",\n",
" source=movie,\n",
" target=actor1,\n",
" properties={\"label\": \"Strarring\"},\n",
")\n",
"rel5 = Relationship(\n",
" id=9,\n",
" type=\"Starring\",\n",
" source=movie,\n",
" target=actor2,\n",
" properties={\"label\": \"Strarring\"},\n",
")\n",
"rel6 = Relationship(\n",
" id=10,\n",
" type=\"Straring\",\n",
" source=movie,\n",
" target=actor3,\n",
" properties={\"label\": \"Strarring\"},\n",
")\n",
"graph_doc = GraphDocument(\n",
" nodes=[movie, actor1, actor2, actor3],\n",
" relationships=[rel1, rel2, rel3, rel4, rel5, rel6],\n",
" source=source_doc,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d18f77a3",
"metadata": {},
"outputs": [],
"source": [
"# The underlying python-gremlin has a problem when running in notebook\n",
"# The following line is a workaround to fix the problem\n",
"nest_asyncio.apply()\n",
"\n",
"# Add the document to the CosmosDB graph.\n",
"graph.add_graph_documents([graph_doc])"
]
},
{
"cell_type": "markdown",
"id": "58c1a8ea",
"metadata": {},
"source": [
"## Refresh graph schema information\n",
"If the schema of database changes (after updates), you can refresh the schema information.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4e3de44f",
"metadata": {},
"outputs": [],
"source": [
"graph.refresh_schema()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1fe76ccd",
"metadata": {},
"outputs": [],
"source": [
"print(graph.schema)"
]
},
{
"cell_type": "markdown",
"id": "68a3c677",
"metadata": {},
"source": [
"## Querying the graph\n",
"\n",
"We can now use the gremlin QA chain to ask question of the graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7476ce98",
"metadata": {},
"outputs": [],
"source": [
"chain = GremlinQAChain.from_llm(\n",
" AzureChatOpenAI(\n",
" temperature=0,\n",
" azure_deployment=\"gpt-4-turbo\",\n",
" ),\n",
" graph=graph,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef8ee27b",
"metadata": {},
"outputs": [],
"source": [
"chain.invoke(\"Who played in The Matrix?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "47c64027-cf42-493a-9c76-2d10ba753728",
"metadata": {},
"outputs": [],
"source": [
"chain.run(\"How many people played in The Matrix?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -7,7 +7,7 @@
"source": [
"# Using local models\n",
"\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the importance of running LLMs locally.\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), [GPT4All](https://github.com/nomic-ai/gpt4all), and [llamafile](https://github.com/Mozilla-Ocho/llamafile) underscore the importance of running LLMs locally.\n",
"\n",
"LangChain has [integrations](https://integrations.langchain.com/) with many open-source LLMs that can be run locally.\n",
"\n",
@ -330,6 +330,62 @@
")"
]
},
{
"cell_type": "markdown",
"id": "e6d012e4-0eef-4734-a826-89ec74fe9f88",
"metadata": {},
"source": [
"### llamafile\n",
"\n",
"One of the simplest ways to run an LLM locally is using a [llamafile](https://github.com/Mozilla-Ocho/llamafile). All you need to do is:\n",
"\n",
"1) Download a llamafile from [HuggingFace](https://huggingface.co/models?other=llamafile)\n",
"2) Make the file executable\n",
"3) Run the file\n",
"\n",
"llamafiles bundle model weights and a [specially-compiled](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#technical-details) version of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) into a single file that can run on most computers without any additional dependencies. They also come with an embedded inference server that provides an [API](https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints) for interacting with your model. \n",
"\n",
"Here's a simple bash script that shows all 3 setup steps:\n",
"\n",
"```bash\n",
"# Download a llamafile from HuggingFace\n",
"wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Make the file executable. On Windows, instead just rename the file to end in \".exe\".\n",
"chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Start the model server. Listens at http://localhost:8080 by default.\n",
"./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser\n",
"```\n",
"\n",
"After you run the above setup steps, you can interact with the model via LangChain:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "735e45b6-9aff-463e-aae4-bbf8ac2b21c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\n-1 1/2 (8 oz. Pounds) ground beef, browned and cooked until no longer pink\\n-3 cups whole wheat spaghetti\\n-4 (10 oz) cans diced tomatoes with garlic and basil\\n-2 eggs, beaten\\n-1 cup grated parmesan cheese\\n-1/2 teaspoon salt\\n-1/4 teaspoon black pepper\\n-1 cup breadcrumbs (16 oz)\\n-2 tablespoons olive oil\\n\\nInstructions:\\n1. Cook spaghetti according to package directions. Drain and set aside.\\n2. In a large skillet, brown ground beef over medium heat until no longer pink. Drain any excess grease.\\n3. Stir in diced tomatoes with garlic and basil, and season with salt and pepper. Cook for 5 to 7 minutes or until sauce is heated through. Set aside.\\n4. In a large bowl, beat eggs with a fork or whisk until fluffy. Add cheese, salt, and black pepper. Set aside.\\n5. In another bowl, combine breadcrumbs and olive oil. Dip each spaghetti into the egg mixture and then coat in the breadcrumb mixture. Place on baking sheet lined with parchment paper to prevent sticking. Repeat until all spaghetti are coated.\\n6. Heat oven to 375 degrees. Bake for 18 to 20 minutes, or until lightly golden brown.\\n7. Serve hot with meatballs and sauce on the side. Enjoy!'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms.llamafile import Llamafile\n",
"\n",
"llamafile = Llamafile()\n",
"\n",
"llamafile.invoke(\"Here is my grandmother's beloved recipe for spaghetti and meatballs:\")"
]
},
{
"cell_type": "markdown",
"id": "d58838ae",
@ -702,7 +758,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.7"
}
},
"nbformat": 4,

@ -22,17 +22,26 @@ class StreamlitChatMessageHistory(BaseChatMessageHistory):
if key not in st.session_state:
st.session_state[key] = []
self._messages = st.session_state[key]
self._key = key
@property
def messages(self) -> List[BaseMessage]: # type: ignore
def messages(self) -> List[BaseMessage]:
"""Retrieve the current list of messages"""
return self._messages
import streamlit as st
return st.session_state[self._key]
@messages.setter
def messages(self, value: List[BaseMessage]) -> None:
"""Set the messages list with a new value"""
import streamlit as st
st.session_state[self._key] = value
def add_message(self, message: BaseMessage) -> None:
"""Add a message to the session memory"""
self._messages.append(message)
self.messages.append(message)
def clear(self) -> None:
"""Clear session memory"""
self._messages.clear()
self.messages.clear()
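For context on the hunk above, here is a hedged usage sketch of `StreamlitChatMessageHistory` after this change; the session-state key, the echo reply, and the file name are illustrative, and the script is meant to be launched with `streamlit run`.

```python
# Illustrative sketch only (save as app.py and launch with `streamlit run app.py`).
import streamlit as st
from langchain_community.chat_message_histories import StreamlitChatMessageHistory

history = StreamlitChatMessageHistory(key="chat_messages")

if prompt := st.chat_input("Say something"):
    history.add_user_message(prompt)           # appends through the messages property
    history.add_ai_message(f"Echo: {prompt}")  # stand-in for a real LLM call

for msg in history.messages:                   # reads st.session_state["chat_messages"]
    st.chat_message(msg.type).write(msg.content)
```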

@ -43,12 +43,14 @@ from langchain_community.chat_models.konko import ChatKonko
from langchain_community.chat_models.litellm import ChatLiteLLM
from langchain_community.chat_models.litellm_router import ChatLiteLLMRouter
from langchain_community.chat_models.llama_edge import LlamaEdgeChatService
from langchain_community.chat_models.maritalk import ChatMaritalk
from langchain_community.chat_models.minimax import MiniMaxChat
from langchain_community.chat_models.mlflow import ChatMlflow
from langchain_community.chat_models.mlflow_ai_gateway import ChatMLflowAIGateway
from langchain_community.chat_models.ollama import ChatOllama
from langchain_community.chat_models.openai import ChatOpenAI
from langchain_community.chat_models.pai_eas_endpoint import PaiEasChatEndpoint
from langchain_community.chat_models.perplexity import ChatPerplexity
from langchain_community.chat_models.promptlayer_openai import PromptLayerChatOpenAI
from langchain_community.chat_models.sparkllm import ChatSparkLLM
from langchain_community.chat_models.tongyi import ChatTongyi
@ -98,5 +100,7 @@ __all__ = [
"GPTRouter",
"ChatYuan2",
"ChatZhipuAI",
"ChatPerplexity",
"ChatKinetica",
"ChatMaritalk",
]

@ -0,0 +1,151 @@
from typing import Any, Dict, List, Optional, Union
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import SimpleChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.pydantic_v1 import Field
class ChatMaritalk(SimpleChatModel):
"""`MariTalk` Chat models API.
This class allows interacting with the MariTalk chatbot API.
To use it, you must provide an API key through the constructor.
Example:
.. code-block:: python
from langchain_community.chat_models import ChatMaritalk
chat = ChatMaritalk(api_key="your_api_key_here")
"""
api_key: str
"""Your MariTalk API key."""
temperature: float = Field(default=0.7, gt=0.0, lt=1.0)
"""Run inference with this temperature.
Must be in the closed interval [0.0, 1.0]."""
max_tokens: int = Field(default=512, gt=0)
"""The maximum number of tokens to generate in the reply."""
do_sample: bool = Field(default=True)
"""Whether or not to use sampling; use `True` to enable."""
top_p: float = Field(default=0.95, gt=0.0, lt=1.0)
"""Nucleus sampling parameter controlling the size of
the probability mass considered for sampling."""
system_message_workaround: bool = Field(default=True)
"""Whether to include a workaround for system messages
by adding them as a user message."""
@property
def _llm_type(self) -> str:
"""Identifies the LLM type as 'maritalk'."""
return "maritalk"
def parse_messages_for_model(
self, messages: List[BaseMessage]
) -> List[Dict[str, Union[str, List[Union[str, Dict[Any, Any]]]]]]:
"""
Parses messages from LangChain's format to the format expected by
the MariTalk API.
Parameters:
messages (List[BaseMessage]): A list of messages in LangChain
format to be parsed.
Returns:
A list of messages formatted for the MariTalk API.
"""
parsed_messages = []
for message in messages:
if isinstance(message, HumanMessage):
parsed_messages.append({"role": "user", "content": message.content})
elif isinstance(message, AIMessage):
parsed_messages.append(
{"role": "assistant", "content": message.content}
)
elif isinstance(message, SystemMessage) and self.system_message_workaround:
# Maritalk models do not understand system messages.
# Instead, we add these messages as user messages.
parsed_messages.append({"role": "user", "content": message.content})
parsed_messages.append({"role": "assistant", "content": "ok"})
return parsed_messages
def _call(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""
Sends the parsed messages to the MariTalk API and returns the generated
response or an error message.
This method makes an HTTP POST request to the MariTalk API with the
provided messages and other parameters.
If the request is successful and the API returns a response,
this method returns a string containing the answer.
If the request is rate-limited or encounters another error,
it returns a string with the error message.
Parameters:
messages (List[BaseMessage]): Messages to send to the model.
stop (Optional[List[str]]): Tokens that will signal the model
to stop generating further tokens.
Returns:
str: If the API call is successful, returns the answer.
If an error occurs (e.g., rate limiting), returns a string
describing the error.
"""
try:
url = "https://chat.maritaca.ai/api/chat/inference"
headers = {"authorization": f"Key {self.api_key}"}
stopping_tokens = stop if stop is not None else []
parsed_messages = self.parse_messages_for_model(messages)
data = {
"messages": parsed_messages,
"do_sample": self.do_sample,
"max_tokens": self.max_tokens,
"temperature": self.temperature,
"top_p": self.top_p,
"stopping_tokens": stopping_tokens,
**kwargs,
}
response = requests.post(url, json=data, headers=headers)
if response.status_code == 429:
return "Rate limited, please try again soon"
elif response.ok:
return response.json().get("answer", "No answer found")
except requests.exceptions.RequestException as e:
return f"An error occurred: {str(e)}"
# Fallback return statement, in case of unexpected code paths
return "An unexpected error occurred"
@property
def _identifying_params(self) -> Dict[str, Any]:
"""
Identifies the key parameters of the chat model for logging
or tracking purposes.
Returns:
A dictionary of the key configuration parameters.
"""
return {
"system_message_workaround": self.system_message_workaround,
"temperature": self.temperature,
"top_p": self.top_p,
"max_tokens": self.max_tokens,
}
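As a quick sketch of the system-message workaround implemented above, `parse_messages_for_model` can be called directly to inspect the payload that would be sent to MariTalk; the API key below is a placeholder.

```python
from langchain_community.chat_models import ChatMaritalk
from langchain_core.messages import HumanMessage, SystemMessage

chat = ChatMaritalk(api_key="YOUR_MARITALK_API_KEY")  # placeholder key

parsed = chat.parse_messages_for_model(
    [
        SystemMessage(content="You suggest pet names."),
        HumanMessage(content="I have a dog"),
    ]
)

# With system_message_workaround=True (the default), the system message becomes
# a user turn followed by a stub "ok" assistant turn:
# [{'role': 'user', 'content': 'You suggest pet names.'},
#  {'role': 'assistant', 'content': 'ok'},
#  {'role': 'user', 'content': 'I have a dog'}]
print(parsed)
```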

@ -0,0 +1,271 @@
"""Wrapper around Perplexity APIs."""
from __future__ import annotations
import logging
from typing import (
Any,
Dict,
Iterator,
List,
Mapping,
Optional,
Tuple,
Type,
Union,
)
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import (
BaseChatModel,
generate_from_stream,
)
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
BaseMessage,
BaseMessageChunk,
ChatMessage,
ChatMessageChunk,
FunctionMessageChunk,
HumanMessage,
HumanMessageChunk,
SystemMessage,
SystemMessageChunk,
ToolMessageChunk,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names
logger = logging.getLogger(__name__)
class ChatPerplexity(BaseChatModel):
"""`Perplexity AI` Chat models API.
To use, you should have the ``openai`` python package installed, and the
environment variable ``PPLX_API_KEY`` set to your API key.
Any parameters that are valid to be passed to the openai.create call can be passed
in, even if not explicitly saved on this class.
Example:
.. code-block:: python
from langchain_community.chat_models import ChatPerplexity
chat = ChatPerplexity(model="pplx-70b-online", temperature=0.7)
"""
client: Any #: :meta private:
model: str = "pplx-70b-online"
"""Model name."""
temperature: float = 0.7
"""What sampling temperature to use."""
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Holds any model parameters valid for `create` call not explicitly specified."""
pplx_api_key: Optional[str] = None
"""Base URL path for API requests,
leave blank if not using a proxy or service emulator."""
request_timeout: Optional[Union[float, Tuple[float, float]]] = None
"""Timeout for requests to PerplexityChat completion API. Default is 600 seconds."""
max_retries: int = 6
"""Maximum number of retries to make when generating."""
streaming: bool = False
"""Whether to stream the results or not."""
max_tokens: Optional[int] = None
"""Maximum number of tokens to generate."""
class Config:
"""Configuration for this pydantic object."""
allow_population_by_field_name = True
@property
def lc_secrets(self) -> Dict[str, str]:
return {"pplx_api_key": "PPLX_API_KEY"}
@root_validator(pre=True, allow_reuse=True)
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Build extra kwargs from additional params that were passed in."""
all_required_field_names = get_pydantic_field_names(cls)
extra = values.get("model_kwargs", {})
for field_name in list(values):
if field_name in extra:
raise ValueError(f"Found {field_name} supplied twice.")
if field_name not in all_required_field_names:
logger.warning(
f"""WARNING! {field_name} is not a default parameter.
{field_name} was transferred to model_kwargs.
Please confirm that {field_name} is what you intended."""
)
extra[field_name] = values.pop(field_name)
invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
if invalid_model_kwargs:
raise ValueError(
f"Parameters {invalid_model_kwargs} should be specified explicitly. "
f"Instead they were passed in as part of `model_kwargs` parameter."
)
values["model_kwargs"] = extra
return values
@root_validator(allow_reuse=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["pplx_api_key"] = get_from_dict_or_env(
values, "pplx_api_key", "PPLX_API_KEY"
)
try:
import openai # noqa: F401
except ImportError:
raise ImportError(
"Could not import openai python package. "
"Please install it with `pip install openai`."
)
try:
values["client"] = openai.OpenAI(
api_key=values["pplx_api_key"], base_url="https://api.perplexity.ai"
)
except AttributeError:
raise ValueError(
"`openai` has no `ChatCompletion` attribute, this is likely "
"due to an old version of the openai package. Try upgrading it "
"with `pip install --upgrade openai`."
)
return values
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling PerplexityChat API."""
return {
"request_timeout": self.request_timeout,
"max_tokens": self.max_tokens,
"stream": self.streaming,
"temperature": self.temperature,
**self.model_kwargs,
}
def _convert_message_to_dict(self, message: BaseMessage) -> Dict[str, Any]:
if isinstance(message, ChatMessage):
message_dict = {"role": message.role, "content": message.content}
elif isinstance(message, SystemMessage):
message_dict = {"role": "system", "content": message.content}
elif isinstance(message, HumanMessage):
message_dict = {"role": "user", "content": message.content}
elif isinstance(message, AIMessage):
message_dict = {"role": "assistant", "content": message.content}
else:
raise TypeError(f"Got unknown type {message}")
return message_dict
def _create_message_dicts(
self, messages: List[BaseMessage], stop: Optional[List[str]]
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
params = dict(self._invocation_params)
if stop is not None:
if "stop" in params:
raise ValueError("`stop` found in both the input and default params.")
params["stop"] = stop
message_dicts = [self._convert_message_to_dict(m) for m in messages]
return message_dicts, params
def _convert_delta_to_message_chunk(
self, _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
role = _dict.get("role")
content = _dict.get("content") or ""
additional_kwargs: Dict = {}
if _dict.get("function_call"):
function_call = dict(_dict["function_call"])
if "name" in function_call and function_call["name"] is None:
function_call["name"] = ""
additional_kwargs["function_call"] = function_call
if _dict.get("tool_calls"):
additional_kwargs["tool_calls"] = _dict["tool_calls"]
if role == "user" or default_class == HumanMessageChunk:
return HumanMessageChunk(content=content)
elif role == "assistant" or default_class == AIMessageChunk:
return AIMessageChunk(content=content, additional_kwargs=additional_kwargs)
elif role == "system" or default_class == SystemMessageChunk:
return SystemMessageChunk(content=content)
elif role == "function" or default_class == FunctionMessageChunk:
return FunctionMessageChunk(content=content, name=_dict["name"])
elif role == "tool" or default_class == ToolMessageChunk:
return ToolMessageChunk(content=content, tool_call_id=_dict["tool_call_id"])
elif role or default_class == ChatMessageChunk:
return ChatMessageChunk(content=content, role=role)
else:
return default_class(content=content)
def _stream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
message_dicts, params = self._create_message_dicts(messages, stop)
params = {**params, **kwargs}
default_chunk_class = AIMessageChunk
if stop:
params["stop_sequences"] = stop
stream_resp = self.client.chat.completions.create(
model=params["model"], messages=message_dicts, stream=True
)
for chunk in stream_resp:
if not isinstance(chunk, dict):
chunk = chunk.dict()
if len(chunk["choices"]) == 0:
continue
choice = chunk["choices"][0]
chunk = self._convert_delta_to_message_chunk(
choice["delta"], default_chunk_class
)
finish_reason = choice.get("finish_reason")
generation_info = (
dict(finish_reason=finish_reason) if finish_reason is not None else None
)
default_chunk_class = chunk.__class__
chunk = ChatGenerationChunk(message=chunk, generation_info=generation_info)
yield chunk
if run_manager:
run_manager.on_llm_new_token(chunk.text, chunk=chunk)
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
if self.streaming:
stream_iter = self._stream(
messages, stop=stop, run_manager=run_manager, **kwargs
)
if stream_iter:
return generate_from_stream(stream_iter)
message_dicts, params = self._create_message_dicts(messages, stop)
params = {**params, **kwargs}
response = self.client.chat.completions.create(
model=params["model"], messages=message_dicts
)
message = AIMessage(content=response.choices[0].message.content)
return ChatResult(generations=[ChatGeneration(message=message)])
@property
def _invocation_params(self) -> Mapping[str, Any]:
"""Get the parameters used to invoke the model."""
pplx_creds: Dict[str, Any] = {
"api_key": self.pplx_api_key,
"api_base": "https://api.perplexity.ai",
"model": self.model,
}
return {**pplx_creds, **self._default_params}
@property
def _llm_type(self) -> str:
"""Return type of chat model."""
return "perplexitychat"

@ -86,7 +86,7 @@ class RecursiveUrlLoader(BaseLoader):
max_depth: Optional[int] = 2,
use_async: Optional[bool] = None,
extractor: Optional[Callable[[str], str]] = None,
metadata_extractor: Optional[Callable[[str, str], str]] = None,
metadata_extractor: Optional[Callable[[str, str], dict]] = None,
exclude_dirs: Optional[Sequence[str]] = (),
timeout: Optional[int] = 10,
prevent_outside: bool = True,

@ -57,6 +57,7 @@ from langchain_community.embeddings.jina import JinaEmbeddings
from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings
from langchain_community.embeddings.laser import LaserEmbeddings
from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings
from langchain_community.embeddings.llamafile import LlamafileEmbeddings
from langchain_community.embeddings.llm_rails import LLMRailsEmbeddings
from langchain_community.embeddings.localai import LocalAIEmbeddings
from langchain_community.embeddings.minimax import MiniMaxEmbeddings
@ -112,6 +113,7 @@ __all__ = [
"JinaEmbeddings",
"LaserEmbeddings",
"LlamaCppEmbeddings",
"LlamafileEmbeddings",
"LLMRailsEmbeddings",
"HuggingFaceHubEmbeddings",
"MlflowEmbeddings",

@ -0,0 +1,119 @@
import logging
from typing import List, Optional
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel
logger = logging.getLogger(__name__)
class LlamafileEmbeddings(BaseModel, Embeddings):
"""Llamafile lets you distribute and run large language models with a
single file.
To get started, see: https://github.com/Mozilla-Ocho/llamafile
To use this class, you will need to first:
1. Download a llamafile.
2. Make the downloaded file executable: `chmod +x path/to/model.llamafile`
3. Start the llamafile in server mode with embeddings enabled:
`./path/to/model.llamafile --server --nobrowser --embedding`
Example:
.. code-block:: python
from langchain_community.embeddings import LlamafileEmbeddings
embedder = LlamafileEmbeddings()
doc_embeddings = embedder.embed_documents(
[
"Alpha is the first letter of the Greek alphabet",
"Beta is the second letter of the Greek alphabet",
]
)
query_embedding = embedder.embed_query(
"What is the second letter of the Greek alphabet"
)
"""
base_url: str = "http://localhost:8080"
"""Base url where the llamafile server is listening."""
request_timeout: Optional[int] = None
"""Timeout for server requests"""
def _embed(self, text: str) -> List[float]:
try:
response = requests.post(
url=f"{self.base_url}/embedding",
headers={
"Content-Type": "application/json",
},
json={
"content": text,
},
timeout=self.request_timeout,
)
except requests.exceptions.ConnectionError:
raise requests.exceptions.ConnectionError(
f"Could not connect to Llamafile server. Please make sure "
f"that a server is running at {self.base_url}."
)
# Raise exception if we got a bad (non-200) response status code
response.raise_for_status()
contents = response.json()
if "embedding" not in contents:
raise KeyError(
"Unexpected output from /embedding endpoint, output dict "
"missing 'embedding' key."
)
embedding = contents["embedding"]
# Sanity check the embedding vector:
# Prior to llamafile v0.6.2, if the server was not started with the
# `--embedding` option, the embedding endpoint would always return a
# 0-vector. See issue:
# https://github.com/Mozilla-Ocho/llamafile/issues/243
# So here we raise an exception if the vector sums to exactly 0.
if sum(embedding) == 0.0:
raise ValueError(
"Embedding sums to 0, did you start the llamafile server with "
"the `--embedding` option enabled?"
)
return embedding
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed documents using a llamafile server running at `self.base_url`.
The llamafile server should be started in a separate process before invoking
this method.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
doc_embeddings = []
for text in texts:
doc_embeddings.append(self._embed(text))
return doc_embeddings
def embed_query(self, text: str) -> List[float]:
"""Embed a query using a llamafile server running at `self.base_url`.
The llamafile server should be started in a separate process before invoking
this method.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self._embed(text)

@ -2,6 +2,7 @@
from langchain_community.graphs.arangodb_graph import ArangoGraph
from langchain_community.graphs.falkordb_graph import FalkorDBGraph
from langchain_community.graphs.gremlin_graph import GremlinGraph
from langchain_community.graphs.hugegraph import HugeGraph
from langchain_community.graphs.kuzu_graph import KuzuGraph
from langchain_community.graphs.memgraph_graph import MemgraphGraph
@ -28,4 +29,5 @@ __all__ = [
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
"GremlinGraph",
]

@ -0,0 +1,207 @@
import hashlib
import sys
from typing import Any, Dict, List, Optional, Union
from langchain_core.utils import get_from_env
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_community.graphs.graph_store import GraphStore
class GremlinGraph(GraphStore):
"""Gremlin wrapper for graph operations.
Parameters:
url (Optional[str]): The URL of the Gremlin database server or env GREMLIN_URI
username (Optional[str]): The collection-identifier like '/dbs/database/colls/graph'
or env GREMLIN_USERNAME if none provided
password (Optional[str]): The connection-key for database authentication
or env GREMLIN_PASSWORD if none provided
traversal_source (str): The traversal source to use for queries. Defaults to 'g'.
message_serializer (Optional[Any]): The message serializer to use for requests.
Defaults to serializer.GraphSONSerializersV2d0()
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
code may attempt commands that delete or mutate data if prompted to do so,
or read sensitive data if such data is present in the database.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this tool.
See https://python.langchain.com/docs/security for more information.
*Implementation details*:
The Gremlin queries are designed to work with Azure CosmosDB limitations
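Example (illustrative sketch; the endpoint, collection identifier and key
below are placeholders for your own Cosmos DB Gremlin account values):
.. code-block:: python
from langchain_community.graphs import GremlinGraph
graph = GremlinGraph(
url="wss://<account>.gremlin.cosmos.azure.com:443/",
username="/dbs/<database>/colls/<graph>",
password="<connection-key>",
)
print(graph.get_schema)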
"""
@property
def get_structured_schema(self) -> Dict[str, Any]:
return self.structured_schema
def __init__(
self,
url: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
traversal_source: str = "g",
message_serializer: Optional[Any] = None,
) -> None:
"""Create a new Gremlin graph wrapper instance."""
try:
import asyncio
from gremlin_python.driver import client, serializer
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
except ImportError:
raise ImportError(
"Please install gremlin-python first: `pip3 install gremlinpython`"
)
self.client = client.Client(
url=get_from_env("url", "GREMLIN_URI", url),
traversal_source=traversal_source,
username=get_from_env("username", "GREMLIN_USERNAME", username),
password=get_from_env("password", "GREMLIN_PASSWORD", password),
message_serializer=message_serializer
if message_serializer
else serializer.GraphSONSerializersV2d0(),
)
self.schema: str = ""
@property
def get_schema(self) -> str:
"""Returns the schema of the Gremlin database"""
if len(self.schema) == 0:
self.refresh_schema()
return self.schema
def refresh_schema(self) -> None:
"""
Refreshes the Gremlin graph schema information.
"""
vertex_schema = self.client.submit("g.V().label().dedup()").all().result()
edge_schema = self.client.submit("g.E().label().dedup()").all().result()
vertex_properties = (
self.client.submit(
"g.V().group().by(label).by(properties().label().dedup().fold())"
)
.all()
.result()[0]
)
self.structured_schema = {
"vertex_labels": vertex_schema,
"edge_labels": edge_schema,
"vertice_props": vertex_properties,
}
self.schema = "\n".join(
[
"Vertex labels are the following:",
",".join(vertex_schema),
"Edge labes are the following:",
",".join(edge_schema),
f"Vertices have following properties:\n{vertex_properties}",
]
)
def query(self, query: str, params: dict = {}) -> List[Dict[str, Any]]:
q = self.client.submit(query)
return q.all().result()
def add_graph_documents(
self, graph_documents: List[GraphDocument], include_source: bool = False
) -> None:
"""
Take GraphDocument as input and use it to construct a graph.
"""
node_cache: Dict[Union[str, int], Node] = {}
for document in graph_documents:
if include_source:
# Create document vertex
doc_props = {
"page_content": document.source.page_content,
"metadata": document.source.metadata,
}
doc_id = hashlib.md5(document.source.page_content.encode()).hexdigest()
doc_node = self.add_node(
Node(id=doc_id, type="Document", properties=doc_props), node_cache
)
# Import nodes to vertices
for n in document.nodes:
node = self.add_node(n, node_cache)
if include_source:
# Add Edge to document for each node
self.add_edge(
Relationship(
type="contains information about",
source=doc_node,
target=node,
properties={},
)
)
self.add_edge(
Relationship(
type="is extracted from",
source=node,
target=doc_node,
properties={},
)
)
# Edges
for el in document.relationships:
# Find or create the source vertex
self.add_node(el.source, node_cache)
# Find or create the target vertex
self.add_node(el.target, node_cache)
# Find or create the edge
self.add_edge(el)
def build_vertex_query(self, node: Node) -> str:
base_query = (
f"g.V().has('id','{node.id}').fold()"
+ f".coalesce(unfold(),addV('{node.type}')"
+ f".property('id','{node.id}')"
+ f".property('type','{node.type}')"
)
for key, value in node.properties.items():
base_query += f".property('{key}', '{value}')"
return base_query + ")"
def build_edge_query(self, relationship: Relationship) -> str:
source_query = f".has('id','{relationship.source.id}')"
target_query = f".has('id','{relationship.target.id}')"
base_query = f""""g.V(){source_query}.as('a')
.V(){target_query}.as('b')
.choose(
__.inE('{relationship.type}').where(outV().as('a')),
__.identity(),
__.addE('{relationship.type}').from('a').to('b')
)
""".replace("\n", "").replace("\t", "")
for key, value in relationship.properties.items():
base_query += f".property('{key}', '{value}')"
return base_query
def add_node(self, node: Node, node_cache: dict = {}) -> Node:
# if properties does not have label, add type as label
if "label" not in node.properties:
node.properties["label"] = node.type
if node.id in node_cache:
return node_cache[node.id]
else:
query = self.build_vertex_query(node)
_ = self.client.submit(query).all().result()[0]
node_cache[node.id] = node
return node
def add_edge(self, relationship: Relationship) -> Any:
query = self.build_edge_query(relationship)
return self.client.submit(query).all().result()

@ -5,10 +5,13 @@ from langchain_core.utils import get_from_env
from langchain_community.graphs.graph_document import GraphDocument
from langchain_community.graphs.graph_store import GraphStore
BASE_ENTITY_LABEL = "__Entity__"
node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node"
WHERE NOT type = "RELATIONSHIP" AND elementType = "node"
AND NOT label IN [$BASE_ENTITY_LABEL]
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output
@ -27,9 +30,18 @@ CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node"
UNWIND other AS other_node
WITH * WHERE NOT label IN [$BASE_ENTITY_LABEL]
AND NOT other_node IN [$BASE_ENTITY_LABEL]
RETURN {start: label, type: property, end: toString(other_node)} AS output
"""
include_docs_query = (
"CREATE (d:Document) "
"SET d.text = $document.page_content "
"SET d += $document.metadata "
"WITH d "
)
def value_sanitize(d: Dict[str, Any]) -> Dict[str, Any]:
"""Sanitize the input dictionary.
@ -63,6 +75,53 @@ def value_sanitize(d: Dict[str, Any]) -> Dict[str, Any]:
return new_dict
def _get_node_import_query(baseEntityLabel: bool, include_source: bool) -> str:
if baseEntityLabel:
return (
f"{include_docs_query if include_source else ''}"
"UNWIND $data AS row "
f"MERGE (source:`{BASE_ENTITY_LABEL}` {{id: row.id}}) "
"SET source += row.properties "
f"{'MERGE (d)-[:MENTIONS]->(source) ' if include_source else ''}"
"WITH source, row "
"CALL apoc.create.addLabels( source, [row.type] ) YIELD node "
"RETURN distinct 'done' AS result"
)
else:
return (
f"{include_docs_query if include_source else ''}"
"UNWIND $data AS row "
"CALL apoc.merge.node([row.type], {id: row.id}, "
"row.properties, {}) YIELD node "
f"{'MERGE (d)-[:MENTIONS]->(node) ' if include_source else ''}"
"RETURN distinct 'done' AS result"
)
def _get_rel_import_query(baseEntityLabel: bool) -> str:
if baseEntityLabel:
return (
"UNWIND $data AS row "
f"MERGE (source:`{BASE_ENTITY_LABEL}` {{id: row.source}}) "
f"MERGE (target:`{BASE_ENTITY_LABEL}` {{id: row.target}}) "
"WITH source, target, row "
"CALL apoc.merge.relationship(source, row.type, "
"{}, row.properties, target) YIELD rel "
"RETURN distinct 'done'"
)
else:
return (
"UNWIND $data AS row "
"CALL apoc.merge.node([row.source_label], {id: row.source},"
"{}, {}) YIELD node as source "
"CALL apoc.merge.node([row.target_label], {id: row.target},"
"{}, {}) YIELD node as target "
"CALL apoc.merge.relationship(source, row.type, "
"{}, row.properties, target) YIELD rel "
"RETURN distinct 'done'"
)
class Neo4jGraph(GraphStore):
"""Neo4j database wrapper for various graph operations.
@ -173,14 +232,42 @@ class Neo4jGraph(GraphStore):
"""
Refreshes the Neo4j graph schema information.
"""
node_properties = [el["output"] for el in self.query(node_properties_query)]
rel_properties = [el["output"] for el in self.query(rel_properties_query)]
relationships = [el["output"] for el in self.query(rel_query)]
from neo4j.exceptions import ClientError
node_properties = [
el["output"]
for el in self.query(
node_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
]
rel_properties = [
el["output"]
for el in self.query(
rel_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
]
relationships = [
el["output"]
for el in self.query(
rel_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
]
# Get constraints & indexes
try:
constraint = self.query("SHOW CONSTRAINTS")
index = self.query("SHOW INDEXES YIELD *")
except (
ClientError
): # Read-only user might not have access to schema information
constraint = []
index = []
self.structured_schema = {
"node_props": {el["labels"]: el["properties"] for el in node_properties},
"rel_props": {el["type"]: el["properties"] for el in rel_properties},
"relationships": relationships,
"metadata": {"constraint": constraint, "index": index},
}
# Format node properties
@ -216,28 +303,51 @@ class Neo4jGraph(GraphStore):
)
def add_graph_documents(
self, graph_documents: List[GraphDocument], include_source: bool = False
self,
graph_documents: List[GraphDocument],
include_source: bool = False,
baseEntityLabel: bool = False,
) -> None:
"""
Take GraphDocument as input and use it to construct a graph.
This method constructs nodes and relationships in the graph based on the
provided GraphDocument objects.
Parameters:
- graph_documents (List[GraphDocument]): A list of GraphDocument objects
that contain the nodes and relationships to be added to the graph. Each
GraphDocument should encapsulate the structure of part of the graph,
including nodes, relationships, and the source document information.
- include_source (bool, optional): If True, stores the source document
and links it to nodes in the graph using the MENTIONS relationship.
This is useful for tracing back the origin of data. Defaults to False.
- baseEntityLabel (bool, optional): If True, each newly created node
gets a secondary __Entity__ label, which is indexed and improves import
speed and performance. Defaults to False.
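Example (a minimal sketch; the node ids and types are illustrative and
``graph`` stands for an already-connected Neo4jGraph instance):
.. code-block:: python
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import (
GraphDocument,
Node,
Relationship,
)
doc = GraphDocument(
nodes=[Node(id="foo", type="foo"), Node(id="bar", type="bar")],
relationships=[
Relationship(
source=Node(id="foo", type="foo"),
target=Node(id="bar", type="bar"),
type="REL",
)
],
source=Document(page_content="source document"),
)
graph.add_graph_documents([doc], include_source=True, baseEntityLabel=True)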
"""
for document in graph_documents:
include_docs_query = (
"CREATE (d:Document) "
"SET d.text = $document.page_content "
"SET d += $document.metadata "
"WITH d "
if baseEntityLabel: # Check if constraint already exists
constraint_exists = any(
[
el["labelsOrTypes"] == [BASE_ENTITY_LABEL]
and el["properties"] == ["id"]
for el in self.structured_schema.get("metadata", {}).get(
"constraint"
)
]
)
if not constraint_exists:
# Create constraint
self.query(
f"CREATE CONSTRAINT IF NOT EXISTS FOR (b:{BASE_ENTITY_LABEL}) "
"REQUIRE b.id IS UNIQUE;"
)
self.refresh_schema() # Refresh constraint information
node_import_query = _get_node_import_query(baseEntityLabel, include_source)
rel_import_query = _get_rel_import_query(baseEntityLabel)
for document in graph_documents:
# Import nodes
self.query(
(
f"{include_docs_query if include_source else ''}"
"UNWIND $data AS row "
"CALL apoc.merge.node([row.type], {id: row.id}, "
"row.properties, {}) YIELD node "
f"{'MERGE (d)-[:MENTIONS]->(node) ' if include_source else ''}"
"RETURN distinct 'done' AS result"
),
node_import_query,
{
"data": [el.__dict__ for el in document.nodes],
"document": document.source.__dict__,
@ -245,14 +355,7 @@ class Neo4jGraph(GraphStore):
)
# Import relationships
self.query(
"UNWIND $data AS row "
"CALL apoc.merge.node([row.source_label], {id: row.source},"
"{}, {}) YIELD node as source "
"CALL apoc.merge.node([row.target_label], {id: row.target},"
"{}, {}) YIELD node as target "
"CALL apoc.merge.relationship(source, row.type, "
"{}, row.properties, target) YIELD rel "
"RETURN distinct 'done'",
rel_import_query,
{
"data": [
{

@ -295,6 +295,12 @@ def _import_llamacpp() -> Type[BaseLLM]:
return LlamaCpp
def _import_llamafile() -> Type[BaseLLM]:
from langchain_community.llms.llamafile import Llamafile
return Llamafile
def _import_manifest() -> Type[BaseLLM]:
from langchain_community.llms.manifest import ManifestWrapper

@ -9,8 +9,8 @@ from langchain_core.callbacks import (
)
from langchain_core.language_models.llms import LLM
from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Extra, Field, root_validator
from langchain_core.utils import get_from_dict_or_env
from langchain_core.pydantic_v1 import Extra, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from tenacity import (
before_sleep_log,
retry,
@ -73,7 +73,8 @@ class BaseCohere(Serializable):
temperature: float = 0.75
"""A non-negative float that tunes the degree of randomness in generation."""
cohere_api_key: Optional[str] = None
cohere_api_key: Optional[SecretStr] = None
"""Cohere API key. If not provided, will be read from the environment variable."""
stop: Optional[List[str]] = None
@ -94,13 +95,17 @@ class BaseCohere(Serializable):
"Please install it with `pip install cohere`."
)
else:
cohere_api_key = get_from_dict_or_env(
values, "cohere_api_key", "COHERE_API_KEY"
values["cohere_api_key"] = convert_to_secret_str(
get_from_dict_or_env(values, "cohere_api_key", "COHERE_API_KEY")
)
client_name = values["user_agent"]
values["client"] = cohere.Client(cohere_api_key, client_name=client_name)
values["client"] = cohere.Client(
api_key=values["cohere_api_key"].get_secret_value(),
client_name=client_name,
)
values["async_client"] = cohere.AsyncClient(
cohere_api_key, client_name=client_name
api_key=values["cohere_api_key"].get_secret_value(),
client_name=client_name,
)
return values

@ -236,7 +236,7 @@ class _OllamaCommon(BaseLanguageModel):
f"and you should pull the model with `ollama pull {self.model}`."
)
else:
optional_detail = response.json().get("error")
optional_detail = response.text
raise ValueError(
f"Ollama call failed with status code {response.status_code}."
f" Details: {optional_detail}"
@ -297,7 +297,7 @@ class _OllamaCommon(BaseLanguageModel):
"Ollama call failed with status code 404."
)
else:
optional_detail = await response.json().get("error") # type: ignore[attr-defined]
optional_detail = response.text
raise ValueError(
f"Ollama call failed with status code {response.status}."
f" Details: {optional_detail}"

@ -209,7 +209,7 @@ class Chroma(VectorStore):
empty_ids.append(idx)
if non_empty_ids:
metadatas = [metadatas[idx] for idx in non_empty_ids]
images_with_metadatas = [uris[idx] for idx in non_empty_ids]
images_with_metadatas = [b64_texts[idx] for idx in non_empty_ids]
embeddings_with_metadatas = (
[embeddings[idx] for idx in non_empty_ids] if embeddings else None
)
@ -231,7 +231,7 @@ class Chroma(VectorStore):
else:
raise e
if empty_ids:
images_without_metadatas = [uris[j] for j in empty_ids]
images_without_metadatas = [b64_texts[j] for j in empty_ids]
embeddings_without_metadatas = (
[embeddings[j] for j in empty_ids] if embeddings else None
)

@ -215,7 +215,6 @@ class MongoDBAtlasVectorSearch(VectorStore):
for res in cursor:
text = res.pop(self._text_key)
score = res.pop("score")
del res[self._embedding_key]
docs.append((Document(page_content=text, metadata=res), score))
return docs

@ -1118,7 +1118,7 @@ class Redis(VectorStore):
base_query = f"@{vector_key}:[VECTOR_RANGE $distance_threshold $vector]"
if filter:
base_query = "(" + base_query + " " + str(filter) + ")"
base_query = str(filter) + " " + base_query
query_string = base_query + "=>{$yield_distance_as: distance}"

@ -1,12 +1,30 @@
import os
from langchain_core.documents import Document
from langchain_community.graphs import Neo4jGraph
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_community.graphs.neo4j_graph import (
BASE_ENTITY_LABEL,
node_properties_query,
rel_properties_query,
rel_query,
)
test_data = [
GraphDocument(
nodes=[Node(id="foo", type="foo"), Node(id="bar", type="bar")],
relationships=[
Relationship(
source=Node(id="foo", type="foo"),
target=Node(id="bar", type="bar"),
type="REL",
)
],
source=Document(page_content="source document"),
)
]
def test_cypher_return_correct_schema() -> None:
"""Test that chain returns direct results."""
@ -37,9 +55,15 @@ def test_cypher_return_correct_schema() -> None:
# Refresh schema information
graph.refresh_schema()
node_properties = graph.query(node_properties_query)
relationships_properties = graph.query(rel_properties_query)
relationships = graph.query(rel_query)
node_properties = graph.query(
node_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
relationships_properties = graph.query(
rel_properties_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
relationships = graph.query(
rel_query, params={"BASE_ENTITY_LABEL": BASE_ENTITY_LABEL}
)
expected_node_properties = [
{
@ -116,3 +140,112 @@ def test_neo4j_sanitize_values() -> None:
output = graph.query("RETURN range(0,130,1) AS result")
assert output == [{}]
def test_neo4j_add_data() -> None:
"""Test that neo4j correctly import graph document."""
url = os.environ.get("NEO4J_URI")
username = os.environ.get("NEO4J_USERNAME")
password = os.environ.get("NEO4J_PASSWORD")
assert url is not None
assert username is not None
assert password is not None
graph = Neo4jGraph(url=url, username=username, password=password, sanitize=True)
# Delete all nodes in the graph
graph.query("MATCH (n) DETACH DELETE n")
# Remove all constraints
graph.query("CALL apoc.schema.assert({}, {})")
graph.refresh_schema()
# Create two nodes and a relationship
graph.add_graph_documents(test_data)
output = graph.query(
"MATCH (n) RETURN labels(n) AS label, count(*) AS count ORDER BY label"
)
assert output == [{"label": ["bar"], "count": 1}, {"label": ["foo"], "count": 1}]
assert graph.structured_schema["metadata"]["constraint"] == []
def test_neo4j_add_data_source() -> None:
"""Test that neo4j correctly import graph document with source."""
url = os.environ.get("NEO4J_URI")
username = os.environ.get("NEO4J_USERNAME")
password = os.environ.get("NEO4J_PASSWORD")
assert url is not None
assert username is not None
assert password is not None
graph = Neo4jGraph(url=url, username=username, password=password, sanitize=True)
# Delete all nodes in the graph
graph.query("MATCH (n) DETACH DELETE n")
# Remove all constraints
graph.query("CALL apoc.schema.assert({}, {})")
graph.refresh_schema()
# Create two nodes and a relationship
graph.add_graph_documents(test_data, include_source=True)
output = graph.query(
"MATCH (n) RETURN labels(n) AS label, count(*) AS count ORDER BY label"
)
assert output == [
{"label": ["Document"], "count": 1},
{"label": ["bar"], "count": 1},
{"label": ["foo"], "count": 1},
]
assert graph.structured_schema["metadata"]["constraint"] == []
def test_neo4j_add_data_base() -> None:
"""Test that neo4j correctly import graph document with base_entity."""
url = os.environ.get("NEO4J_URI")
username = os.environ.get("NEO4J_USERNAME")
password = os.environ.get("NEO4J_PASSWORD")
assert url is not None
assert username is not None
assert password is not None
graph = Neo4jGraph(url=url, username=username, password=password, sanitize=True)
# Delete all nodes in the graph
graph.query("MATCH (n) DETACH DELETE n")
# Remove all constraints
graph.query("CALL apoc.schema.assert({}, {})")
graph.refresh_schema()
# Create two nodes and a relationship
graph.add_graph_documents(test_data, baseEntityLabel=True)
output = graph.query(
"MATCH (n) RETURN apoc.coll.sort(labels(n)) AS label, "
"count(*) AS count ORDER BY label"
)
assert output == [
{"label": [BASE_ENTITY_LABEL, "bar"], "count": 1},
{"label": [BASE_ENTITY_LABEL, "foo"], "count": 1},
]
assert graph.structured_schema["metadata"]["constraint"] != []
def test_neo4j_add_data_base_source() -> None:
"""Test that neo4j correctly import graph document with base_entity and source."""
url = os.environ.get("NEO4J_URI")
username = os.environ.get("NEO4J_USERNAME")
password = os.environ.get("NEO4J_PASSWORD")
assert url is not None
assert username is not None
assert password is not None
graph = Neo4jGraph(url=url, username=username, password=password, sanitize=True)
# Delete all nodes in the graph
graph.query("MATCH (n) DETACH DELETE n")
# Remove all constraints
graph.query("CALL apoc.schema.assert({}, {})")
graph.refresh_schema()
# Create two nodes and a relationship
graph.add_graph_documents(test_data, baseEntityLabel=True, include_source=True)
output = graph.query(
"MATCH (n) RETURN apoc.coll.sort(labels(n)) AS label, "
"count(*) AS count ORDER BY label"
)
assert output == [
{"label": ["Document"], "count": 1},
{"label": [BASE_ENTITY_LABEL, "bar"], "count": 1},
{"label": [BASE_ENTITY_LABEL, "foo"], "count": 1},
]
assert graph.structured_schema["metadata"]["constraint"] != []

@ -2,6 +2,9 @@
from pathlib import Path
from langchain_core.pydantic_v1 import SecretStr
from pytest import MonkeyPatch
from langchain_community.llms.cohere import Cohere
from langchain_community.llms.loading import load_llm
from tests.integration_tests.llms.utils import assert_llm_equality
@ -14,6 +17,16 @@ def test_cohere_call() -> None:
assert isinstance(output, str)
def test_cohere_api_key(monkeypatch: MonkeyPatch) -> None:
"""Test that cohere api key is a secret key."""
# test initialization from init
assert isinstance(Cohere(cohere_api_key="1").cohere_api_key, SecretStr)
# test initialization from env variable
monkeypatch.setenv("COHERE_API_KEY", "secret-api-key")
assert isinstance(Cohere().cohere_api_key, SecretStr)
def test_saving_loading_llm(tmp_path: Path) -> None:
"""Test saving/loading an Cohere LLM."""
llm = Cohere(max_tokens=10)

@ -13,6 +13,7 @@ EXPECTED_ALL = [
"ChatDeepInfra",
"ChatGooglePalm",
"ChatHuggingFace",
"ChatMaritalk",
"ChatMlflow",
"ChatMLflowAIGateway",
"ChatOllama",
@ -40,6 +41,7 @@ EXPECTED_ALL = [
"GPTRouter",
"ChatYuan2",
"ChatZhipuAI",
"ChatPerplexity",
"ChatKinetica",
]

@ -0,0 +1,30 @@
"""Test Perplexity Chat API wrapper."""
import os
import pytest
from langchain_community.chat_models import ChatPerplexity
os.environ["PPLX_API_KEY"] = "foo"
@pytest.mark.requires("openai")
def test_perplexity_model_name_param() -> None:
llm = ChatPerplexity(model="foo")
assert llm.model == "foo"
@pytest.mark.requires("openai")
def test_perplexity_model_kwargs() -> None:
llm = ChatPerplexity(model="test", model_kwargs={"foo": "bar"})
assert llm.model_kwargs == {"foo": "bar"}
@pytest.mark.requires("openai")
def test_perplexity_initialization() -> None:
"""Test perplexity initialization."""
# Verify that chat perplexity can be initialized using a secret key provided
# as a parameter rather than an environment variable.
ChatPerplexity(
model="test", perplexity_api_key="test", temperature=0.7, verbose=True
)

@ -17,6 +17,7 @@ EXPECTED_ALL = [
"JinaEmbeddings",
"LaserEmbeddings",
"LlamaCppEmbeddings",
"LlamafileEmbeddings",
"LLMRailsEmbeddings",
"HuggingFaceHubEmbeddings",
"MlflowAIGatewayEmbeddings",

@ -0,0 +1,67 @@
import json
import numpy as np
import requests
from pytest import MonkeyPatch
from langchain_community.embeddings import LlamafileEmbeddings
def mock_response() -> requests.Response:
contents = json.dumps({"embedding": np.random.randn(512).tolist()})
response = requests.Response()
response.status_code = 200
response._content = str.encode(contents)
return response
def test_embed_documents(monkeypatch: MonkeyPatch) -> None:
"""
Test basic functionality of the `embed_documents` method
"""
embedder = LlamafileEmbeddings(
base_url="http://llamafile-host:8080",
)
def mock_post(url, headers, json, timeout): # type: ignore[no-untyped-def]
assert url == "http://llamafile-host:8080/embedding"
assert headers == {
"Content-Type": "application/json",
}
# the request body should contain only the text to embed
assert json == {"content": "Test text"}
# request_timeout defaults to None
assert timeout is None
return mock_response()
monkeypatch.setattr(requests, "post", mock_post)
out = embedder.embed_documents(["Test text", "Test text"])
assert isinstance(out, list)
assert len(out) == 2
for vec in out:
assert len(vec) == 512
def test_embed_query(monkeypatch: MonkeyPatch) -> None:
"""
Test basic functionality of the `embed_query` method
"""
embedder = LlamafileEmbeddings(
base_url="http://llamafile-host:8080",
)
def mock_post(url, headers, json, timeout): # type: ignore[no-untyped-def]
assert url == "http://llamafile-host:8080/embedding"
assert headers == {
"Content-Type": "application/json",
}
# the request body should contain only the text to embed
assert json == {"content": "Test text"}
# request_timeout defaults to None
assert timeout is None
return mock_response()
monkeypatch.setattr(requests, "post", mock_post)
out = embedder.embed_query("Test text")
assert isinstance(out, list)
assert len(out) == 512

@ -14,6 +14,7 @@ EXPECTED_ALL = [
"FalkorDBGraph",
"TigerGraph",
"OntotextGraphDBGraph",
"GremlinGraph",
]

@ -0,0 +1,221 @@
"""Question answering over a graph."""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from langchain_community.graphs import GremlinGraph
from langchain_core.callbacks.manager import CallbackManager, CallbackManagerForChainRun
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain.chains.base import Chain
from langchain.chains.graph_qa.prompts import (
CYPHER_QA_PROMPT,
GRAPHDB_SPARQL_FIX_TEMPLATE,
GREMLIN_GENERATION_PROMPT,
)
from langchain.chains.llm import LLMChain
INTERMEDIATE_STEPS_KEY = "intermediate_steps"
def extract_gremlin(text: str) -> str:
"""Extract Gremlin code from a text.
Args:
text: Text to extract Gremlin code from.
Returns:
Gremlin code extracted from the text.
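Example (illustrative):
.. code-block:: python
extract_gremlin("```gremlin\ng.V().hasLabel('person').count()\n```")
# -> "g.V().hasLabel('person').count()"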
"""
text = text.replace("`", "")
if text.startswith("gremlin"):
text = text[len("gremlin") :]
return text.replace("\n", "")
class GremlinQAChain(Chain):
"""Chain for question-answering against a graph by generating gremlin statements.
*Security note*: Make sure that the database connection uses credentials
that are narrowly-scoped to only include necessary permissions.
Failure to do so may result in data corruption or loss, since the calling
code may attempt commands that delete or mutate data if prompted to do so,
or read sensitive data if such data is present in the database.
The best way to guard against such negative outcomes is to (as appropriate)
limit the permissions granted to the credentials used with this tool.
See https://python.langchain.com/docs/security for more information.
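Example (a minimal sketch; ``llm`` stands for any chat model and ``graph``
for an existing GremlinGraph instance):
.. code-block:: python
chain = GremlinQAChain.from_llm(llm, graph=graph, verbose=True)
result = chain.invoke({"query": "How many vertices does the graph contain?"})
print(result["result"])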
"""
graph: GremlinGraph = Field(exclude=True)
gremlin_generation_chain: LLMChain
qa_chain: LLMChain
gremlin_fix_chain: LLMChain
max_fix_retries: int = 3
input_key: str = "query" #: :meta private:
output_key: str = "result" #: :meta private:
top_k: int = 100
return_direct: bool = False
return_intermediate_steps: bool = False
@property
def input_keys(self) -> List[str]:
"""Input keys.
:meta private:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""Output keys.
:meta private:
"""
_output_keys = [self.output_key]
return _output_keys
@classmethod
def from_llm(
cls,
llm: BaseLanguageModel,
*,
gremlin_fix_prompt: BasePromptTemplate = PromptTemplate(
input_variables=["error_message", "generated_sparql", "schema"],
template=GRAPHDB_SPARQL_FIX_TEMPLATE.replace("SPARQL", "Gremlin").replace(
"in Turtle format", ""
),
),
qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
gremlin_prompt: BasePromptTemplate = GREMLIN_GENERATION_PROMPT,
**kwargs: Any,
) -> GremlinQAChain:
"""Initialize from LLM."""
qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
gremlin_generation_chain = LLMChain(llm=llm, prompt=gremlin_prompt)
gremlin_fix_chain = LLMChain(llm=llm, prompt=gremlin_fix_prompt)
return cls(
qa_chain=qa_chain,
gremlin_generation_chain=gremlin_generation_chain,
gremlin_fix_chain=gremlin_fix_chain,
**kwargs,
)
def _call(
self,
inputs: Dict[str, Any],
run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, str]:
"""Generate gremlin statement, use it to look up in db and answer question."""
_run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
callbacks = _run_manager.get_child()
question = inputs[self.input_key]
intermediate_steps: List = []
chain_response = self.gremlin_generation_chain.invoke(
{"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
)
generated_gremlin = extract_gremlin(
chain_response[self.gremlin_generation_chain.output_key]
)
_run_manager.on_text("Generated gremlin:", end="\n", verbose=self.verbose)
_run_manager.on_text(
generated_gremlin, color="green", end="\n", verbose=self.verbose
)
intermediate_steps.append({"query": generated_gremlin})
if generated_gremlin:
context = self.execute_with_retry(
_run_manager, callbacks, generated_gremlin
)[: self.top_k]
else:
context = []
if self.return_direct:
final_result = context
else:
_run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
_run_manager.on_text(
str(context), color="green", end="\n", verbose=self.verbose
)
intermediate_steps.append({"context": context})
result = self.qa_chain.invoke(
{"question": question, "context": context},
callbacks=callbacks,
)
final_result = result[self.qa_chain.output_key]
chain_result: Dict[str, Any] = {self.output_key: final_result}
if self.return_intermediate_steps:
chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
return chain_result
def execute_query(self, query: str) -> List[Any]:
try:
return self.graph.query(query)
except Exception as e:
if hasattr(e, "status_message"):
raise ValueError(e.status_message)
else:
raise ValueError(str(e))
def execute_with_retry(
self,
_run_manager: CallbackManagerForChainRun,
callbacks: CallbackManager,
generated_gremlin: str,
) -> List[Any]:
try:
return self.execute_query(generated_gremlin)
except Exception as e:
retries = 0
error_message = str(e)
self.log_invalid_query(_run_manager, generated_gremlin, error_message)
while retries < self.max_fix_retries:
try:
fix_chain_result = self.gremlin_fix_chain.invoke(
{
"error_message": error_message,
# we are borrowing template from sparql
"generated_sparql": generated_gremlin,
"schema": self.schema,
},
callbacks=callbacks,
)
fixed_gremlin = fix_chain_result[self.gremlin_fix_chain.output_key]
return self.execute_query(fixed_gremlin)
except Exception as e:
retries += 1
parse_exception = str(e)
self.log_invalid_query(_run_manager, fixed_gremlin, parse_exception)
raise ValueError("The generated Gremlin query is invalid.")
def log_invalid_query(
self,
_run_manager: CallbackManagerForChainRun,
generated_query: str,
error_message: str,
) -> None:
_run_manager.on_text("Invalid Gremlin query: ", end="\n", verbose=self.verbose)
_run_manager.on_text(
generated_query, color="red", end="\n", verbose=self.verbose
)
_run_manager.on_text(
"Gremlin Query Parse Error: ", end="\n", verbose=self.verbose
)
_run_manager.on_text(
error_message, color="red", end="\n\n", verbose=self.verbose
)

@ -1,6 +1,6 @@
# langchain-ibm
This package provides the integration between LangChain and IBM Watson AI through the `ibm-watsonx-ai` SDK.
This package provides the integration between LangChain and IBM watsonx.ai through the `ibm-watsonx-ai` SDK.
## Installation
@ -10,10 +10,6 @@ To use the `langchain-ibm` package, follow these installation steps:
pip install langchain-ibm
```
## Usage
### Setting up
@ -44,15 +40,10 @@ In alternative, you can set the environment variable in your terminal.
set WATSONX_APIKEY=your_ibm_api_key
```
### Loading the model
You might need to adjust model parameters for different models or tasks. For more details on the parameters, refer to IBM's [documentation](https://ibm.github.io/watsonx-ai-python-sdk/fm_model.html#metanames.GenTextParamsMetaNames).
```python
parameters = {
"decoding_method": "sample",
@ -83,7 +74,6 @@ watsonx_llm = WatsonxLLM(
- You need to specify the model you want to use for inferencing through `model_id`. You can find the list of available models [here](https://ibm.github.io/watsonx-ai-python-sdk/fm_model.html#ibm_watsonx_ai.foundation_models.utils.enums.ModelTypes).
Alternatively you can use Cloud Pak for Data credentials. For more details, refer to IBM's [documentation](https://ibm.github.io/watsonx-ai-python-sdk/setup_cpd.html).
```python
@ -99,9 +89,6 @@ watsonx_llm = WatsonxLLM(
)
```
### Create a Chain
Create `PromptTemplate` objects which will be responsible for creating a random question.
@ -123,10 +110,6 @@ response = llm_chain.invoke("dog")
print(response)
```
### Calling the Model Directly
To obtain completions, you can call the model directly using a string prompt.
@ -149,9 +132,6 @@ response = watsonx_llm.generate(
print(response)
```
### Streaming the Model output
You can stream the model output.

@ -16,7 +16,7 @@ class WatsonxLLM(BaseLLM):
"""
IBM watsonx.ai large language models.
To use, you should have ``ibm_watsonx_ai`` python package installed,
To use, you should have ``langchain_ibm`` python package installed,
and the environment variable ``WATSONX_APIKEY`` set with your API key, or pass
it as a named parameter to the constructor.
@ -103,6 +103,18 @@ class WatsonxLLM(BaseLLM):
@property
def lc_secrets(self) -> Dict[str, str]:
"""A map of constructor argument names to secret ids.
For example:
{
"url": "WATSONX_URL",
"apikey": "WATSONX_APIKEY",
"token": "WATSONX_TOKEN",
"password": "WATSONX_PASSWORD",
"username": "WATSONX_USERNAME",
"instance_id": "WATSONX_INSTANCE_ID",
}
"""
return {
"url": "WATSONX_URL",
"apikey": "WATSONX_APIKEY",

@ -245,13 +245,13 @@ ibm-cos-sdk-core = "2.13.4"
[[package]]
name = "ibm-watson-machine-learning"
version = "1.0.348"
version = "1.0.349"
description = "IBM Watson Machine Learning API Client"
optional = false
python-versions = ">=3.10"
files = [
{file = "ibm_watson_machine_learning-1.0.348-py3-none-any.whl", hash = "sha256:46cec3bcc9c6522440290d284845390158b92573d9759b77b6987d22580a1222"},
{file = "ibm_watson_machine_learning-1.0.348.tar.gz", hash = "sha256:9eceaae1c7b01457c9d5e3bdfb24209a828b4c6212b159dbfd9fdb9a74268e14"},
{file = "ibm_watson_machine_learning-1.0.349-py3-none-any.whl", hash = "sha256:b5bc4cdec2a9cda1c9fa6681558f721a7a3058937257ae3040618c7183fe2f55"},
{file = "ibm_watson_machine_learning-1.0.349.tar.gz", hash = "sha256:46dd3d67bee39c3e84b047e651bafa06dfb0cb973354c4e2c582928340b51a17"},
]
[package.dependencies]
@ -274,17 +274,17 @@ fl-rt23-1-py3-10 = ["GPUtil", "cloudpickle (==1.3.0)", "cryptography (==39.0.1)"
[[package]]
name = "ibm-watsonx-ai"
version = "0.1.8"
version = "0.2.0"
description = "IBM watsonx.ai API Client"
optional = false
python-versions = ">=3.10"
files = [
{file = "ibm_watsonx_ai-0.1.8-py3-none-any.whl", hash = "sha256:85536b00aa3c495540480e53a17b56a0990d1340e47fae0e7ea778dcd717e5dc"},
{file = "ibm_watsonx_ai-0.1.8.tar.gz", hash = "sha256:ba4e60091165cb755985f85ef0ece1db76ad1d351dd515a55d739467196dace3"},
{file = "ibm_watsonx_ai-0.2.0-py3-none-any.whl", hash = "sha256:75234916b2cd9accedb401a3c11925845efa08361cff4978589f58e39dad48f6"},
{file = "ibm_watsonx_ai-0.2.0.tar.gz", hash = "sha256:9af9e402c6f0c74e30ac11c0ae1b80e55e21fb2b2d7c546fc506ab1b9be4851e"},
]
[package.dependencies]
ibm-watson-machine-learning = ">=1.0.335"
ibm-watson-machine-learning = ">=1.0.349"
[package.extras]
fl = ["cloudpickle (==1.3.0)", "ddsketch (==1.1.2)", "diffprivlib (==0.5.1)", "environs (==9.5.0)", "gym", "image (==1.5.33)", "jsonpickle (==1.4.2)", "lz4", "numcompress (==0.1.2)", "numpy (==1.19.2)", "pandas (==1.3.4)", "parse (==1.19.0)", "pathlib2 (==2.3.6)", "psutil", "pyYAML (==5.4.1)", "pytest (==6.2.5)", "requests (==2.27.1)", "scikit-learn (==0.23.2)", "scipy (==1.6.3)", "setproctitle", "skorch (==0.11.0)", "tabulate (==0.8.9)", "tensorflow (==2.4.4)", "torch (==1.7.1)", "websockets (==8.1)"]
@ -372,7 +372,7 @@ files = [
[[package]]
name = "langchain-core"
version = "0.1.27"
version = "0.1.28"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
@ -398,13 +398,13 @@ url = "../../core"
[[package]]
name = "langsmith"
version = "0.1.9"
version = "0.1.12"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
optional = false
python-versions = ">=3.8.1,<4.0"
files = [
{file = "langsmith-0.1.9-py3-none-any.whl", hash = "sha256:f821b3cb07a87eac5cb2181ff0b61051811e4eef09ae4b46e700981f7ae5dfb9"},
{file = "langsmith-0.1.9.tar.gz", hash = "sha256:9bd3e80607722c3d2db84cf3440005491a859b80b5e499bc988032d5c2da91f0"},
{file = "langsmith-0.1.12-py3-none-any.whl", hash = "sha256:4f3d03c365c4d9eb4ed151055e2830ea73235d8c8be0841a63334f9d4fcf8b2b"},
{file = "langsmith-0.1.12.tar.gz", hash = "sha256:cf8d371f92f1035fd98a9692edd8af9c3a60947db4a77864a9c07dec96d3b039"},
]
[package.dependencies]
@ -666,13 +666,13 @@ testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "pydantic"
version = "2.6.2"
version = "2.6.3"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.8"
files = [
{file = "pydantic-2.6.2-py3-none-any.whl", hash = "sha256:37a5432e54b12fecaa1049c5195f3d860a10e01bdfd24f1840ef14bd0d3aeab3"},
{file = "pydantic-2.6.2.tar.gz", hash = "sha256:a09be1c3d28f3abe37f8a78af58284b236a92ce520105ddc91a6d29ea1176ba7"},
{file = "pydantic-2.6.3-py3-none-any.whl", hash = "sha256:72c6034df47f46ccdf81869fddb81aade68056003900a8724a4f160700016a2a"},
{file = "pydantic-2.6.3.tar.gz", hash = "sha256:e07805c4c7f5c6826e33a1d4c9d47950d7eaf34868e2690f8594d2e30241f11f"},
]
[package.dependencies]
@ -848,13 +848,13 @@ watchdog = ">=2.0.0"
[[package]]
name = "python-dateutil"
version = "2.8.2"
version = "2.9.0.post0"
description = "Extensions to the standard Python datetime module"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
{file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
{file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
]
[package.dependencies]
@ -896,6 +896,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -1152,4 +1153,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<4.0"
content-hash = "82240f0636b69c79784cf3b14d1b73f6df90e4a8e9f9ee4f6b6c0b8653c7ee90"
content-hash = "96cc11c3e73681170c57bf7198ad3a1091620b2272e82dde34bcce4df8721d62"

@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-ibm"
version = "0.1.0"
version = "0.1.1"
description = "An integration package connecting IBM watsonx.ai and LangChain"
authors = ["IBM"]
readme = "README.md"
@ -12,8 +12,8 @@ license = "MIT"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
langchain-core = "^0.1.26"
ibm-watsonx-ai = "^0.1.8"
langchain-core = "^0.1.27"
ibm-watsonx-ai = "^0.2.0"
[tool.poetry.group.test]
optional = true
@ -21,12 +21,11 @@ optional = true
[tool.poetry.group.test.dependencies]
pytest = "^7.3.0"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
ibm-watsonx-ai = "^0.1.8"
pytest-mock = "^3.10.0"
syrupy = "^4.0.2"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
langchain-core = {path = "../../core", develop = true}
langchain-core = { path = "../../core", develop = true }
[tool.poetry.group.codespell]
optional = true
@ -38,7 +37,6 @@ codespell = "^2.2.0"
optional = true
[tool.poetry.group.test_integration.dependencies]
ibm-watsonx-ai = "^0.1.8"
[tool.poetry.group.lint]
optional = true
@ -48,29 +46,27 @@ ruff = "^0.1.5"
[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
langchain-core = {path = "../../core", develop = true}
langchain-core = { path = "../../core", develop = true }
types-requests = "^2"
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]
langchain-core = {path = "../../core", develop = true}
langchain-core = { path = "../../core", develop = true }
[tool.ruff]
select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"E", # pycodestyle
"F", # pyflakes
"I", # isort
]
[tool.mypy]
disallow_untyped_defs = "True"
[tool.coverage.run]
omit = [
"tests/*",
]
omit = ["tests/*"]
[build-system]
requires = ["poetry-core>=1.0.0"]

@ -6,6 +6,10 @@ You'll need to set WATSONX_APIKEY and WATSONX_PROJECT_ID environment variables.
import os
from ibm_watsonx_ai.foundation_models import Model, ModelInference # type: ignore
from ibm_watsonx_ai.foundation_models.utils.enums import ( # type: ignore
DecodingMethods,
ModelTypes,
)
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames # type: ignore
from langchain_core.outputs import LLMResult
@ -22,6 +26,26 @@ def test_watsonxllm_invoke() -> None:
project_id=WX_PROJECT_ID,
)
response = watsonxllm.invoke("What color sunflower is?")
print(f"\nResponse: {response}")
assert isinstance(response, str)
assert len(response) > 0
def test_watsonxllm_invoke_with_params() -> None:
parameters = {
GenTextParamsMetaNames.DECODING_METHOD: "sample",
GenTextParamsMetaNames.MAX_NEW_TOKENS: 10,
GenTextParamsMetaNames.MIN_NEW_TOKENS: 5,
}
watsonxllm = WatsonxLLM(
model_id="google/flan-ul2",
url="https://us-south.ml.cloud.ibm.com",
project_id=WX_PROJECT_ID,
params=parameters,
)
response = watsonxllm.invoke("What color sunflower is?")
print(f"\nResponse: {response}")
assert isinstance(response, str)
assert len(response) > 0
@ -33,7 +57,9 @@ def test_watsonxllm_generate() -> None:
project_id=WX_PROJECT_ID,
)
response = watsonxllm.generate(["What color sunflower is?"])
print(f"\nResponse: {response}")
response_text = response.generations[0][0].text
print(f"Response text: {response_text}")
assert isinstance(response, LLMResult)
assert len(response_text) > 0
@ -47,7 +73,9 @@ def test_watsonxllm_generate_with_multiple_prompts() -> None:
response = watsonxllm.generate(
["What color sunflower is?", "What color turtle is?"]
)
print(f"\nResponse: {response}")
response_text = response.generations[0][0].text
print(f"Response text: {response_text}")
assert isinstance(response, LLMResult)
assert len(response_text) > 0
@ -59,7 +87,9 @@ def test_watsonxllm_generate_stream() -> None:
project_id=WX_PROJECT_ID,
)
response = watsonxllm.generate(["What color sunflower is?"], stream=True)
print(f"\nResponse: {response}")
response_text = response.generations[0][0].text
print(f"Response text: {response_text}")
assert isinstance(response, LLMResult)
assert len(response_text) > 0
@ -71,6 +101,7 @@ def test_watsonxllm_stream() -> None:
project_id=WX_PROJECT_ID,
)
response = watsonxllm.invoke("What color sunflower is?")
print(f"\nResponse: {response}")
stream_response = watsonxllm.stream("What color sunflower is?")
@ -80,7 +111,7 @@ def test_watsonxllm_stream() -> None:
chunk, str
), f"chunk expect type '{str}', actual '{type(chunk)}'"
linked_text_stream += chunk
print(f"Linked text stream: {linked_text_stream}")
assert (
response == linked_text_stream
), "Linked text stream are not the same as generated text"
@ -141,3 +172,28 @@ def test_watsonxllm_invoke_from_wx_model_inference_with_params() -> None:
print(f"\nResponse: {response}")
assert isinstance(response, str)
assert len(response) > 0
def test_watsonxllm_invoke_from_wx_model_inference_with_params_as_enum() -> None:
parameters = {
GenTextParamsMetaNames.DECODING_METHOD: DecodingMethods.GREEDY,
GenTextParamsMetaNames.MAX_NEW_TOKENS: 100,
GenTextParamsMetaNames.MIN_NEW_TOKENS: 10,
GenTextParamsMetaNames.TEMPERATURE: 0.5,
GenTextParamsMetaNames.TOP_K: 50,
GenTextParamsMetaNames.TOP_P: 1,
}
model = ModelInference(
model_id=ModelTypes.FLAN_UL2,
credentials={
"apikey": WX_APIKEY,
"url": "https://us-south.ml.cloud.ibm.com",
},
project_id=WX_PROJECT_ID,
params=parameters,
)
watsonxllm = WatsonxLLM(watsonx_model=model)
response = watsonxllm.invoke("What color sunflower is?")
print(f"\nResponse: {response}")
assert isinstance(response, str)
assert len(response) > 0

@ -50,7 +50,8 @@ class TestMongoDBAtlasVectorSearch:
def setup_class(cls) -> None:
# ensure the test collection is empty
collection = get_collection()
assert collection.count_documents({}) == 0 # type: ignore[index] # noqa: E501
if collection.count_documents({}):
collection.delete_many({}) # type: ignore[index] # noqa: E501
@classmethod
def teardown_class(cls) -> None:

@ -15,12 +15,12 @@ And you should configure credentials by setting the following environment variab
## Usage
The `Pinecone` class exposes the connection to the Pinecone vector store.
The `PineconeVectorStore` class exposes the connection to the Pinecone vector store.
```python
from langchain_pinecone import PineconeVectorStore
embeddings = ... # use a LangChain Embeddings class
vectorstore = Pinecone(embeddings=embeddings)
vectorstore = PineconeVectorStore(embeddings=embeddings)
```

@ -1,5 +0,0 @@
from openai_functions_agent.agent import agent_executor
if __name__ == "__main__":
question = "who won the womens world cup in 2023?"
print(agent_executor.invoke({"input": question, "chat_history": []})) # noqa: T201