From d95eeaedbe8aa62ef972ef5f1d668218be662222 Mon Sep 17 00:00:00 2001
From: Massimiliano Pronesti
Date: Mon, 14 Aug 2023 08:03:05 +0200
Subject: [PATCH] feat(llms): support vLLM's OpenAI-compatible server (#9179)

This PR adds support for [vLLM's OpenAI-compatible server
feature](https://vllm.readthedocs.io/en/latest/getting_started/quickstart.html#openai-compatible-server),
i.e. it allows vLLM models to be called as if they were OpenAI's. I've
also updated the related notebook with an example usage. At the moment,
vLLM only supports the `Completion` API.
---
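Example usage, mirroring the updated notebook (a minimal sketch: it assumes
a vLLM OpenAI-compatible server is already running locally, e.g. started with
`python -m vllm.entrypoints.openai.api_server --model tiiuae/falcon-7b`; that
server does not validate credentials, hence the "EMPTY" placeholder key):

    from langchain.llms import VLLMOpenAI

    llm = VLLMOpenAI(
        openai_api_key="EMPTY",
        openai_api_base="http://localhost:8000/v1",
        model_name="tiiuae/falcon-7b",
        # Extra keyword arguments are merged into the Completion request.
        model_kwargs={"stop": ["."]},
    )
    print(llm("Rome is"))  # -> " a city that is filled with history, ..."
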
 docs/extras/integrations/llms/vllm.ipynb  | 45 +++++++++++++++++++++++
 libs/langchain/langchain/llms/__init__.py |  4 +-
 libs/langchain/langchain/llms/vllm.py     | 25 +++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/docs/extras/integrations/llms/vllm.ipynb b/docs/extras/integrations/llms/vllm.ipynb
index 7e3c5068c6..8a9ff01df1 100644
--- a/docs/extras/integrations/llms/vllm.ipynb
+++ b/docs/extras/integrations/llms/vllm.ipynb
@@ -170,6 +170,51 @@
     "\n",
     "llm(\"What is the future of AI?\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "64e89be0-6ad7-43a8-9dac-1324dcd4e851",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## OpenAI-Compatible Server\n",
+    "\n",
+    "vLLM can be deployed as a server that mimics the OpenAI API protocol. This allows vLLM to be used as a drop-in replacement for applications using the OpenAI API.\n",
+    "\n",
+    "This server can be queried in the same format as the OpenAI API.\n",
+    "\n",
+    "### OpenAI-Compatible Completion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c3cbc428-0bb8-422a-913e-1c6fef8b89d4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " a city that is filled with history, ancient buildings, and art around every corner\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.llms import VLLMOpenAI\n",
+    "\n",
+    "\n",
+    "llm = VLLMOpenAI(\n",
+    "    openai_api_key=\"EMPTY\",\n",
+    "    openai_api_base=\"http://localhost:8000/v1\",\n",
+    "    model_name=\"tiiuae/falcon-7b\",\n",
+    "    model_kwargs={\"stop\": [\".\"]}\n",
+    ")\n",
+    "print(llm(\"Rome is\"))"
+   ]
   }
  ],
  "metadata": {
diff --git a/libs/langchain/langchain/llms/__init__.py b/libs/langchain/langchain/llms/__init__.py
index 5eae603caf..82f904eebb 100644
--- a/libs/langchain/langchain/llms/__init__.py
+++ b/libs/langchain/langchain/llms/__init__.py
@@ -80,7 +80,7 @@ from langchain.llms.textgen import TextGen
 from langchain.llms.titan_takeoff import TitanTakeoff
 from langchain.llms.tongyi import Tongyi
 from langchain.llms.vertexai import VertexAI
-from langchain.llms.vllm import VLLM
+from langchain.llms.vllm import VLLM, VLLMOpenAI
 from langchain.llms.writer import Writer
 from langchain.llms.xinference import Xinference
 
@@ -149,6 +149,7 @@ __all__ = [
     "Tongyi",
     "VertexAI",
     "VLLM",
+    "VLLMOpenAI",
     "Writer",
     "OctoAIEndpoint",
     "Xinference",
@@ -213,6 +214,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "openllm": OpenLLM,
     "openllm_client": OpenLLM,
     "vllm": VLLM,
+    "vllm_openai": VLLMOpenAI,
     "writer": Writer,
     "xinference": Xinference,
 }
diff --git a/libs/langchain/langchain/llms/vllm.py b/libs/langchain/langchain/llms/vllm.py
index 0a6c4dbec0..61fa5e6666 100644
--- a/libs/langchain/langchain/llms/vllm.py
+++ b/libs/langchain/langchain/llms/vllm.py
@@ -4,6 +4,7 @@ from pydantic import root_validator
 
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import BaseLLM
+from langchain.llms.openai import BaseOpenAI
 from langchain.schema.output import Generation, LLMResult
 
 
@@ -127,3 +128,27 @@ class VLLM(BaseLLM):
     def _llm_type(self) -> str:
         """Return type of llm."""
         return "vllm"
+
+
+class VLLMOpenAI(BaseOpenAI):
+    """vLLM OpenAI-compatible API client."""
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        """Get the parameters used to invoke the model."""
+        openai_creds: Dict[str, Any] = {
+            "api_key": self.openai_api_key,
+            "api_base": self.openai_api_base,
+        }
+
+        return {
+            "model": self.model_name,
+            **openai_creds,
+            **self._default_params,
+            "logit_bias": None,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "vllm-openai"