Added embeddings support for ollama (#10124)

- Description: Added support for Ollama embeddings - Issue: the issue # it fixes (if applicable), - Dependencies: N/A - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: @herrjemand cc https://github.com/jmorganca/ollama/issues/436
1 year ago · 5e50b89164
parent 48a4efc51a
commit 5e50b89164
4 changed files with 458 additions and 4 deletions
--- a/docs/extras/integrations/llms/ollama.ipynb
+++ b/docs/extras/integrations/llms/ollama.ipynb
@ -106,6 +106,25 @@
    "llm(\"Tell me about the history of AI\")"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Ollama supports embeddings via `OllamaEmbeddings`:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings\n",
+    "oembed = OllamaEmbeddings(base_url=\"http://localhost:11434\", model=\"llama2\")\n",
+    "\n",
+    "oembed.embed_query(\"Llamas are social animals and live with others as a herd.\")"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@ -121,7 +140,7 @@
    "ollama run llama2:13b \n",
    "```\n",
    "\n",
-    "Let's also use local embeddings from `GPT4AllEmbeddings` and `Chroma`."
+    "Let's also use local embeddings from `OllamaEmbeddings` and store them in the `Chroma` vector store."
   ]
  },
  {
@ -163,9 +182,9 @@
   ],
   "source": [
    "from langchain.vectorstores import Chroma\n",
-    "from langchain.embeddings import GPT4AllEmbeddings\n",
+    "from langchain.embeddings import OllamaEmbeddings\n",
    "\n",
-    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
+    "vectorstore = Chroma.from_documents(documents=all_splits, embedding=OllamaEmbeddings())"
   ]
  },
  {
@ -353,7 +372,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.5"
  }
 },
 "nbformat": 4,
--- a/docs/extras/integrations/text_embedding/ollama.ipynb
+++ b/docs/extras/integrations/text_embedding/ollama.ipynb
@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "278b6c63",
+   "metadata": {},
+   "source": [
+    "# Ollama\n",
+    "\n",
+    "Let's load the Ollama Embeddings class."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0be1af71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.embeddings import OllamaEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2c66e5da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "01370375",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a42e4035",
+   "metadata": {},
+   "source": [
+    "To generate embeddings, you can either query an individual text, or you can query a list of texts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "91bc875d-829b-4c3d-8e6f-fc2dda30a3bd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996652603149414,\n",
+       " 0.015568195842206478,\n",
+       " 0.17670190334320068,\n",
+       " 0.16521021723747253,\n",
+       " 0.21193109452724457]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result = embeddings.embed_query(text)\n",
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a4b0d49e-0c73-44b6-aed5-5b426564e085",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.04242777079343796,\n",
+       " 0.016536075621843338,\n",
+       " 0.10052520781755447,\n",
+       " 0.18272875249385834,\n",
+       " 0.2079043835401535]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])\n",
+    "doc_result[0][:5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bb61bbeb",
+   "metadata": {},
+   "source": [
+    "Let's load the Ollama Embeddings class with a smaller model (e.g. `llama2:7b`). Note: see other supported models at [https://ollama.ai/library](https://ollama.ai/library)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "a56b70f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "embeddings = OllamaEmbeddings(model=\"llama2:7b\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "14aefb64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text = \"This is a test document.\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3c39ed33",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_result = embeddings.embed_query(text)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "2ee7ce9f-d506-4810-8897-e44334412714",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.09996627271175385,\n",
+       " 0.015567859634757042,\n",
+       " 0.17670205235481262,\n",
+       " 0.16521376371383667,\n",
+       " 0.21193283796310425]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query_result[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "e3221db6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc_result = embeddings.embed_documents([text])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a0865409-3a6d-468f-939f-abde17c7cac3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.042427532374858856,\n",
+       " 0.01653730869293213,\n",
+       " 0.10052604228258133,\n",
+       " 0.18272635340690613,\n",
+       " 0.20790338516235352]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "doc_result[0][:5]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/libs/langchain/langchain/embeddings/init.py
+++ b/libs/langchain/langchain/embeddings/init.py
@ -49,6 +49,7 @@ from langchain.embeddings.modelscope_hub import ModelScopeEmbeddings
 from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
 from langchain.embeddings.nlpcloud import NLPCloudEmbeddings
 from langchain.embeddings.octoai_embeddings import OctoAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
 from langchain.embeddings.self_hosted import SelfHostedEmbeddings
@ -106,6 +107,7 @@ __all__ = [
    "AwaEmbeddings",
    "HuggingFaceBgeEmbeddings",
    "ErnieEmbeddings",
+    "OllamaEmbeddings",
    "QianfanEmbeddingsEndpoint",
 ]

--- a/libs/langchain/langchain/embeddings/ollama.py
+++ b/libs/langchain/langchain/embeddings/ollama.py
@ -0,0 +1,205 @@
+from typing import Any, Dict, List, Mapping, Optional
+
+import requests
+
+from langchain.embeddings.base import Embeddings
+from langchain.pydantic_v1 import BaseModel, Extra
+
+
+class OllamaEmbeddings(BaseModel, Embeddings):
+    """Ollama locally runs large language models.
+
+    To use, follow the instructions at https://ollama.ai/.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.embeddings import OllamaEmbeddings
+            ollama_emb = OllamaEmbeddings(
+                model="llama:7b",
+            )
+            r1 = ollama_emb.embed_documents(
+                [
+                    "Alpha is the first letter of Greek alphabet",
+                    "Beta is the second letter of Greek alphabet",
+                ]
+            )
+            r2 = ollama_emb.embed_query(
+                "What is the second letter of Greek alphabet"
+            )
+
+    """
+
+    base_url: str = "http://localhost:11434"
+    """Base url the model is hosted under."""
+    model: str = "llama2"
+    """Model name to use."""
+
+    embed_instruction: str = "passage: "
+    """Instruction used to embed documents."""
+    query_instruction: str = "query: "
+    """Instruction used to embed the query."""
+
+    mirostat: Optional[int]
+    """Enable Mirostat sampling for controlling perplexity.
+    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+    mirostat_eta: Optional[float]
+    """Influences how quickly the algorithm responds to feedback
+    from the generated text. A lower learning rate will result in
+    slower adjustments, while a higher learning rate will make
+    the algorithm more responsive. (Default: 0.1)"""
+
+    mirostat_tau: Optional[float]
+    """Controls the balance between coherence and diversity
+    of the output. A lower value will result in more focused and
+    coherent text. (Default: 5.0)"""
+
+    num_ctx: Optional[int]
+    """Sets the size of the context window used to generate the
+    next token. (Default: 2048)	"""
+
+    num_gpu: Optional[int]
+    """The number of GPUs to use. On macOS it defaults to 1 to
+    enable metal support, 0 to disable."""
+
+    num_thread: Optional[int]
+    """Sets the number of threads to use during computation.
+    By default, Ollama will detect this for optimal performance.
+    It is recommended to set this value to the number of physical
+    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    repeat_last_n: Optional[int]
+    """Sets how far back for the model to look back to prevent
+    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+    repeat_penalty: Optional[float]
+    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+    will be more lenient. (Default: 1.1)"""
+
+    temperature: Optional[float]
+    """The temperature of the model. Increasing the temperature will
+    make the model answer more creatively. (Default: 0.8)"""
+
+    stop: Optional[List[str]]
+    """Sets the stop tokens to use."""
+
+    tfs_z: Optional[float]
+    """Tail free sampling is used to reduce the impact of less probable
+    tokens from the output. A higher value (e.g., 2.0) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+    top_k: Optional[int]
+    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+    will give more diverse answers, while a lower value (e.g. 10)
+    will be more conservative. (Default: 40)"""
+
+    top_p: Optional[int]
+    """Works together with top-k. A higher value (e.g., 0.95) will lead
+    to more diverse text, while a lower value (e.g., 0.5) will
+    generate more focused and conservative text. (Default: 0.9)"""
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling Ollama."""
+        return {
+            "model": self.model,
+            "options": {
+                "mirostat": self.mirostat,
+                "mirostat_eta": self.mirostat_eta,
+                "mirostat_tau": self.mirostat_tau,
+                "num_ctx": self.num_ctx,
+                "num_gpu": self.num_gpu,
+                "num_thread": self.num_thread,
+                "repeat_last_n": self.repeat_last_n,
+                "repeat_penalty": self.repeat_penalty,
+                "temperature": self.temperature,
+                "stop": self.stop,
+                "tfs_z": self.tfs_z,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+            },
+        }
+
+    model_kwargs: Optional[dict] = None
+    """Other model keyword args"""
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {**{"model": self.model}, **self._default_params}
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def _process_emb_response(self, input: str) -> List[float]:
+        """Process a response from the API.
+
+        Args:
+            response: The response from the API.
+
+        Returns:
+            The response as a dictionary.
+        """
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        try:
+            res = requests.post(
+                f"{self.base_url}/api/embeddings",
+                headers=headers,
+                json={"model": self.model, "prompt": input, **self._default_params},
+            )
+        except requests.exceptions.RequestException as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+        if res.status_code != 200:
+            raise ValueError(
+                "Error raised by inference API HTTP code: %s, %s"
+                % (res.status_code, res.text)
+            )
+        try:
+            t = res.json()
+            return t["embedding"]
+        except requests.exceptions.JSONDecodeError as e:
+            raise ValueError(
+                f"Error raised by inference API: {e}.\nResponse: {res.text}"
+            )
+
+    def _embed(self, input: List[str]) -> List[List[float]]:
+        embeddings_list: List[List[float]] = []
+        for prompt in input:
+            embeddings = self._process_emb_response(prompt)
+            embeddings_list.append(embeddings)
+
+        return embeddings_list
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents using a Ollama deployed embedding model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        instruction_pairs = [f"{self.embed_instruction}{text}" for text in texts]
+        embeddings = self._embed(instruction_pairs)
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using a Ollama deployed embedding model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        instruction_pair = f"{self.query_instruction}{text}"
+        embedding = self._embed([instruction_pair])[0]
+        return embedding