langchain/docs/extras/integrations/text_embedding/deepinfra.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DeepInfra\n",
    "\n",
    "[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for text embeddings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " ········\n"
     ]
    }
   ],
   "source": [
    "# sign up for an account: https://deepinfra.com/login?utm_source=langchain\n",
    "\n",
    "from getpass import getpass\n",
    "\n",
    "DEEPINFRA_API_TOKEN = getpass()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings import DeepInfraEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = DeepInfraEmbeddings(\n",
    "    model_id=\"sentence-transformers/clip-ViT-B-32\",\n",
    "    query_instruction=\"\",\n",
    "    embed_instruction=\"\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "docs = [\"Dog is not a cat\", \"Beta is the second letter of Greek alphabet\"]\n",
    "document_result = embeddings.embed_documents(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What is the first letter of Greek alphabet\"\n",
    "query_result = embeddings.embed_query(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine similarity between \"Dog is not a cat\" and query: 0.7489097144129355\n",
      "Cosine similarity between \"Beta is the second letter of Greek alphabet\" and query: 0.9519380640702013\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "query_numpy = np.array(query_result)\n",
    "for doc_res, doc in zip(document_result, docs):\n",
    "    document_numpy = np.array(doc_res)\n",
    "    similarity = np.dot(query_numpy, document_numpy) / (\n",
    "        np.linalg.norm(query_numpy) * np.linalg.norm(document_numpy)\n",
    "    )\n",
    "    print(f'Cosine similarity between \"{doc}\" and query: {similarity}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}