langchain/docs/modules/models/text_embedding/examples/mosaicml.ipynb

{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MosaicML embeddings\n",
    "\n",
    "[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n",
    "\n",
    "This example goes over how to use LangChain to interact with MosaicML Inference for text embedding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n",
    "\n",
    "from getpass import getpass\n",
    "\n",
    "MOSAICML_API_TOKEN = getpass()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings import MosaicMLInstructorEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = MosaicMLInstructorEmbeddings(\n",
    "    query_instruction=\"Represent the query for retrieval: \"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_text = \"This is a test query.\"\n",
    "query_result = embeddings.embed_query(query_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "document_text = \"This is a test document.\"\n",
    "document_result = embeddings.embed_documents([document_text])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "query_numpy = np.array(query_result)\n",
    "document_numpy = np.array(document_result[0])\n",
    "similarity = np.dot(query_numpy, document_numpy) / (np.linalg.norm(query_numpy)*np.linalg.norm(document_numpy))\n",
    "print(f\"Cosine similarity between document and query: {similarity}\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}