{ "cells": [ { "cell_type": "markdown", "id": "eec4efda", "metadata": {}, "source": [ "# Self Hosted Embeddings\n", "Let's load the SelfHostedEmbeddings, SelfHostedHuggingFaceEmbeddings, and SelfHostedHuggingFaceInstructEmbeddings classes." ] }, { "cell_type": "code", "execution_count": null, "id": "d338722a", "metadata": { "scrolled": true }, "outputs": [], "source": [ "from langchain.embeddings import (\n", " SelfHostedEmbeddings,\n", " SelfHostedHuggingFaceEmbeddings,\n", " SelfHostedHuggingFaceInstructEmbeddings,\n", ")\n", "import runhouse as rh" ] }, { "cell_type": "code", "execution_count": null, "id": "146559e8", "metadata": {}, "outputs": [], "source": [ "# For an on-demand A100 with GCP, Azure, or Lambda\n", "gpu = rh.cluster(name=\"rh-a10x\", instance_type=\"A100:1\", use_spot=False)\n", "\n", "# For an on-demand A10G with AWS (no single A100s on AWS)\n", "# gpu = rh.cluster(name='rh-a10x', instance_type='g5.2xlarge', provider='aws')\n", "\n", "# For an existing cluster\n", "# gpu = rh.cluster(ips=[''],\n", "# ssh_creds={'ssh_user': '...', 'ssh_private_key':''},\n", "# name='my-cluster')" ] }, { "cell_type": "code", "execution_count": null, "id": "1230f7df", "metadata": {}, "outputs": [], "source": [ "embeddings = SelfHostedHuggingFaceEmbeddings(hardware=gpu)" ] }, { "cell_type": "code", "execution_count": 6, "id": "2684e928", "metadata": {}, "outputs": [], "source": [ "text = \"This is a test document.\"" ] }, { "cell_type": "code", "execution_count": null, "id": "1dc5e606", "metadata": { "scrolled": true }, "outputs": [], "source": [ "query_result = embeddings.embed_query(text)" ] }, { "cell_type": "markdown", "id": "cef9cc54", "metadata": {}, "source": [ "And similarly for SelfHostedHuggingFaceInstructEmbeddings:" ] }, { "cell_type": "code", "execution_count": null, "id": "81a17ca3", "metadata": {}, "outputs": [], "source": [ "embeddings = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu)" ] }, { "cell_type": "markdown", "id": "5a33d1c8", "metadata": {}, "source": [ "Now let's load an embedding model with a custom load function:" ] }, { "cell_type": "code", "execution_count": 12, "id": "c4af5679", "metadata": {}, "outputs": [], "source": [ "def get_pipeline():\n", " from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " pipeline,\n", " ) # Must be inside the function in notebooks\n", "\n", " model_id = \"facebook/bart-base\"\n", " tokenizer = AutoTokenizer.from_pretrained(model_id)\n", " model = AutoModelForCausalLM.from_pretrained(model_id)\n", " return pipeline(\"feature-extraction\", model=model, tokenizer=tokenizer)\n", "\n", "\n", "def inference_fn(pipeline, prompt):\n", " # Return last hidden state of the model\n", " if isinstance(prompt, list):\n", " return [emb[0][-1] for emb in pipeline(prompt)]\n", " return pipeline(prompt)[0][-1]" ] }, { "cell_type": "code", "execution_count": null, "id": "8654334b", "metadata": {}, "outputs": [], "source": [ "embeddings = SelfHostedEmbeddings(\n", " model_load_fn=get_pipeline,\n", " hardware=gpu,\n", " model_reqs=[\"./\", \"torch\", \"transformers\"],\n", " inference_fn=inference_fn,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "fc1bfd0f", "metadata": { "scrolled": false }, "outputs": [], "source": [ "query_result = embeddings.embed_query(text)" ] }, { "cell_type": "code", "execution_count": null, "id": "aaad49f8", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, 
"language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" }, "vscode": { "interpreter": { "hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885" } } }, "nbformat": 4, "nbformat_minor": 5 }