diff --git a/docs/extras/use_cases/code/index.mdx b/docs/extras/use_cases/code/index.mdx index d83075da..1287147b 100644 --- a/docs/extras/use_cases/code/index.mdx +++ b/docs/extras/use_cases/code/index.mdx @@ -22,5 +22,5 @@ Query Understanding: GPT-4 processes user queries, grasping the context and extr 5. Ask questions: Define a list of questions to ask about the codebase, and then use the ConversationalRetrievalChain to generate context-aware answers. The LLM (GPT-4) generates comprehensive, context-aware answers based on retrieved code snippets and conversation history. The full tutorial is available below. -- [Twitter the-algorithm codebase analysis with Deep Lake](code/twitter-the-algorithm-analysis-deeplake.html): A notebook walking through how to parse github source code and run queries conversation. -- [LangChain codebase analysis with Deep Lake](code/code-analysis-deeplake.html): A notebook walking through how to analyze and do question answering over THIS code base. +- [Twitter the-algorithm codebase analysis with Deep Lake](./twitter-the-algorithm-analysis-deeplake.html): A notebook walking through how to parse GitHub source code and run conversational queries over it. +- [LangChain codebase analysis with Deep Lake](./code-analysis-deeplake.html): A notebook walking through how to analyze and do question answering over THIS code base. 
diff --git a/docs/extras/use_cases/code/twitter-the-algorithm-analysis-deeplake.ipynb b/docs/extras/use_cases/code/twitter-the-algorithm-analysis-deeplake.ipynb index e1eb9e08..60e9df71 100644 --- a/docs/extras/use_cases/code/twitter-the-algorithm-analysis-deeplake.ipynb +++ b/docs/extras/use_cases/code/twitter-the-algorithm-analysis-deeplake.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -29,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -61,6 +62,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,6 +80,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -86,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -105,6 +108,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -124,6 +128,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -140,12 +145,12 @@ "db = DeepLake(\n", " dataset_path=f\"hub://{username}/twitter-algorithm\",\n", " embedding_function=embeddings,\n", - " public=True,\n", - ") # dataset would be publicly available\n", + ")\n", "db.add_documents(texts)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -155,9 +160,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deep Lake Dataset in hub://davitbun/twitter-algorithm already exists, loading from the storage\n" + ] + } + ], "source": [ "db = DeepLake(\n", " 
dataset_path=\"hub://davitbun/twitter-algorithm\",\n", @@ -168,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -180,6 +193,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -188,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -208,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -391,6 +405,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [] @@ -412,7 +427,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/langchain/vectorstores/deeplake.py b/langchain/vectorstores/deeplake.py index 4a6397b5..95210059 100644 --- a/langchain/vectorstores/deeplake.py +++ b/langchain/vectorstores/deeplake.py @@ -8,6 +8,7 @@ import numpy as np try: import deeplake + from deeplake.core.fast_forwarding import version_compare from deeplake.core.vectorstore import DeepLakeVectorStore _DEEPLAKE_INSTALLED = True @@ -124,11 +125,11 @@ class DeepLake(VectorStore): "Please install it with `pip install deeplake`." ) - version = deeplake.__version__ - if version != "3.6.2": + if version_compare(deeplake.__version__, "3.6.2") == -1: raise ValueError( - "deeplake version should be = 3.6.3, but you've installed" - f" {version}. Consider changing deeplake version to 3.6.3 ." + "deeplake version should be >= 3.6.3, but you've installed" + f" {deeplake.__version__}. Consider upgrading deeplake version \ + pip install --upgrade deeplake." 
) self.dataset_path = dataset_path @@ -303,7 +304,10 @@ class DeepLake(VectorStore): ) if embedding_function: - _embedding_function = embedding_function + if isinstance(embedding_function, Embeddings): + _embedding_function = embedding_function.embed_query + else: + _embedding_function = embedding_function elif self._embedding_function: _embedding_function = self._embedding_function.embed_query else: