Harrison/updating docs (#1196)

Harrison Chase authored 1 year ago; committed by GitHub
parent b7708bbec6
commit d90a287d8f

@@ -26,6 +26,6 @@ the `SelfHostedEmbedding` class.
from langchain.llms import SelfHostedPipeline, SelfHostedHuggingFaceLLM
```
For a more detailed walkthrough of the Self-hosted Embeddings, see [this notebook](../modules/utils/combine_docs_examples/embeddings.ipynb)
For a more detailed walkthrough of the Self-hosted Embeddings, see [this notebook](../modules/indexes/examples/embeddings.ipynb)
##

@@ -9,7 +9,7 @@
"\n",
"LangChain provides async support for Chains by leveraging the [asyncio](https://docs.python.org/3/library/asyncio.html) library.\n",
"\n",
"Async methods are currently supported in `LLMChain` (through `arun`, `apredict`, `acall`) and `LLMMathChain` (through `arun` and `acall`), `ChatVectorDBChain`, and [QA chains](https://langchain.readthedocs.io/en/latest/modules/chains/combine_docs_examples/question_answering.html). Async support for other chains is on the roadmap."
"Async methods are currently supported in `LLMChain` (through `arun`, `apredict`, `acall`) and `LLMMathChain` (through `arun` and `acall`), `ChatVectorDBChain`, and [QA chains](../indexes/chain_examples/question_answering.html). Async support for other chains is on the roadmap."
]
},
{
@@ -124,7 +124,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.1"
}
},
"nbformat": 4,

@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "4babfba5",
"metadata": {},
@@ -31,7 +30,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "65d5ddce",
"metadata": {},
@@ -71,18 +69,17 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c325048c",
"metadata": {},
"source": [
"### Load from all paths in a given GitBook\n",
"For this to work, the GitbookLoader needs to be initialized with the root path (`https://docs.gitbook.com` in this example)."
"For this to work, the GitbookLoader needs to be initialized with the root path (`https://docs.gitbook.com` in this example) and have `load_all_paths` set to `True`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "938ff4ee",
"metadata": {},
"outputs": [
@@ -122,12 +119,13 @@
}
],
"source": [
"all_pages_data = loader.load_from_all_paths()"
"loader = GitbookLoader(\"https://docs.gitbook.com\", load_all_paths=True)\n",
"all_pages_data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "db92fc39",
"metadata": {},
"outputs": [
@@ -144,7 +142,7 @@
"Document(page_content=\"Import\\nFind out how to easily migrate your existing documentation and which formats are supported.\\nThe import function allows you to migrate and unify existing documentation in GitBook. You can choose to import single or multiple pages although limits apply. \\nPermissions\\nAll members with editor permission or above can use the import feature.\\nSupported formats\\nGitBook supports imports from websites or files that are:\\nMarkdown (.md or .markdown)\\nHTML (.html)\\nMicrosoft Word (.docx).\\nWe also support import from:\\nConfluence\\nNotion\\nGitHub Wiki\\nQuip\\nDropbox Paper\\nGoogle Docs\\nYou can also upload a ZIP\\n \\ncontaining HTML or Markdown files when \\nimporting multiple pages.\\nNote: this feature is in beta.\\nFeel free to suggest import sources we don't support yet and \\nlet us know\\n if you have any issues.\\nImport panel\\nWhen you create a new space, you'll have the option to import content straight away:\\nThe new page menu\\nImport a page or subpage by selecting \\nImport Page\\n from the New Page menu, or \\nImport Subpage\\n in the page action menu, found in the table of contents:\\nImport from the page action menu\\nWhen you choose your input source, instructions will explain how to proceed.\\nAlthough GitBook supports importing content from different kinds of sources, the end result might be different from your source due to differences in product features and document format.\\nLimits\\nGitBook currently has the following limits for imported content:\\nThe maximum number of pages that can be uploaded in a single import is \\n20.\\nThe maximum number of files (images etc.) that can be uploaded in a single import is \\n20.\\nGetting started - \\nPrevious\\nOverview\\nNext\\n - Getting started\\nGit Sync\\nLast modified \\n4mo ago\", lookup_str='', metadata={'source': 'https://docs.gitbook.com/getting-started/import', 'title': 'Import'}, lookup_index=0)"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -154,11 +152,19 @@
"# show second document\n",
"all_pages_data[2]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92cb3eda",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -172,7 +178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.1"
},
"vscode": {
"interpreter": {

@@ -82,7 +82,7 @@ for language models.
## Augmenting
So you've fetched your relevant data - now what? How do you pass them to the language model in a format it can understand?
For a detailed overview of the different ways of doing so, and the tradeoffs between them, please see
[this documentation](../modules/chains/combine_docs.md)
[this documentation](../modules/indexes/combine_docs.md)
## Use Cases
LangChain supports the above three methods of augmenting LLMs with external data.

@@ -12,8 +12,8 @@ chain.run(input_documents=docs, question=query)
```
The following resources exist:
- [Question Answering Notebook](/modules/chains/combine_docs_examples/question_answering.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB Question Answering Notebook](/modules/chains/combine_docs_examples/vector_db_qa.ipynb): A notebook walking through how to do question answering over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
- [Question Answering Notebook](/modules/indexes/chain_examples/question_answering.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB Question Answering Notebook](/modules/indexes/chain_examples/vector_db_qa.ipynb): A notebook walking through how to do question answering over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
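
For context, a minimal sketch of how `chain` and `docs` in the snippet above might be constructed (the document contents and the query are placeholders):

```python
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.llms import OpenAI

# Placeholder documents; in practice these come from a loader or a
# vector-store similarity search.
docs = [Document(page_content="LangChain provides async support for chains.")]

chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
query = "What does LangChain provide async support for?"
chain.run(input_documents=docs, question=query)
```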
### Adding in sources
@@ -28,12 +28,12 @@ chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```
The following resources exist:
- [QA With Sources Notebook](/modules/chains/combine_docs_examples/qa_with_sources.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB QA With Sources Notebook](/modules/chains/combine_docs_examples/vector_db_qa_with_sources.ipynb): A notebook walking through how to do question answering with sources over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
- [QA With Sources Notebook](/modules/indexes/chain_examples/qa_with_sources.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB QA With Sources Notebook](/modules/indexes/chain_examples/vector_db_qa_with_sources.ipynb): A notebook walking through how to do question answering with sources over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
### Additional Related Resources
Additional related resources include:
- [Utilities for working with Documents](/modules/utils/how_to_guides.rst): Guides on how to use several of the utilities which will prove helpful for this task, including Text Splitters (for splitting up long documents) and Embeddings & Vectorstores (useful for the above Vector DB example).
- [CombineDocuments Chains](/modules/chains/combine_docs.md): A conceptual overview of specific types of chains by which you can accomplish this task.
- [CombineDocuments Chains](/modules/indexes/combine_docs.md): A conceptual overview of specific types of chains by which you can accomplish this task.
- [Data Augmented Generation](combine_docs.md): An overview of data augmented generation, which is the general concept of combining external data with LLMs (of which this is a subset).
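
As a rough sketch of the with-sources variant shown at the top of this hunk (the documents, sources, and query are placeholders; each document carries a "source" entry in its metadata so the chain can cite it):

```python
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.docstore.document import Document
from langchain.llms import OpenAI

# Placeholder documents with "source" metadata for citation.
docs = [
    Document(page_content="LangChain supports async chains.", metadata={"source": "doc-1"}),
    Document(page_content="GitbookLoader can load every page of a GitBook.", metadata={"source": "doc-2"}),
]

chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")
query = "What can the GitbookLoader do?"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
# e.g. {"output_text": "... SOURCES: doc-2"}
```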

@@ -12,9 +12,9 @@ chain.run(docs)
```
The following resources exist:
- [Summarization Notebook](../modules/chains/combine_docs_examples/summarize.ipynb): A notebook walking through how to accomplish this task.
- [Summarization Notebook](../modules/indexes/chain_examples/summarize.ipynb): A notebook walking through how to accomplish this task.
Additional related resources include:
- [Utilities for working with Documents](../modules/utils/how_to_guides.rst): Guides on how to use several of the utilities which will prove helpful for this task, including Text Splitters (for splitting up long documents).
- [CombineDocuments Chains](../modules/chains/combine_docs.md): A conceptual overview of specific types of chains by which you can accomplish this task.
- [CombineDocuments Chains](../modules/indexes/combine_docs.md): A conceptual overview of specific types of chains by which you can accomplish this task.
- [Data Augmented Generation](./combine_docs.md): An overview of data augmented generation, which is the general concept of combining external data with LLMs (of which this is a subset).
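
A minimal sketch of how `chain` and `docs` in `chain.run(docs)` above are typically set up (the document text is a placeholder):

```python
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import OpenAI

# Placeholder documents; normally produced by a loader plus a text splitter.
docs = [Document(page_content="A long passage of text to summarize ...")]

chain = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce")
chain.run(docs)
```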

@@ -28,7 +28,6 @@ from langchain.llms import (
Cohere,
ForefrontAI,
GooseAI,
HuggingFaceEndpoint,
HuggingFaceHub,
OpenAI,
Petals,
@@ -80,7 +79,6 @@ __all__ = [
"PromptTemplate",
"ReActChain",
"Wikipedia",
"HuggingFaceEndpoint",
"HuggingFaceHub",
"HuggingFacePipeline",
"SQLDatabase",

@@ -12,25 +12,26 @@ class GitbookLoader(WebBaseLoader):
2. load all (relative) paths in the navbar.
"""
def load(self, custom_web_path: Optional[str] = None) -> List[Document]:
def __init__(self, web_page: str, load_all_paths: bool = False):
"""Initialize with web page and whether to load all paths."""
super().__init__(web_page)
self.load_all_paths = load_all_paths
def load(self) -> List[Document]:
"""Fetch text from one single GitBook page."""
soup_info = self.scrape(custom_web_path)
url = custom_web_path if custom_web_path else self.web_path
return [self._get_document(soup_info, url)]
def load_from_all_paths(self) -> List[Document]:
"""Fetch text from all pages in the navbar.
Make sure the initialized web_path is the root of the GitBook
"""
soup_info = self.scrape()
relative_paths = self._get_paths(soup_info)
documents = []
for path in relative_paths:
url = self.web_path + path
print(f"Fetching text from {url}")
documents += self.load(url)
return documents
if self.load_all_paths:
soup_info = self.scrape()
relative_paths = self._get_paths(soup_info)
documents = []
for path in relative_paths:
url = self.web_path + path
print(f"Fetching text from {url}")
soup_info = self._scrape(url)
documents.append(self._get_document(soup_info, url))
return documents
else:
soup_info = self.scrape()
return [self._get_document(soup_info, self.web_path)]
def _get_document(self, soup: Any, custom_url: Optional[str] = None) -> Document:
"""Fetch content from page and return Document."""

@@ -1,5 +1,5 @@
"""Web base loader class."""
from typing import Any, List, Optional
from typing import Any, List
import requests
@@ -14,15 +14,18 @@ class WebBaseLoader(BaseLoader):
"""Initialize with webpage path."""
self.web_path = web_path
def scrape(self, custom_web_path: Optional[str] = None) -> Any:
"""Scrape data from webpage and return it in BeautifulSoup format."""
@staticmethod
def _scrape(url: str) -> Any:
from bs4 import BeautifulSoup
url = custom_web_path if custom_web_path else self.web_path
html_doc = requests.get(url)
soup = BeautifulSoup(html_doc.text, "html.parser")
return soup
def scrape(self) -> Any:
"""Scrape data from webpage and return it in BeautifulSoup format."""
return self._scrape(self.web_path)
def load(self) -> List[Document]:
"""Load data into document objects."""
soup = self.scrape()
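
A short usage sketch of the refactored loader, assuming `WebBaseLoader` is exported from `langchain.document_loaders` (the URL is a placeholder):

```python
from langchain.document_loaders import WebBaseLoader

loader = WebBaseLoader("https://www.example.com/")  # placeholder URL
soup = loader.scrape()  # BeautifulSoup object for the page
docs = loader.load()    # [Document(page_content=..., metadata={"source": ...})]
```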

@@ -23,7 +23,7 @@ class HuggingFaceEndpoint(LLM, BaseModel):
Example:
.. code-block:: python
from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain.llms import HuggingFaceEndpoint
endpoint_url = (
"https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud"
)
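
Expanding the docstring example into a fuller, hypothetical construction; the endpoint URL and token are placeholders, and the token can instead be supplied via the `HUGGINGFACEHUB_API_TOKEN` environment variable:

```python
from langchain.llms import HuggingFaceEndpoint

endpoint_url = (
    "https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud"
)
llm = HuggingFaceEndpoint(
    endpoint_url=endpoint_url,
    task="text-generation",
    huggingfacehub_api_token="my-api-key",  # placeholder token
)
llm("What is the capital of France?")
```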
