diff --git a/docs/modules/document_loaders/examples/azure_blob_storage_container.ipynb b/docs/modules/indexes/document_loaders/examples/azure_blob_storage_container.ipynb similarity index 100% rename from docs/modules/document_loaders/examples/azure_blob_storage_container.ipynb rename to docs/modules/indexes/document_loaders/examples/azure_blob_storage_container.ipynb diff --git a/docs/modules/document_loaders/examples/azure_blob_storage_file.ipynb b/docs/modules/indexes/document_loaders/examples/azure_blob_storage_file.ipynb similarity index 100% rename from docs/modules/document_loaders/examples/azure_blob_storage_file.ipynb rename to docs/modules/indexes/document_loaders/examples/azure_blob_storage_file.ipynb diff --git a/docs/modules/indexes/document_loaders/examples/college_confidential.ipynb b/docs/modules/indexes/document_loaders/examples/college_confidential.ipynb index a01eecb7..1faf3a7c 100644 --- a/docs/modules/indexes/document_loaders/examples/college_confidential.ipynb +++ b/docs/modules/indexes/document_loaders/examples/college_confidential.ipynb @@ -85,7 +85,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/docs/modules/indexes/document_loaders/examples/sitemap.ipynb b/docs/modules/indexes/document_loaders/examples/sitemap.ipynb new file mode 100644 index 00000000..97e40586 --- /dev/null +++ b/docs/modules/indexes/document_loaders/examples/sitemap.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sitemap Loader\n", + "\n", + "This loader extends the [WebBaseLoader](web_base.ipynb): it loads a sitemap from a given URL, then scrapes and loads all the pages in the sitemap, returning each page as a document.\n", + "\n", + "The scraping is done concurrently, using `WebBaseLoader`. There are reasonable limits to concurrent requests, defaulting to 2 per second. 
If you aren't concerned about being a good citizen, or you control the server you are scraping and don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note that while this will speed up the scraping process, it may also cause the server to block you. Be careful!" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: nest_asyncio in /Users/tasp/Code/projects/langchain/.venv/lib/python3.10/site-packages (1.5.6)\r\n", + "\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n" + ] + } + ], + "source": [ + "!pip install nest_asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# fixes a bug with asyncio and jupyter\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders.sitemap import SitemapLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sitemap_loader = SitemapLoader(web_path=\"https://langchain.readthedocs.io/sitemap.xml\")\n", + "\n", + "docs = sitemap_loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(page_content='\\n\\n\\n\\n\\n\\nWelcome to LangChain — 🦜🔗 LangChain 
0.0.123\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSkip to main content\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCtrl+K\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n🦜🔗 LangChain 0.0.123\\n\\n\\n\\nGetting Started\\n\\nQuickstart Guide\\n\\nModules\\n\\nPrompt Templates\\nGetting Started\\nKey Concepts\\nHow-To Guides\\nCreate a custom prompt template\\nCreate a custom example selector\\nProvide few shot examples to a prompt\\nPrompt Serialization\\nExample Selectors\\nOutput Parsers\\n\\n\\nReference\\nPromptTemplates\\nExample Selector\\n\\n\\n\\n\\nLLMs\\nGetting Started\\nKey Concepts\\nHow-To Guides\\nGeneric Functionality\\nCustom LLM\\nFake LLM\\nLLM Caching\\nLLM Serialization\\nToken Usage Tracking\\n\\n\\nIntegrations\\nAI21\\nAleph Alpha\\nAnthropic\\nAzure OpenAI LLM Example\\nBanana\\nCerebriumAI LLM Example\\nCohere\\nDeepInfra LLM Example\\nForefrontAI LLM Example\\nGooseAI LLM Example\\nHugging Face Hub\\nManifest\\nModal\\nOpenAI\\nPetals LLM Example\\nPromptLayer OpenAI\\nSageMakerEndpoint\\nSelf-Hosted Models via Runhouse\\nStochasticAI\\nWriter\\n\\n\\nAsync API for LLM\\nStreaming with LLMs\\n\\n\\nReference\\n\\n\\nDocument Loaders\\nKey Concepts\\nHow To Guides\\nCoNLL-U\\nAirbyte JSON\\nAZLyrics\\nBlackboard\\nCollege Confidential\\nCopy Paste\\nCSV Loader\\nDirectory Loader\\nEmail\\nEverNote\\nFacebook Chat\\nFigma\\nGCS Directory\\nGCS File Storage\\nGitBook\\nGoogle Drive\\nGutenberg\\nHacker News\\nHTML\\niFixit\\nImages\\nIMSDb\\nMarkdown\\nNotebook\\nNotion\\nObsidian\\nPDF\\nPowerPoint\\nReadTheDocs Documentation\\nRoam\\ns3 Directory\\ns3 File\\nSubtitle Files\\nTelegram\\nUnstructured File Loader\\nURL\\nWeb Base\\nWord Documents\\nYouTube\\n\\n\\n\\n\\nUtils\\nKey Concepts\\nGeneric Utilities\\nBash\\nBing Search\\nGoogle Search\\nGoogle Serper API\\nIFTTT WebHooks\\nPython REPL\\nRequests\\nSearxNG Search API\\nSerpAPI\\nWolfram Alpha\\nZapier Natural 
Language Actions API\\n\\n\\nReference\\nPython REPL\\nSerpAPI\\nSearxNG Search\\nDocstore\\nText Splitter\\nEmbeddings\\nVectorStores\\n\\n\\n\\n\\nIndexes\\nGetting Started\\nKey Concepts\\nHow To Guides\\nEmbeddings\\nHypothetical Document Embeddings\\nText Splitter\\nVectorStores\\nAtlasDB\\nChroma\\nDeep Lake\\nElasticSearch\\nFAISS\\nMilvus\\nOpenSearch\\nPGVector\\nPinecone\\nQdrant\\nRedis\\nWeaviate\\nChatGPT Plugin Retriever\\nVectorStore Retriever\\nAnalyze Document\\nChat Index\\nGraph QA\\nQuestion Answering with Sources\\nQuestion Answering\\nSummarization\\nRetrieval Question/Answering\\nRetrieval Question Answering with Sources\\nVector DB Text Generation\\n\\n\\n\\n\\nChains\\nGetting Started\\nHow-To Guides\\nGeneric Chains\\nLoading from LangChainHub\\nLLM Chain\\nSequential Chains\\nSerialization\\nTransformation Chain\\n\\n\\nUtility Chains\\nAPI Chains\\nSelf-Critique Chain with Constitutional AI\\nBashChain\\nLLMCheckerChain\\nLLM Math\\nLLMRequestsChain\\nLLMSummarizationCheckerChain\\nModeration\\nPAL\\nSQLite example\\n\\n\\nAsync API for Chain\\n\\n\\nKey Concepts\\nReference\\n\\n\\nAgents\\nGetting Started\\nKey Concepts\\nHow-To Guides\\nAgents and Vectorstores\\nAsync API for Agent\\nConversation Agent (for Chat Models)\\nChatGPT Plugins\\nCustom Agent\\nDefining Custom Tools\\nHuman as a tool\\nIntermediate Steps\\nLoading from LangChainHub\\nMax Iterations\\nMulti Input Tools\\nSearch Tools\\nSerialization\\nAdding SharedMemory to an Agent and its Tools\\nCSV Agent\\nJSON Agent\\nOpenAPI Agent\\nPandas Dataframe Agent\\nPython Agent\\nSQL Database Agent\\nVectorstore Agent\\nMRKL\\nMRKL Chat\\nReAct\\nSelf Ask With Search\\n\\n\\nReference\\n\\n\\nMemory\\nGetting Started\\nKey Concepts\\nHow-To Guides\\nConversationBufferMemory\\nConversationBufferWindowMemory\\nEntity Memory\\nConversation Knowledge Graph Memory\\nConversationSummaryMemory\\nConversationSummaryBufferMemory\\nConversationTokenBufferMemory\\nAdding Memory To an 
LLMChain\\nAdding Memory to a Multi-Input Chain\\nAdding Memory to an Agent\\nChatGPT Clone\\nConversation Agent\\nConversational Memory Customization\\nCustom Memory\\nMultiple Memory\\n\\n\\n\\n\\nChat\\nGetting Started\\nKey Concepts\\nHow-To Guides\\nAgent\\nChat Vector DB\\nFew Shot Examples\\nMemory\\nPromptLayer ChatOpenAI\\nStreaming\\nRetrieval Question/Answering\\nRetrieval Question Answering with Sources\\n\\n\\n\\n\\n\\nUse Cases\\n\\nAgents\\nChatbots\\nGenerate Examples\\nData Augmented Generation\\nQuestion Answering\\nSummarization\\nQuerying Tabular Data\\nExtraction\\nEvaluation\\nAgent Benchmarking: Search + Calculator\\nAgent VectorDB Question Answering Benchmarking\\nBenchmarking Template\\nData Augmented Question Answering\\nUsing Hugging Face Datasets\\nLLM Math\\nQuestion Answering Benchmarking: Paul Graham Essay\\nQuestion Answering Benchmarking: State of the Union Address\\nQA Generation\\nQuestion Answering\\nSQL Question Answering Benchmarking: Chinook\\n\\n\\nModel Comparison\\n\\nReference\\n\\nInstallation\\nIntegrations\\nAPI References\\nPrompts\\nPromptTemplates\\nExample Selector\\n\\n\\nUtilities\\nPython REPL\\nSerpAPI\\nSearxNG Search\\nDocstore\\nText Splitter\\nEmbeddings\\nVectorStores\\n\\n\\nChains\\nAgents\\n\\n\\n\\nEcosystem\\n\\nLangChain Ecosystem\\nAI21 Labs\\nAtlasDB\\nBanana\\nCerebriumAI\\nChroma\\nCohere\\nDeepInfra\\nDeep Lake\\nForefrontAI\\nGoogle Search Wrapper\\nGoogle Serper Wrapper\\nGooseAI\\nGraphsignal\\nHazy Research\\nHelicone\\nHugging Face\\nMilvus\\nModal\\nNLPCloud\\nOpenAI\\nOpenSearch\\nPetals\\nPGVector\\nPinecone\\nPromptLayer\\nQdrant\\nRunhouse\\nSearxNG Search API\\nSerpAPI\\nStochasticAI\\nUnstructured\\nWeights & Biases\\nWeaviate\\nWolfram Alpha Wrapper\\nWriter\\n\\n\\n\\nAdditional Resources\\n\\nLangChainHub\\nGlossary\\nLangChain Gallery\\nDeployments\\nTracing\\nDiscord\\nProduction 
Support\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n.rst\\n\\n\\n\\n\\n\\n\\n\\n.pdf\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nWelcome to LangChain\\n\\n\\n\\n\\n Contents \\n\\n\\n\\nGetting Started\\nModules\\nUse Cases\\nReference Docs\\nLangChain Ecosystem\\nAdditional Resources\\n\\n\\n\\n\\n\\n\\n\\n\\nWelcome to LangChain#\\nLarge language models (LLMs) are emerging as a transformative technology, enabling\\ndevelopers to build applications that they previously could not.\\nBut using these LLMs in isolation is often not enough to\\ncreate a truly powerful app - the real power comes when you are able to\\ncombine them with other sources of computation or knowledge.\\nThis library is aimed at assisting in the development of those types of applications. Common examples of these types of applications include:\\n❓ Question Answering over specific documents\\n\\nDocumentation\\nEnd-to-end Example: Question Answering over Notion Database\\n\\n💬 Chatbots\\n\\nDocumentation\\nEnd-to-end Example: Chat-LangChain\\n\\n🤖 Agents\\n\\nDocumentation\\nEnd-to-end Example: GPT+WolframAlpha\\n\\n\\nGetting Started#\\nCheckout the below guide for a walkthrough of how to get started using LangChain to create an Language Model application.\\n\\nGetting Started Documentation\\n\\n\\n\\n\\n\\nModules#\\nThere are several main modules that LangChain provides support for.\\nFor each module we provide some examples to get started, how-to guides, reference docs, and conceptual guides.\\nThese modules are, in increasing order of complexity:\\n\\nPrompts: This includes prompt management, prompt optimization, and prompt serialization.\\nLLMs: This includes a generic interface for all LLMs, and common utilities for working with LLMs.\\nDocument Loaders: This includes a standard interface for loading documents, as well as specific integrations to all types of text data sources.\\nUtils: Language 
models are often more powerful when interacting with other sources of knowledge or computation. This can include Python REPLs, embeddings, search engines, and more. LangChain provides a large collection of common utils to use in your application.\\nChains: Chains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\\nIndexes: Language models are often more powerful when combined with your own text data - this module covers best practices for doing exactly that.\\nAgents: Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents.\\nMemory: Memory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\\nChat: Chat models are a variation on Language Models that expose a different API - rather than working with raw text, they work with messages. LangChain provides a standard interface for working with them and doing all the same things as above.\\n\\n\\n\\n\\n\\nUse Cases#\\nThe above modules can be used in a variety of ways. LangChain also provides guidance and assistance in this. Below are some of the common use cases LangChain supports.\\n\\nAgents: Agents are systems that use a language model to interact with other tools. 
These can be used to do more grounded question/answering, interact with APIs, or even take actions.\\nChatbots: Since language models are good at producing text, that makes them ideal for creating chatbots.\\nData Augmented Generation: Data Augmented Generation involves specific types of chains that first interact with an external datasource to fetch data to use in the generation step. Examples of this include summarization of long pieces of text and question/answering over specific data sources.\\nQuestion Answering: Answering questions over specific documents, only utilizing the information in those documents to construct an answer. A type of Data Augmented Generation.\\nSummarization: Summarizing longer documents into shorter, more condensed chunks of information. A type of Data Augmented Generation.\\nQuerying Tabular Data: If you want to understand how to use LLMs to query data that is stored in a tabular format (csvs, SQL, dataframes, etc) you should read this page.\\nEvaluation: Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.\\nGenerate similar examples: Generating similar examples to a given input. This is a common use case for many applications, and LangChain provides some prompts/chains for assisting in this.\\nCompare models: Experimenting with different prompts, models, and chains is a big part of developing the best possible application. The ModelLaboratory makes it easy to do so.\\n\\n\\n\\n\\n\\nReference Docs#\\nAll of LangChain’s reference documentation, in one place. 
Full documentation on all methods, classes, installation methods, and integration setups for LangChain.\\n\\nReference Documentation\\n\\n\\n\\n\\n\\nLangChain Ecosystem#\\nGuides for how other companies/products can be used with LangChain\\n\\nLangChain Ecosystem\\n\\n\\n\\n\\n\\nAdditional Resources#\\nAdditional collection of resources we think may be useful as you develop your application!\\n\\nLangChainHub: The LangChainHub is a place to share and explore other prompts, chains, and agents.\\nGlossary: A glossary of all related terms, papers, methods, etc. Whether implemented in LangChain or not!\\nGallery: A collection of our favorite projects that use LangChain. Useful for finding inspiration or seeing how things were done in other applications.\\nDeployments: A collection of instructions, code snippets, and template repositories for deploying LangChain apps.\\nDiscord: Join us on our Discord to discuss all things LangChain!\\nTracing: A guide on using tracing in LangChain to visualize the execution of chains and agents.\\nProduction Support: As you move your LangChains into production, we’d love to offer more comprehensive support. 
Please fill out this form and we’ll set up a dedicated support Slack channel.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nnext\\nQuickstart Guide\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Contents\\n \\n\\n\\nGetting Started\\nModules\\nUse Cases\\nReference Docs\\nLangChain Ecosystem\\nAdditional Resources\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBy Harrison Chase\\n\\n\\n\\n\\n \\n © Copyright 2023, Harrison Chase.\\n \\n\\n\\n\\n\\n Last updated on Mar 24, 2023.\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n', lookup_str='', metadata={'source': 'https://python.langchain.com/en/stable/', 'loc': 'https://python.langchain.com/en/stable/', 'lastmod': '2023-03-24T19:30:54.647430+00:00', 'changefreq': 'weekly', 'priority': '1'}, lookup_index=0)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filtering sitemap URLs\n", + "\n", + "Sitemaps can be massive files, with thousands of URLs. Often you don't need every single one of them. You can filter the URLs by passing a list of strings or regex patterns to the `filter_urls` parameter. Only URLs that match one of the patterns will be loaded." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "loader = SitemapLoader(\n", + " \"https://langchain.readthedocs.io/sitemap.xml\",\n", + " filter_urls=[\"https://python.langchain.com/en/latest/\"]\n", + ")\n", + "documents = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(page_content='\\n\\n\\n\\n\\n\\nWelcome to LangChain — 🦜🔗 LangChain 0.0.123\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSkip to main content\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCtrl+K\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n🦜🔗 LangChain 0.0.123\\n\\n\\n\\nGetting Started\\n\\nQuickstart Guide\\n\\nModules\\n\\nModels\\nLLMs\\nGetting Started\\nGeneric Functionality\\nHow to use the async API for LLMs\\nHow to write a custom LLM wrapper\\nHow (and why) to use the fake LLM\\nHow to cache LLM calls\\nHow to serialize LLM classes\\nHow to stream LLM responses\\nHow to track token usage\\n\\n\\nIntegrations\\nAI21\\nAleph Alpha\\nAnthropic\\nAzure OpenAI LLM Example\\nBanana\\nCerebriumAI LLM Example\\nCohere\\nDeepInfra LLM Example\\nForefrontAI LLM Example\\nGooseAI LLM Example\\nHugging Face Hub\\nManifest\\nModal\\nOpenAI\\nPetals LLM Example\\nPromptLayer OpenAI\\nSageMakerEndpoint\\nSelf-Hosted Models via Runhouse\\nStochasticAI\\nWriter\\n\\n\\nReference\\n\\n\\nChat Models\\nGetting Started\\nHow-To Guides\\nHow to use few shot examples\\nHow to stream responses\\n\\n\\nIntegrations\\nAzure\\nOpenAI\\nPromptLayer ChatOpenAI\\n\\n\\n\\n\\nText Embedding Models\\nAzureOpenAI\\nCohere\\nFake Embeddings\\nHugging Face Hub\\nInstructEmbeddings\\nOpenAI\\nSageMaker Endpoint Embeddings\\nSelf Hosted Embeddings\\nTensorflowHub\\n\\n\\n\\n\\nPrompts\\nPrompt Templates\\nGetting Started\\nHow-To Guides\\nHow to create a custom prompt template\\nHow 
to create a prompt template that uses few shot examples\\nHow to work with partial Prompt Templates\\nHow to serialize prompts\\n\\n\\nReference\\nPromptTemplates\\nExample Selector\\n\\n\\n\\n\\nChat Prompt Template\\nExample Selectors\\nHow to create a custom example selector\\nLengthBased ExampleSelector\\nMaximal Marginal Relevance ExampleSelector\\nNGram Overlap ExampleSelector\\nSimilarity ExampleSelector\\n\\n\\nOutput Parsers\\nOutput Parsers\\nCommaSeparatedListOutputParser\\nOutputFixingParser\\nPydanticOutputParser\\nRetryOutputParser\\nStructured Output Parser\\n\\n\\n\\n\\nIndexes\\nGetting Started\\nDocument Loaders\\nCoNLL-U\\nAirbyte JSON\\nAZLyrics\\nBlackboard\\nCollege Confidential\\nCopy Paste\\nCSV Loader\\nDirectory Loader\\nEmail\\nEverNote\\nFacebook Chat\\nFigma\\nGCS Directory\\nGCS File Storage\\nGitBook\\nGoogle Drive\\nGutenberg\\nHacker News\\nHTML\\niFixit\\nImages\\nIMSDb\\nMarkdown\\nNotebook\\nNotion\\nObsidian\\nPDF\\nPowerPoint\\nReadTheDocs Documentation\\nRoam\\ns3 Directory\\ns3 File\\nSubtitle Files\\nTelegram\\nUnstructured File Loader\\nURL\\nWeb Base\\nWord Documents\\nYouTube\\n\\n\\nText Splitters\\nGetting Started\\nCharacter Text Splitter\\nHuggingFace Length Function\\nLatex Text Splitter\\nMarkdown Text Splitter\\nNLTK Text Splitter\\nPython Code Text Splitter\\nRecursiveCharaterTextSplitter\\nSpacy Text Splitter\\ntiktoken (OpenAI) Length Function\\nTiktokenText Splitter\\n\\n\\nVectorstores\\nGetting Started\\nAtlasDB\\nChroma\\nDeep Lake\\nElasticSearch\\nFAISS\\nMilvus\\nOpenSearch\\nPGVector\\nPinecone\\nQdrant\\nRedis\\nWeaviate\\n\\n\\nRetrievers\\nChatGPT Plugin Retriever\\nVectorStore Retriever\\n\\n\\n\\n\\nMemory\\nGetting Started\\nHow-To Guides\\nConversationBufferMemory\\nConversationBufferWindowMemory\\nEntity Memory\\nConversation Knowledge Graph Memory\\nConversationSummaryMemory\\nConversationSummaryBufferMemory\\nConversationTokenBufferMemory\\nHow to add Memory to an LLMChain\\nHow to add memory 
to a Multi-Input Chain\\nHow to add Memory to an Agent\\nHow to customize conversational memory\\nHow to create a custom Memory class\\nHow to use multiple memroy classes in the same chain\\n\\n\\n\\n\\nChains\\nGetting Started\\nHow-To Guides\\nAsync API for Chain\\nLoading from LangChainHub\\nLLM Chain\\nSequential Chains\\nSerialization\\nTransformation Chain\\nAnalyze Document\\nChat Index\\nGraph QA\\nHypothetical Document Embeddings\\nQuestion Answering with Sources\\nQuestion Answering\\nSummarization\\nRetrieval Question/Answering\\nRetrieval Question Answering with Sources\\nVector DB Text Generation\\nAPI Chains\\nSelf-Critique Chain with Constitutional AI\\nBashChain\\nLLMCheckerChain\\nLLM Math\\nLLMRequestsChain\\nLLMSummarizationCheckerChain\\nModeration\\nPAL\\nSQLite example\\n\\n\\nReference\\n\\n\\nAgents\\nGetting Started\\nTools\\nGetting Started\\nDefining Custom Tools\\nMulti Input Tools\\nBash\\nBing Search\\nChatGPT Plugins\\nGoogle Search\\nGoogle Serper API\\nHuman as a tool\\nIFTTT WebHooks\\nPython REPL\\nRequests\\nSearch Tools\\nSearxNG Search API\\nSerpAPI\\nWolfram Alpha\\nZapier Natural Language Actions API\\n\\n\\nAgents\\nAgent Types\\nCustom Agent\\nConversation Agent (for Chat Models)\\nConversation Agent\\nMRKL\\nMRKL Chat\\nReAct\\nSelf Ask With Search\\n\\n\\nToolkits\\nCSV Agent\\nJSON Agent\\nOpenAPI Agent\\nPandas Dataframe Agent\\nPython Agent\\nSQL Database Agent\\nVectorstore Agent\\n\\n\\nAgent Executors\\nHow to combine agents and vectorstores\\nHow to use the async API for Agents\\nHow to create ChatGPT Clone\\nHow to access intermediate steps\\nHow to cap the max number of iterations\\nHow to add SharedMemory to an Agent and its Tools\\n\\n\\n\\n\\n\\nUse Cases\\n\\nPersonal Assistants\\nQuestion Answering over Docs\\nChatbots\\nQuerying Tabular Data\\nInteracting with APIs\\nSummarization\\nExtraction\\nEvaluation\\nAgent Benchmarking: Search + Calculator\\nAgent VectorDB Question Answering 
Benchmarking\\nBenchmarking Template\\nData Augmented Question Answering\\nUsing Hugging Face Datasets\\nLLM Math\\nQuestion Answering Benchmarking: Paul Graham Essay\\nQuestion Answering Benchmarking: State of the Union Address\\nQA Generation\\nQuestion Answering\\nSQL Question Answering Benchmarking: Chinook\\n\\n\\n\\nReference\\n\\nInstallation\\nIntegrations\\nAPI References\\nPrompts\\nPromptTemplates\\nExample Selector\\n\\n\\nUtilities\\nPython REPL\\nSerpAPI\\nSearxNG Search\\nDocstore\\nText Splitter\\nEmbeddings\\nVectorStores\\n\\n\\nChains\\nAgents\\n\\n\\n\\nEcosystem\\n\\nLangChain Ecosystem\\nAI21 Labs\\nAtlasDB\\nBanana\\nCerebriumAI\\nChroma\\nCohere\\nDeepInfra\\nDeep Lake\\nForefrontAI\\nGoogle Search Wrapper\\nGoogle Serper Wrapper\\nGooseAI\\nGraphsignal\\nHazy Research\\nHelicone\\nHugging Face\\nMilvus\\nModal\\nNLPCloud\\nOpenAI\\nOpenSearch\\nPetals\\nPGVector\\nPinecone\\nPromptLayer\\nQdrant\\nRunhouse\\nSearxNG Search API\\nSerpAPI\\nStochasticAI\\nUnstructured\\nWeights & Biases\\nWeaviate\\nWolfram Alpha Wrapper\\nWriter\\n\\n\\n\\nAdditional Resources\\n\\nLangChainHub\\nGlossary\\nLangChain Gallery\\nDeployments\\nTracing\\nDiscord\\nProduction Support\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n.rst\\n\\n\\n\\n\\n\\n\\n\\n.pdf\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nWelcome to LangChain\\n\\n\\n\\n\\n Contents \\n\\n\\n\\nGetting Started\\nModules\\nUse Cases\\nReference Docs\\nLangChain Ecosystem\\nAdditional Resources\\n\\n\\n\\n\\n\\n\\n\\n\\nWelcome to LangChain#\\nLangChain is a framework for developing applications powered by language models. 
We believe that the most powerful and differentiated applications will not only call out to a language model via an API, but will also:\\n\\nBe data-aware: connect a language model to other sources of data\\nBe agentic: allow a language model to interact with its environment\\n\\nThe LangChain framework is designed with the above principles in mind.\\nThis is the Python specific portion of the documentation. For a purely conceptual guide to LangChain, see here. For the JavaScript documentation, see here.\\n\\nGetting Started#\\nCheckout the below guide for a walkthrough of how to get started using LangChain to create an Language Model application.\\n\\nGetting Started Documentation\\n\\n\\n\\n\\n\\nModules#\\nThere are several main modules that LangChain provides support for.\\nFor each module we provide some examples to get started, how-to guides, reference docs, and conceptual guides.\\nThese modules are, in increasing order of complexity:\\n\\nModels: The various model types and model integrations LangChain supports.\\nPrompts: This includes prompt management, prompt optimization, and prompt serialization.\\nMemory: Memory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\\nIndexes: Language models are often more powerful when combined with your own text data - this module covers best practices for doing exactly that.\\nChains: Chains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\\nAgents: Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. 
LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents.\\n\\n\\n\\n\\n\\nUse Cases#\\nThe above modules can be used in a variety of ways. LangChain also provides guidance and assistance in this. Below are some of the common use cases LangChain supports.\\n\\nPersonal Assistants: The main LangChain use case. Personal assistants need to take actions, remember interactions, and have knowledge about your data.\\nQuestion Answering: The second big LangChain use case. Answering questions over specific documents, only utilizing the information in those documents to construct an answer.\\nChatbots: Since language models are good at producing text, that makes them ideal for creating chatbots.\\nQuerying Tabular Data: If you want to understand how to use LLMs to query data that is stored in a tabular format (csvs, SQL, dataframes, etc) you should read this page.\\nInteracting with APIs: Enabling LLMs to interact with APIs is extremely powerful in order to give them more up-to-date information and allow them to take actions.\\nExtraction: Extract structured information from text.\\nSummarization: Summarizing longer documents into shorter, more condensed chunks of information. A type of Data Augmented Generation.\\nEvaluation: Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.\\n\\n\\n\\n\\n\\nReference Docs#\\nAll of LangChain’s reference documentation, in one place. 
Full documentation on all methods, classes, installation methods, and integration setups for LangChain.\\n\\nReference Documentation\\n\\n\\n\\n\\n\\nLangChain Ecosystem#\\nGuides for how other companies/products can be used with LangChain\\n\\nLangChain Ecosystem\\n\\n\\n\\n\\n\\nAdditional Resources#\\nAdditional collection of resources we think may be useful as you develop your application!\\n\\nLangChainHub: The LangChainHub is a place to share and explore other prompts, chains, and agents.\\nGlossary: A glossary of all related terms, papers, methods, etc. Whether implemented in LangChain or not!\\nGallery: A collection of our favorite projects that use LangChain. Useful for finding inspiration or seeing how things were done in other applications.\\nDeployments: A collection of instructions, code snippets, and template repositories for deploying LangChain apps.\\nTracing: A guide on using tracing in LangChain to visualize the execution of chains and agents.\\nModel Laboratory: Experimenting with different prompts, models, and chains is a big part of developing the best possible application. The ModelLaboratory makes it easy to do so.\\nDiscord: Join us on our Discord to discuss all things LangChain!\\nProduction Support: As you move your LangChains into production, we’d love to offer more comprehensive support. 
Please fill out this form and we’ll set up a dedicated support Slack channel.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nnext\\nQuickstart Guide\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Contents\\n \\n\\n\\nGetting Started\\nModules\\nUse Cases\\nReference Docs\\nLangChain Ecosystem\\nAdditional Resources\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBy Harrison Chase\\n\\n\\n\\n\\n \\n © Copyright 2023, Harrison Chase.\\n \\n\\n\\n\\n\\n Last updated on Mar 27, 2023.\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n', lookup_str='', metadata={'source': 'https://python.langchain.com/en/latest/', 'loc': 'https://python.langchain.com/en/latest/', 'lastmod': '2023-03-27T22:50:49.790324+00:00', 'changefreq': 'daily', 'priority': '0.9'}, lookup_index=0)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "documents[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docs/modules/indexes/document_loaders/examples/web_base.ipynb b/docs/modules/indexes/document_loaders/examples/web_base.ipynb index bfd9b85d..9b3feff1 100644 --- a/docs/modules/indexes/document_loaders/examples/web_base.ipynb +++ b/docs/modules/indexes/document_loaders/examples/web_base.ipynb @@ -49,7 +49,7 @@ { "data": { "text/plain": [ - "[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. 
Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBANHLNCAAMNCAAWSoccer…MLBNCAAFGolfTennisSports BettingBoxingCaribbean SeriesCFLNCAACricketF1HorseLLWSMMANASCARNBA G LeagueOlympic SportsRacingRN BBRN FBRugbyWNBAWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nUFC 284: Makhachev vs. Volkanovski (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nMen's College Hoops: Select Games\\n\\n\\n\\n\\n\\n\\n\\nWomen's College Hoops: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nGerman Cup: Round of 16\\n\\n\\n\\n\\n\\n\\n\\n30 For 30: Bullies Of Baltimore\\n\\n\\n\\n\\n\\n\\n\\nMatt Miller's Two-Round NFL Mock Draft\\n\\n\\nQuick Links\\n\\n\\n\\n\\nSuper Bowl LVII\\n\\n\\n\\n\\n\\n\\n\\nSuper Bowl Betting\\n\\n\\n\\n\\n\\n\\n\\nNBA Trade Machine\\n\\n\\n\\n\\n\\n\\n\\nNBA All-Star Game\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nGames For Me\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow 
ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nAP Photo/Mark J. Terrilllive\\n\\n\\n\\nChristian Wood elevates for the big-time stuffChristian Wood elevates for the big-time stuff15m0:29\\n\\n\\nKyrie Irving nails the treyKyrie Irving nails the trey37m0:17\\n\\n\\nDwight Powell rises up for putback dunkDwight Powell throws down the putback dunk for the Mavericks.38m0:16\\n\\n\\nKyrie sinks his first basket with the MavericksKyrie Irving drains the jump shot early vs. the Clippers for his first points with the Mavericks.39m0:17\\n\\n\\nReggie Bullock pulls up for wide open 3Reggie Bullock is left wide open for the 3-pointer early vs. the Clippers.46m0:21\\n\\n\\n\\nTOP HEADLINESSources: Lakers get PG Russell in 3-team tradeTrail Blazers shipping Hart to Knicks, sources sayUConn loses two straight for first time in 30 yearsNFL's Goodell on officiating: Never been betterNFLPA's Smith: Get rid of 'intrusive' NFL combineAlex Morgan: 'Bizarre' for Saudis to sponsor WWCBills' Hamlin makes appearance to receive awardWWE Hall of Famer Lawler recovering from strokeWhich NFL team trades up to No. 1?NBA TRADE DEADLINE3 P.M. ET ON THURSDAYTrade grades: What to make of the three-team deal involving Russell Westbrook and D'Angelo RussellESPN NBA Insider Kevin Pelton is handing out grades for the biggest moves.2hLayne Murdoch Jr./NBAE via Getty ImagesNBA trade tracker: Grades, details for every deal for the 2022-23 seasonWhich players are finding new homes and which teams are making trades during the free-agency frenzy?59mESPN.comNBA trade deadline: Latest buzz and newsNBA SCOREBOARDWEDNESDAY'S GAMESSee AllCLEAR THE RUNWAYJalen Green soars for lefty alley-oop1h0:19Jarrett Allen skies to drop the hammer2h0:16Once the undisputed greatest, Joe Montana is still working things out15hWright ThompsonSUPER BOWL LVII6:30 P.M. 
ET ON SUNDAYBarbershop tales, a fistfight and brotherly love: Untold stories that explain the Kelce brothersJason and Travis Kelce will become the first brothers to face each other in a Super Bowl. Here are untold stories from people who know them best.16hTim McManus, +2 MoreEd Zurga/AP PhotoNFL experts predict Chiefs-Eagles: Our Super Bowl winner picksNFL writers, analysts and reporters take their best guesses on the Super Bowl LVII matchup.17hESPN staffBeware of Philadelphia's Rocky statue curseMadden sim predicts Eagles to win Super BowlTOP 10 TEAMS FALLCOLLEGE HOOPSUConn loses two straight for first time since 1993, falling to Marquette57m1:58Vandy drains 3 at buzzer to knock off Tennessee, fans storm the court1h0:54COLLEGE HOOPS SCORESMEN'S AND WOMEN'S TOP-25 GAMESMen's college hoops scoreboardWomen's college basketball scoresPROJECTING THE BUBBLEMEN'S COLLEGE HOOPSBubble Watch: Current situation? North Carolina has some work to doThe countdown to Selection Sunday on March 12 has begun. We will track which teams are locks and which ones can play their way into or out of the 2023 NCAA men's basketball tournament.6hJohn GasawayAP Photo/Matt Rourke Top HeadlinesSources: Lakers get PG Russell in 3-team tradeTrail Blazers shipping Hart to Knicks, sources sayUConn loses two straight for first time in 30 yearsNFL's Goodell on officiating: Never been betterNFLPA's Smith: Get rid of 'intrusive' NFL combineAlex Morgan: 'Bizarre' for Saudis to sponsor WWCBills' Hamlin makes appearance to receive awardWWE Hall of Famer Lawler recovering from strokeWhich NFL team trades up to No. 1?Favorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InICYMI1:54Orlovsky roasts Stephen A. for his top-5 players in the Super BowlDan Orlovsky lets Stephen A. Smith hear it after he lists his top five players in Super Bowl LVII. 
Best of ESPN+Michael Hickey/Getty ImagesBubble Watch 2023: Brace yourself for NCAA tournament dramaThe countdown to Selection Sunday on March 12 has begun. We will track which teams are locks and which ones can play their way into or out of the 2023 NCAA men's basketball tournament.Adam Pantozzi/NBAE via Getty ImagesLeBron's journey to the NBA scoring record in shot chartsTake a look at how LeBron James' on-court performance has changed during his march to 38,388 points.Illustration by ESPNRe-drafting first two rounds of 2022 NFL class: All 64 picksWe gave every NFL team a do-over for last year's draft, re-drafting the top 64 picks. Here's who rises and falls with the benefit of hindsight.AP Photo/David DermerWay-too-early 2023 MLB starting rotation rankingsThe Yanks' and Mets' rotations take two of the top three spots on our pre-spring training list. Where did they land -- and did another team sneak past one of 'em? Trending NowAP Photo/Jae C. HongStars pay tribute to LeBron James for securing NBA's all-time points recordLeBron James has passed Kareem Abdul-Jabbar for No. 1 on the all-time NBA scoring list, and other stars paid tribute to him on social media.Getty ImagesFans prepare for Rihanna's 2023 Super Bowl halftime showAs Rihanna prepares to make her highly anticipated return, supporters of all 32 teams are paying homage to the icon -- as only tormented NFL fans can.Photo by Cooper Neill/Getty ImagesWhen is the 2023 Super Bowl? Date, time for Chiefs vs. 
EaglesWe have you covered with seeding, scores and the full schedule for this season's playoffs -- and how to watch Super Bowl LVII.James Drake/Sports Illustrated via Getty ImagesNFL history: Super Bowl winners and resultsFrom the Packers' 1967 win over the Chiefs to the Rams' victory over the Bengals in 2022, we've got results for every Super Bowl.China Wong/NHLI via Getty ImagesBoston Bruins record tracker: Wins, points, milestonesThe B's are on pace for NHL records in wins and points, along with some individual superlatives as well. Follow along here with our updated tracker. Sports BettingPhoto by Kevin C. Cox/Getty ImagesSuper Bowl LVII betting: Everything you need to know to bet Eagles-ChiefsHere's your one-stop shop for all the information you need to help make your picks on the Philadelphia Eagles vs. Kansas City Chiefs in Super Bowl LVII. How to Watch on ESPN+(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+. \\n\\nESPN+\\n\\n\\n\\n\\nUFC 284: Makhachev vs. 
Volkanovski (ESPN+ PPV)\\n\\n\\n\\n\\n\\n\\n\\nMen's College Hoops: Select Games\\n\\n\\n\\n\\n\\n\\n\\nWomen's College Hoops: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nGerman Cup: Round of 16\\n\\n\\n\\n\\n\\n\\n\\n30 For 30: Bullies Of Baltimore\\n\\n\\n\\n\\n\\n\\n\\nMatt Miller's Two-Round NFL Mock Draft\\n\\n\\nQuick Links\\n\\n\\n\\n\\nSuper Bowl LVII\\n\\n\\n\\n\\n\\n\\n\\nSuper Bowl Betting\\n\\n\\n\\n\\n\\n\\n\\nNBA Trade Machine\\n\\n\\n\\n\\n\\n\\n\\nNBA All-Star Game\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nGames For Me\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", lookup_str='', metadata={'source': 'https://www.espn.com/'}, lookup_index=0)]" + "[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. 
Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBANCAAMNCAAWNHLSoccer…MLBNCAAFGolfTennisSports BettingBoxingCFLNCAACricketF1HorseLLWSMMANASCARNBA G LeagueOlympic SportsRacingRN BBRN FBRugbyWNBAWorld Baseball ClassicWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nAre 
you ready for Opening Day? Here's your guide to MLB's offseason chaosWait, Jacob deGrom is on the Rangers now? Xander Bogaerts and Trea Turner signed where? And what about Carlos Correa? Yeah, you're going to need to read up before Opening Day.12hESPNIllustration by ESPNEverything you missed in the MLB offseason3h2:33World Series odds, win totals, props for every teamPlay fantasy baseball for free!TOP HEADLINESQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersLAMAR WANTS OUT OF BALTIMOREMarcus Spears identifies the two teams that need Lamar Jackson the most8h2:00Would Lamar sit out? Will Ravens draft a QB? Jackson trade request insightsLamar Jackson has asked Baltimore to trade him, but Ravens coach John Harbaugh hopes the QB will be back.3hJamison HensleyBallard, Colts will consider trading for QB JacksonJackson to Indy? Washington? Barnwell ranks the QB's trade fitsSNYDER'S TUMULTUOUS 24-YEAR RUNHow Washington’s NFL franchise sank on and off the field under owner Dan SnyderSnyder purchased one of the NFL's marquee franchises in 1999. 
Twenty-four years later, and with the team up for sale, he leaves a legacy of on-field futility and off-field scandal.13hJohn KeimESPNIOWA STAR STEPS UP AGAINJ-Will: Caitlin Clark is the biggest brand in college sports right now8h0:47'The better the opponent, the better she plays': Clark draws comparisons to TaurasiCaitlin Clark's performance on Sunday had longtime observers going back decades to find comparisons.16hKevin PeltonWOMEN'S ELITE EIGHT SCOREBOARDMONDAY'S GAMESCheck your bracket!NBA DRAFTHow top prospects fared on the road to the Final FourThe 2023 NCAA tournament is down to four teams, and ESPN's Jonathan Givony recaps the players who saw their NBA draft stock change.11hJonathan GivonyAndy Lyons/Getty ImagesTALKING BASKETBALLWhy AD needs to be more assertive with LeBron on the court10h1:33Why Perk won't blame Kyrie for Mavs' woes8h1:48WHERE EVERY TEAM STANDSNew NFL Power Rankings: Post-free-agency 1-32 poll, plus underrated offseason movesThe free agent frenzy has come and gone. Which teams have improved their 2023 outlook, and which teams have taken a hit?12hNFL Nation reportersIllustration by ESPNTHE BUCK STOPS WITH BELICHICKBruschi: Fair to criticize Bill Belichick for Patriots' struggles10h1:27 Top HeadlinesQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InMarch Madness LiveESPNMarch Madness LiveWatch every men's NCAA tournament game live! 
ICYMI1:42Austin Peay's coach, pitcher and catcher all ejected after retaliation pitchAustin Peay's pitcher, catcher and coach were all ejected after a pitch was thrown at Liberty's Nathan Keeter, who earlier in the game hit a home run and celebrated while running down the third-base line. Men's Tournament ChallengeIllustration by ESPNMen's Tournament ChallengeCheck your bracket(s) in the 2023 Men's Tournament Challenge, which you can follow throughout the Big Dance. Women's Tournament ChallengeIllustration by ESPNWomen's Tournament ChallengeCheck your bracket(s) in the 2023 Women's Tournament Challenge, which you can follow throughout the Big Dance. Best of ESPN+AP Photo/Lynne SladkyFantasy Baseball ESPN+ Cheat Sheet: Sleepers, busts, rookies and closersYou've read their names all preseason long, it'd be a shame to forget them on draft day. The ESPN+ Cheat Sheet is one way to make sure that doesn't happen.Steph Chambers/Getty ImagesPassan's 2023 MLB season preview: Bold predictions and moreOpening Day is just over a week away -- and Jeff Passan has everything you need to know covered from every possible angle.Photo by Bob Kupbens/Icon Sportswire2023 NFL free agency: Best team fits for unsigned playersWhere could Ezekiel Elliott land? Let's match remaining free agents to teams and find fits for two trade candidates.Illustration by ESPN2023 NFL mock draft: Mel Kiper's first-round pick predictionsMel Kiper Jr. makes his predictions for Round 1 of the NFL draft, including projecting a trade in the top five. Trending NowAnne-Marie Sorvin-USA TODAY SBoston Bruins record tracker: Wins, points, milestonesThe B's are on pace for NHL records in wins and points, along with some individual superlatives as well. Follow along here with our updated tracker.Mandatory Credit: William Purnell-USA TODAY Sports2023 NFL full draft order: AFC, NFC team picks for all roundsStarting with the Carolina Panthers at No. 1 overall, here's the entire 2023 NFL draft broken down round by round. 
How to Watch on ESPN+Gregory Fisher/Icon Sportswire2023 NCAA men's hockey: Results, bracket, how to watchThe matchups in Tampa promise to be thrillers, featuring plenty of star power, high-octane offense and stellar defense.(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+.Hailie Lynch/XFLHow to watch the XFL: 2023 schedule, teams, players, news, moreEvery XFL game will be streamed on ESPN+. Find out when and where else you can watch the eight teams compete. Sign up to play the #1 Fantasy Baseball GameReactivate A LeagueCreate A LeagueJoin a Public LeaguePractice With a Mock DraftSports BettingAP Photo/Mike KropfMarch Madness betting 2023: Bracket odds, lines, tips, moreThe 2023 NCAA tournament brackets have finally been released, and we have everything you need to know to make a bet on all of the March Madness games. 
Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivateMock Draft Now\\n\\nESPN+\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", lookup_str='', metadata={'source': 'https://www.espn.com/'}, lookup_index=0)]" ] }, "execution_count": 4, @@ -84,10 +84,138 @@ "\"\"\";" ] }, + { + "cell_type": "markdown", + "id": "150988e6", + "metadata": {}, + "source": [ + "# Loading multiple webpages\n", + "\n", + "You can also load multiple webpages at once by passing in a list of urls to the loader. This will return a list of documents in the same order as the urls passed in." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e25bbd3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBANCAAMNCAAWNHLSoccer…MLBNCAAFGolfTennisSports BettingBoxingCFLNCAACricketF1HorseLLWSMMANASCARNBA G LeagueOlympic SportsRacingRN BBRN FBRugbyWNBAWorld Baseball ClassicWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN 
Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nAre you ready for Opening Day? Here's your guide to MLB's offseason chaosWait, Jacob deGrom is on the Rangers now? Xander Bogaerts and Trea Turner signed where? And what about Carlos Correa? Yeah, you're going to need to read up before Opening Day.12hESPNIllustration by ESPNEverything you missed in the MLB offseason3h2:33World Series odds, win totals, props for every teamPlay fantasy baseball for free!TOP HEADLINESQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersLAMAR WANTS OUT OF BALTIMOREMarcus Spears identifies the two teams that need Lamar Jackson the most7h2:00Would Lamar sit out? Will Ravens draft a QB? Jackson trade request insightsLamar Jackson has asked Baltimore to trade him, but Ravens coach John Harbaugh hopes the QB will be back.3hJamison HensleyBallard, Colts will consider trading for QB JacksonJackson to Indy? Washington? Barnwell ranks the QB's trade fitsSNYDER'S TUMULTUOUS 24-YEAR RUNHow Washington’s NFL franchise sank on and off the field under owner Dan SnyderSnyder purchased one of the NFL's marquee franchises in 1999. 
Twenty-four years later, and with the team up for sale, he leaves a legacy of on-field futility and off-field scandal.13hJohn KeimESPNIOWA STAR STEPS UP AGAINJ-Will: Caitlin Clark is the biggest brand in college sports right now8h0:47'The better the opponent, the better she plays': Clark draws comparisons to TaurasiCaitlin Clark's performance on Sunday had longtime observers going back decades to find comparisons.16hKevin PeltonWOMEN'S ELITE EIGHT SCOREBOARDMONDAY'S GAMESCheck your bracket!NBA DRAFTHow top prospects fared on the road to the Final FourThe 2023 NCAA tournament is down to four teams, and ESPN's Jonathan Givony recaps the players who saw their NBA draft stock change.11hJonathan GivonyAndy Lyons/Getty ImagesTALKING BASKETBALLWhy AD needs to be more assertive with LeBron on the court9h1:33Why Perk won't blame Kyrie for Mavs' woes8h1:48WHERE EVERY TEAM STANDSNew NFL Power Rankings: Post-free-agency 1-32 poll, plus underrated offseason movesThe free agent frenzy has come and gone. Which teams have improved their 2023 outlook, and which teams have taken a hit?12hNFL Nation reportersIllustration by ESPNTHE BUCK STOPS WITH BELICHICKBruschi: Fair to criticize Bill Belichick for Patriots' struggles10h1:27 Top HeadlinesQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InMarch Madness LiveESPNMarch Madness LiveWatch every men's NCAA tournament game live! 
ICYMI1:42Austin Peay's coach, pitcher and catcher all ejected after retaliation pitchAustin Peay's pitcher, catcher and coach were all ejected after a pitch was thrown at Liberty's Nathan Keeter, who earlier in the game hit a home run and celebrated while running down the third-base line. Men's Tournament ChallengeIllustration by ESPNMen's Tournament ChallengeCheck your bracket(s) in the 2023 Men's Tournament Challenge, which you can follow throughout the Big Dance. Women's Tournament ChallengeIllustration by ESPNWomen's Tournament ChallengeCheck your bracket(s) in the 2023 Women's Tournament Challenge, which you can follow throughout the Big Dance. Best of ESPN+AP Photo/Lynne SladkyFantasy Baseball ESPN+ Cheat Sheet: Sleepers, busts, rookies and closersYou've read their names all preseason long, it'd be a shame to forget them on draft day. The ESPN+ Cheat Sheet is one way to make sure that doesn't happen.Steph Chambers/Getty ImagesPassan's 2023 MLB season preview: Bold predictions and moreOpening Day is just over a week away -- and Jeff Passan has everything you need to know covered from every possible angle.Photo by Bob Kupbens/Icon Sportswire2023 NFL free agency: Best team fits for unsigned playersWhere could Ezekiel Elliott land? Let's match remaining free agents to teams and find fits for two trade candidates.Illustration by ESPN2023 NFL mock draft: Mel Kiper's first-round pick predictionsMel Kiper Jr. makes his predictions for Round 1 of the NFL draft, including projecting a trade in the top five. Trending NowAnne-Marie Sorvin-USA TODAY SBoston Bruins record tracker: Wins, points, milestonesThe B's are on pace for NHL records in wins and points, along with some individual superlatives as well. Follow along here with our updated tracker.Mandatory Credit: William Purnell-USA TODAY Sports2023 NFL full draft order: AFC, NFC team picks for all roundsStarting with the Carolina Panthers at No. 1 overall, here's the entire 2023 NFL draft broken down round by round. 
How to Watch on ESPN+Gregory Fisher/Icon Sportswire2023 NCAA men's hockey: Results, bracket, how to watchThe matchups in Tampa promise to be thrillers, featuring plenty of star power, high-octane offense and stellar defense.(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+.Hailie Lynch/XFLHow to watch the XFL: 2023 schedule, teams, players, news, moreEvery XFL game will be streamed on ESPN+. Find out when and where else you can watch the eight teams compete. Sign up to play the #1 Fantasy Baseball GameReactivate A LeagueCreate A LeagueJoin a Public LeaguePractice With a Mock DraftSports BettingAP Photo/Mike KropfMarch Madness betting 2023: Bracket odds, lines, tips, moreThe 2023 NCAA tournament brackets have finally been released, and we have everything you need to know to make a bet on all of the March Madness games. 
Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivateMock Draft Now\\n\\nESPN+\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. 
All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", lookup_str='', metadata={'source': 'https://www.espn.com/'}, lookup_index=0),\n", + " Document(page_content='GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\xa0Advanced searchAdvertisingBusiness SolutionsAbout Google© 2023 - Privacy - Terms ', lookup_str='', metadata={'source': 'https://google.com'}, lookup_index=0)]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader = WebBaseLoader([\"https://www.espn.com/\", \"https://google.com\"])\n", + "docs = loader.load()\n", + "docs" + ] + }, + { + "cell_type": "markdown", + "id": "641be294", + "metadata": {}, + "source": [ + "## Load multiple urls concurrently\n", + "\n", + "You can speed up the scraping process by scraping and parsing multiple urls concurrently.\n", + "\n", + "There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the server you are scraping and don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note that while this will speed up the scraping process, it may cause the server to block you. Be careful!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9f9cf30f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: nest_asyncio in /Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages (1.5.6)\r\n" + ] + } + ], + "source": [ + "!pip install nest_asyncio\n", + "\n", + "# fixes a bug with asyncio and jupyter\n", + "import nest_asyncio\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "49586eac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"\\n\\n\\n\\n\\n\\n\\n\\n\\nESPN - Serving Sports Fans. Anytime. Anywhere.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n Skip to navigation\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n<\\n\\n>\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nMenuESPN\\n\\n\\nSearch\\n\\n\\n\\nscores\\n\\n\\n\\nNFLNBANCAAMNCAAWNHLSoccer…MLBNCAAFGolfTennisSports BettingBoxingCFLNCAACricketF1HorseLLWSMMANASCARNBA G LeagueOlympic SportsRacingRN BBRN FBRugbyWNBAWorld Baseball ClassicWWEX GamesXFLMore ESPNFantasyListenWatchESPN+\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nSUBSCRIBE NOW\\n\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft 
Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\n\\n\\n\\n\\nFavorites\\n\\n\\n\\n\\n\\n\\n Manage Favorites\\n \\n\\n\\n\\nCustomize ESPNSign UpLog InESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nAre you ready for Opening Day? Here's your guide to MLB's offseason chaosWait, Jacob deGrom is on the Rangers now? Xander Bogaerts and Trea Turner signed where? And what about Carlos Correa? Yeah, you're going to need to read up before Opening Day.12hESPNIllustration by ESPNEverything you missed in the MLB offseason3h2:33World Series odds, win totals, props for every teamPlay fantasy baseball for free!TOP HEADLINESQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersLAMAR WANTS OUT OF BALTIMOREMarcus Spears identifies the two teams that need Lamar Jackson the most7h2:00Would Lamar sit out? Will Ravens draft a QB? Jackson trade request insightsLamar Jackson has asked Baltimore to trade him, but Ravens coach John Harbaugh hopes the QB will be back.3hJamison HensleyBallard, Colts will consider trading for QB JacksonJackson to Indy? Washington? 
Barnwell ranks the QB's trade fitsSNYDER'S TUMULTUOUS 24-YEAR RUNHow Washington’s NFL franchise sank on and off the field under owner Dan SnyderSnyder purchased one of the NFL's marquee franchises in 1999. Twenty-four years later, and with the team up for sale, he leaves a legacy of on-field futility and off-field scandal.13hJohn KeimESPNIOWA STAR STEPS UP AGAINJ-Will: Caitlin Clark is the biggest brand in college sports right now8h0:47'The better the opponent, the better she plays': Clark draws comparisons to TaurasiCaitlin Clark's performance on Sunday had longtime observers going back decades to find comparisons.16hKevin PeltonWOMEN'S ELITE EIGHT SCOREBOARDMONDAY'S GAMESCheck your bracket!NBA DRAFTHow top prospects fared on the road to the Final FourThe 2023 NCAA tournament is down to four teams, and ESPN's Jonathan Givony recaps the players who saw their NBA draft stock change.11hJonathan GivonyAndy Lyons/Getty ImagesTALKING BASKETBALLWhy AD needs to be more assertive with LeBron on the court9h1:33Why Perk won't blame Kyrie for Mavs' woes8h1:48WHERE EVERY TEAM STANDSNew NFL Power Rankings: Post-free-agency 1-32 poll, plus underrated offseason movesThe free agent frenzy has come and gone. Which teams have improved their 2023 outlook, and which teams have taken a hit?12hNFL Nation reportersIllustration by ESPNTHE BUCK STOPS WITH BELICHICKBruschi: Fair to criticize Bill Belichick for Patriots' struggles10h1:27 Top HeadlinesQB Jackson has requested trade from RavensSources: Texas hiring Terry as full-time coachJets GM: No rush on Rodgers; Lamar not optionLove to leave North Carolina, enter transfer portalBelichick to angsty Pats fans: See last 25 yearsEmbiid out, Harden due back vs. 
Jokic, NuggetsLynch: Purdy 'earned the right' to start for NinersMan Utd, Wrexham plan July friendly in San DiegoOn paper, Padres overtake DodgersFavorites FantasyManage FavoritesFantasy HomeCustomize ESPNSign UpLog InMarch Madness LiveESPNMarch Madness LiveWatch every men's NCAA tournament game live! ICYMI1:42Austin Peay's coach, pitcher and catcher all ejected after retaliation pitchAustin Peay's pitcher, catcher and coach were all ejected after a pitch was thrown at Liberty's Nathan Keeter, who earlier in the game hit a home run and celebrated while running down the third-base line. Men's Tournament ChallengeIllustration by ESPNMen's Tournament ChallengeCheck your bracket(s) in the 2023 Men's Tournament Challenge, which you can follow throughout the Big Dance. Women's Tournament ChallengeIllustration by ESPNWomen's Tournament ChallengeCheck your bracket(s) in the 2023 Women's Tournament Challenge, which you can follow throughout the Big Dance. Best of ESPN+AP Photo/Lynne SladkyFantasy Baseball ESPN+ Cheat Sheet: Sleepers, busts, rookies and closersYou've read their names all preseason long, it'd be a shame to forget them on draft day. The ESPN+ Cheat Sheet is one way to make sure that doesn't happen.Steph Chambers/Getty ImagesPassan's 2023 MLB season preview: Bold predictions and moreOpening Day is just over a week away -- and Jeff Passan has everything you need to know covered from every possible angle.Photo by Bob Kupbens/Icon Sportswire2023 NFL free agency: Best team fits for unsigned playersWhere could Ezekiel Elliott land? Let's match remaining free agents to teams and find fits for two trade candidates.Illustration by ESPN2023 NFL mock draft: Mel Kiper's first-round pick predictionsMel Kiper Jr. makes his predictions for Round 1 of the NFL draft, including projecting a trade in the top five. 
Trending NowAnne-Marie Sorvin-USA TODAY SBoston Bruins record tracker: Wins, points, milestonesThe B's are on pace for NHL records in wins and points, along with some individual superlatives as well. Follow along here with our updated tracker.Mandatory Credit: William Purnell-USA TODAY Sports2023 NFL full draft order: AFC, NFC team picks for all roundsStarting with the Carolina Panthers at No. 1 overall, here's the entire 2023 NFL draft broken down round by round. How to Watch on ESPN+Gregory Fisher/Icon Sportswire2023 NCAA men's hockey: Results, bracket, how to watchThe matchups in Tampa promise to be thrillers, featuring plenty of star power, high-octane offense and stellar defense.(AP Photo/Koji Sasahara, File)How to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN, ESPN+Here's everything you need to know about how to watch the PGA Tour, Masters, PGA Championship and FedEx Cup playoffs on ESPN and ESPN+.Hailie Lynch/XFLHow to watch the XFL: 2023 schedule, teams, players, news, moreEvery XFL game will be streamed on ESPN+. Find out when and where else you can watch the eight teams compete. Sign up to play the #1 Fantasy Baseball GameReactivate A LeagueCreate A LeagueJoin a Public LeaguePractice With a Mock DraftSports BettingAP Photo/Mike KropfMarch Madness betting 2023: Bracket odds, lines, tips, moreThe 2023 NCAA tournament brackets have finally been released, and we have everything you need to know to make a bet on all of the March Madness games. 
Sign up to play the #1 Fantasy game!Create A LeagueJoin Public LeagueReactivateMock Draft Now\\n\\nESPN+\\n\\n\\n\\n\\nNHL: Select Games\\n\\n\\n\\n\\n\\n\\n\\nXFL\\n\\n\\n\\n\\n\\n\\n\\nMLB: Select Games\\n\\n\\n\\n\\n\\n\\n\\nNCAA Baseball\\n\\n\\n\\n\\n\\n\\n\\nNCAA Softball\\n\\n\\n\\n\\n\\n\\n\\nCricket: Select Matches\\n\\n\\n\\n\\n\\n\\n\\nMel Kiper's NFL Mock Draft 3.0\\n\\n\\nQuick Links\\n\\n\\n\\n\\nMen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nWomen's Tournament Challenge\\n\\n\\n\\n\\n\\n\\n\\nNFL Draft Order\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch NHL Games\\n\\n\\n\\n\\n\\n\\n\\nFantasy Baseball: Sign Up\\n\\n\\n\\n\\n\\n\\n\\nHow To Watch PGA TOUR\\n\\n\\nESPN Sites\\n\\n\\n\\n\\nESPN Deportes\\n\\n\\n\\n\\n\\n\\n\\nAndscape\\n\\n\\n\\n\\n\\n\\n\\nespnW\\n\\n\\n\\n\\n\\n\\n\\nESPNFC\\n\\n\\n\\n\\n\\n\\n\\nX Games\\n\\n\\n\\n\\n\\n\\n\\nSEC Network\\n\\n\\nESPN Apps\\n\\n\\n\\n\\nESPN\\n\\n\\n\\n\\n\\n\\n\\nESPN Fantasy\\n\\n\\nFollow ESPN\\n\\n\\n\\n\\nFacebook\\n\\n\\n\\n\\n\\n\\n\\nTwitter\\n\\n\\n\\n\\n\\n\\n\\nInstagram\\n\\n\\n\\n\\n\\n\\n\\nSnapchat\\n\\n\\n\\n\\n\\n\\n\\nYouTube\\n\\n\\n\\n\\n\\n\\n\\nThe ESPN Daily Podcast\\n\\n\\nTerms of UsePrivacy PolicyYour US State Privacy RightsChildren's Online Privacy PolicyInterest-Based AdsAbout Nielsen MeasurementDo Not Sell or Share My Personal InformationContact UsDisney Ad Sales SiteWork for ESPNCopyright: © ESPN Enterprises, Inc. 
All rights reserved.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\", lookup_str='', metadata={'source': 'https://www.espn.com/'}, lookup_index=0),\n", + " Document(page_content='GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\xa0Advanced searchAdvertisingBusiness SolutionsAbout Google© 2023 - Privacy - Terms ', lookup_str='', metadata={'source': 'https://google.com'}, lookup_index=0)]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader = WebBaseLoader([\"https://www.espn.com/\", \"https://google.com\"])\n", + "loader.requests_per_second = 1\n", + "docs = loader.aload()\n", + "docs" + ] + }, + { + "cell_type": "markdown", + "id": "e337b130", + "metadata": {}, + "source": [ + "## Loading an XML file, or using a different BeautifulSoup parser\n", + "\n", + "You can also look at `SitemapLoader`, which uses this feature to load a sitemap file." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "16530c50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='\\n\\n10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n§ 431.86\\nSection § 431.86\\n\\nEnergy\\nDEPARTMENT OF ENERGY\\nENERGY CONSERVATION\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\nCommercial Packaged Boilers\\nTest Procedures\\n\\n\\n\\n\\n§\\u2009431.86\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\n(b) Testing and Calculations. 
Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\n\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\n\\nEquipment category\\nSubcategory\\nCertified rated inputBtu/h\\n\\nStandards efficiency metric(§\\u2009431.87)\\n\\nTest procedure(corresponding to\\nstandards efficiency\\nmetric required\\nby §\\u2009431.87)\\n\\n\\n\\nHot Water\\nGas-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nGas-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nHot Water\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nHot Water\\nOil-fired\\n>2,500,000\\nCombustion Efficiency\\nAppendix A, Section 3.\\n\\n\\nSteam\\nGas-fired (all*)\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nGas-fired (all*)\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3 with Section 2.4.3.2.\\n\\n\\n\\nSteam\\nOil-fired\\n≥300,000 and ≤2,500,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\nSteam\\nOil-fired\\n>2,500,000 and ≤5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.\\n\\n\\n\\u2003\\n\\n>5,000,000\\nThermal Efficiency\\nAppendix A, Section 2.OR\\nAppendix A, Section 3. with Section 2.4.3.2.\\n\\n\\n\\n*\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\n\\n(c) Field Tests. The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\n[81 FR 89305, Dec. 
9, 2016]\\n\\n\\nEnergy Efficiency Standards\\n\\n', lookup_str='', metadata={'source': 'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml'}, lookup_index=0)]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loader = WebBaseLoader(\"https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\")\n", + "loader.default_parser = \"xml\"\n", + "docs = loader.load()\n", + "docs\n" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "ca330a63", + "id": "1dd8ab23", "metadata": {}, "outputs": [], "source": [] @@ -109,7 +237,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/docs/use_cases/question_answering.md b/docs/use_cases/question_answering.md index 931ba5a2..068c49d9 100644 --- a/docs/use_cases/question_answering.md +++ b/docs/use_cases/question_answering.md @@ -16,7 +16,7 @@ See [this notebook](../modules/indexes/getting_started.ipynb) for a more detaile from langchain.document_loaders import TextLoader loader = TextLoader('../state_of_the_union.txt') ``` -See [here](../modules/document_loaders/how_to_guides.rst) for more information on how to get started with document loading. +See [here](../modules/indexes/document_loaders.rst) for more information on how to get started with document loading. 
**Create Your Index** ```python diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index ceac2b9b..1f9b833c 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -43,6 +43,7 @@ from langchain.document_loaders.readthedocs import ReadTheDocsLoader from langchain.document_loaders.roam import RoamLoader from langchain.document_loaders.s3_directory import S3DirectoryLoader from langchain.document_loaders.s3_file import S3FileLoader +from langchain.document_loaders.sitemap import SitemapLoader from langchain.document_loaders.srt import SRTLoader from langchain.document_loaders.telegram import TelegramChatLoader from langchain.document_loaders.text import TextLoader @@ -112,4 +113,5 @@ __all__ = [ "BlackboardLoader", "AzureBlobStorageFileLoader", "AzureBlobStorageContainerLoader", + "SitemapLoader", ] diff --git a/langchain/document_loaders/sitemap.py b/langchain/document_loaders/sitemap.py new file mode 100644 index 00000000..4a2c1d28 --- /dev/null +++ b/langchain/document_loaders/sitemap.py @@ -0,0 +1,69 @@ +"""Loader that fetches a sitemap and loads those URLs.""" +import re +from typing import Any, List, Optional + +from langchain.document_loaders.web_base import WebBaseLoader +from langchain.schema import Document + + +class SitemapLoader(WebBaseLoader): + """Loader that fetches a sitemap and loads those URLs.""" + + def __init__(self, web_path: str, filter_urls: Optional[List[str]] = None): + """Initialize with webpage path and optional filter URLs. 
+ + Args: + web_path: url of the sitemap + filter_urls: list of strings or regexes that will be applied to filter the + urls that are parsed and loaded + """ + + try: + import lxml # noqa:F401 + except ImportError: + raise ValueError( + "lxml package not found, please install it with " "`pip install lxml`" + ) + + super().__init__(web_path) + + self.filter_urls = filter_urls + + def parse_sitemap(self, soup: Any) -> List[dict]: + """Parse sitemap xml and load into a list of dicts.""" + els = [] + for url in soup.find_all("url"): + loc = url.find("loc") + if not loc: + continue + + if self.filter_urls and not any( + re.match(r, loc.text) for r in self.filter_urls + ): + continue + + els.append( + { + tag: prop.text + for tag in ["loc", "lastmod", "changefreq", "priority"] + if (prop := url.find(tag)) + } + ) + + return els + + def load(self) -> List[Document]: + """Load sitemap.""" + soup = self.scrape("xml") + + els = self.parse_sitemap(soup) + + results = self.scrape_all([el["loc"] for el in els if "loc" in el]) + + return [ + Document( + page_content=str(results[i].get_text()), + metadata={**{"source": els[i]["loc"]}, **els[i]}, + ) + for i in range(len(results)) + ] diff --git a/langchain/document_loaders/web_base.py b/langchain/document_loaders/web_base.py index 90616a8c..cc1e3ab3 100644 --- a/langchain/document_loaders/web_base.py +++ b/langchain/document_loaders/web_base.py @@ -1,7 +1,9 @@ """Web base loader class.""" +import asyncio import logging -from typing import Any, List, Optional +from typing import Any, List, Optional, Union +import aiohttp import requests from langchain.docstore.document import Document @@ -24,10 +26,34 @@ default_header_template = { class WebBaseLoader(BaseLoader): """Loader that uses urllib and beautiful soup to load webpages.""" - def __init__(self, web_path: str, header_template: Optional[dict] = None): + web_paths: List[str] + + requests_per_second: int = 2 + """Max number of concurrent requests to make.""" + + default_parser: 
str = "html.parser" + """Default parser to use for BeautifulSoup.""" + + def __init__( + self, web_path: Union[str, List[str]], header_template: Optional[dict] = None + ): """Initialize with webpage path.""" - self.web_path = web_path + + # TODO: Deprecate web_path in favor of web_paths, and remove this + # left like this because there are a number of loaders that expect single + # urls + if isinstance(web_path, str): + self.web_paths = [web_path] + elif isinstance(web_path, List): + self.web_paths = web_path + self.session = requests.Session() + try: + import bs4 # noqa:F401 + except ImportError: + raise ValueError( + "bs4 package not found, please install it with " "`pip install bs4`" + ) try: from fake_useragent import UserAgent @@ -41,20 +67,91 @@ class WebBaseLoader(BaseLoader): "To get a realistic header for requests, `pip install fake_useragent`." ) - def _scrape(self, url: str) -> Any: + @property + def web_path(self) -> str: + if len(self.web_paths) > 1: + raise ValueError("Multiple webpaths found.") + return self.web_paths[0] + + async def _fetch(self, url: str) -> str: + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=self.session.headers) as response: + return await response.text() + + async def _fetch_with_rate_limit( + self, url: str, semaphore: asyncio.Semaphore + ) -> str: + async with semaphore: + return await self._fetch(url) + + async def fetch_all(self, urls: List[str]) -> Any: + """Fetch all urls concurrently with rate limiting.""" + semaphore = asyncio.Semaphore(self.requests_per_second) + tasks = [] + for url in urls: + task = asyncio.ensure_future(self._fetch_with_rate_limit(url, semaphore)) + tasks.append(task) + return await asyncio.gather(*tasks) + + @staticmethod + def _check_parser(parser: str) -> None: + """Check that parser is valid for bs4.""" + valid_parsers = ["html.parser", "lxml", "xml", "lxml-xml", "html5lib"] + if parser not in valid_parsers: + raise ValueError( + "`parser` must be one of " 
+ ", ".join(valid_parsers) + "." + ) + + def scrape_all(self, urls: List[str], parser: Union[str, None] = None) -> List[Any]: + """Fetch all urls, then return soups for all results.""" from bs4 import BeautifulSoup + if parser is None: + parser = self.default_parser + + self._check_parser(parser) + + results = asyncio.run(self.fetch_all(urls)) + return [BeautifulSoup(result, parser) for result in results] + + def _scrape(self, url: str, parser: Union[str, None] = None) -> Any: + from bs4 import BeautifulSoup + + if parser is None: + parser = self.default_parser + + self._check_parser(parser) + html_doc = self.session.get(url) - soup = BeautifulSoup(html_doc.text, "html.parser") - return soup + return BeautifulSoup(html_doc.text, parser) - def scrape(self) -> Any: + def scrape(self, parser: Union[str, None] = None) -> Any: """Scrape data from webpage and return it in BeautifulSoup format.""" - return self._scrape(self.web_path) + + if parser is None: + parser = self.default_parser + + return self._scrape(self.web_path, parser) def load(self) -> List[Document]: - """Load data into document objects.""" - soup = self.scrape() - text = soup.get_text() - metadata = {"source": self.web_path} - return [Document(page_content=text, metadata=metadata)] + """Load text from the url(s) in web_path.""" + docs = [] + for path in self.web_paths: + soup = self._scrape(path) + text = soup.get_text() + metadata = {"source": path} + docs.append(Document(page_content=text, metadata=metadata)) + + return docs + + def aload(self) -> List[Document]: + """Load text from the urls in web_path async into Documents.""" + + results = self.scrape_all(self.web_paths) + docs = [] + for i in range(len(results)): + text = results[i].get_text() + metadata = {"source": self.web_paths[i]} + docs.append(Document(page_content=text, metadata=metadata)) + + return docs diff --git a/tests/integration_tests/document_loaders/test_sitemap.py b/tests/integration_tests/document_loaders/test_sitemap.py new file 
mode 100644 index 00000000..87147ec6 --- /dev/null +++ b/tests/integration_tests/document_loaders/test_sitemap.py @@ -0,0 +1,20 @@ +from langchain.document_loaders import SitemapLoader + + +def test_sitemap() -> None: + """Test sitemap loader.""" + loader = SitemapLoader("https://langchain.readthedocs.io/sitemap.xml") + documents = loader.load() + assert len(documents) > 1 + assert "🦜🔗" in documents[0].page_content + + +def test_filter_sitemap() -> None: + """Test sitemap loader with URL filtering.""" + loader = SitemapLoader( + "https://langchain.readthedocs.io/sitemap.xml", + filter_urls=["https://langchain.readthedocs.io/en/stable/"], + ) + documents = loader.load() + assert len(documents) == 1 + assert "🦜🔗" in documents[0].page_content