From d29f74114ea886eeb5de9504048a1d99b3659117 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sun, 26 Feb 2023 17:26:37 -0800 Subject: [PATCH] copy paste loader (#1302) --- .../document_loaders/examples/copypaste.ipynb | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 docs/modules/document_loaders/examples/copypaste.ipynb diff --git a/docs/modules/document_loaders/examples/copypaste.ipynb b/docs/modules/document_loaders/examples/copypaste.ipynb new file mode 100644 index 00000000..40f11427 --- /dev/null +++ b/docs/modules/document_loaders/examples/copypaste.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d9826810", + "metadata": {}, + "source": [ + "# Copy Paste\n", + "\n", + "This notebook covers how to load a document object from something you just want to copy and paste. In this case, you don't even need to use a DocumentLoader, but rather can just construct the Document directly." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fd9e71a2", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.docstore.document import Document" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f40d3f30", + "metadata": {}, + "outputs": [], + "source": [ + "text = \"..... put the text you copy pasted here......\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d409bdba", + "metadata": {}, + "outputs": [], + "source": [ + "doc = Document(page_content=text)" + ] + }, + { + "cell_type": "markdown", + "id": "cc0eff72", + "metadata": {}, + "source": [ + "## Metadata\n", + "If you want to add metadata about where you got this piece of text, you can easily do so with the metadata key." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fe3aa5aa", + "metadata": {}, + "outputs": [], + "source": [ + "metadata = {\"source\": \"internet\", \"date\": \"Friday\"}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "827d4e91", + "metadata": {}, + "outputs": [], + "source": [ + "doc = Document(page_content=text, metadata=metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c986a43d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}