{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# DuckDB Loader\n", "\n", "Load the results of a DuckDB query, creating one document per row." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from langchain.document_loaders import DuckDBLoader" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing example.csv\n" ] } ], "source": [ "%%file example.csv\n", "Team,Payroll\n", "Nationals,81.34\n", "Reds,82.20" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "loader = DuckDBLoader(\"SELECT * FROM read_csv_auto('example.csv')\")\n", "\n", "data = loader.load()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Document(page_content='Team: Nationals\\nPayroll: 81.34', metadata={}), Document(page_content='Team: Reds\\nPayroll: 82.2', metadata={})]\n" ] } ], "source": [ "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Specifying Which Columns are Content vs Metadata" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "loader = DuckDBLoader(\n", " \"SELECT * FROM read_csv_auto('example.csv')\",\n", " page_content_columns=[\"Team\"],\n", " metadata_columns=[\"Payroll\"]\n", ")\n", "\n", "data = loader.load()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Document(page_content='Team: Nationals', metadata={'Payroll': 81.34}), Document(page_content='Team: Reds', metadata={'Payroll': 82.2})]\n" ] } ], "source": [ "print(data)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding Source to Metadata" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "loader = DuckDBLoader(\n", " \"SELECT Team, Payroll, Team As source FROM read_csv_auto('example.csv')\",\n", " metadata_columns=[\"source\"]\n", ")\n", "\n", "data = loader.load()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Document(page_content='Team: Nationals\\nPayroll: 81.34\\nsource: Nationals', metadata={'source': 'Nationals'}), Document(page_content='Team: Reds\\nPayroll: 82.2\\nsource: Reds', metadata={'source': 'Reds'})]\n" ] } ], "source": [ "print(data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 1 }