2023-03-10 00:35:18 +00:00
{
"cells": [
{
"cell_type": "markdown",
2023-03-19 17:32:13 +00:00
"metadata": {
"collapsed": false
},
2023-03-10 00:35:18 +00:00
"source": [
"# CSV Loader\n",
"\n",
"Load csv files with a single row per document."
2023-03-19 17:32:13 +00:00
]
2023-03-10 00:35:18 +00:00
},
{
"cell_type": "code",
2023-03-19 17:32:13 +00:00
"execution_count": 25,
2023-03-10 00:35:18 +00:00
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
2023-03-19 17:32:13 +00:00
"from langchain.document_loaders.csv_loader import CSVLoader"
2023-03-10 00:35:18 +00:00
]
},
{
"cell_type": "code",
2023-03-19 17:32:13 +00:00
"execution_count": 26,
"metadata": {
"collapsed": false
},
2023-03-10 00:35:18 +00:00
"outputs": [],
"source": [
"loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv')\n",
"\n",
"data = loader.load()"
2023-03-19 17:32:13 +00:00
]
2023-03-10 00:35:18 +00:00
},
{
"cell_type": "code",
2023-03-19 17:32:13 +00:00
"execution_count": 27,
"metadata": {
"collapsed": false
},
2023-03-10 00:35:18 +00:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2023-03-19 17:32:13 +00:00
"[Document(page_content='Team: Nationals\\n\"Payroll (millions)\": 81.34\\n\"Wins\": 98', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 0}, lookup_index=0), Document(page_content='Team: Reds\\n\"Payroll (millions)\": 82.20\\n\"Wins\": 97', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 1}, lookup_index=0), Document(page_content='Team: Yankees\\n\"Payroll (millions)\": 197.96\\n\"Wins\": 95', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 2}, lookup_index=0), Document(page_content='Team: Giants\\n\"Payroll (millions)\": 117.62\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 3}, lookup_index=0), Document(page_content='Team: Braves\\n\"Payroll (millions)\": 83.31\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 4}, lookup_index=0), Document(page_content='Team: Athletics\\n\"Payroll (millions)\": 55.37\\n\"Wins\": 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 5}, lookup_index=0), Document(page_content='Team: Rangers\\n\"Payroll (millions)\": 120.51\\n\"Wins\": 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 6}, lookup_index=0), Document(page_content='Team: Orioles\\n\"Payroll (millions)\": 81.43\\n\"Wins\": 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 7}, lookup_index=0), Document(page_content='Team: Rays\\n\"Payroll (millions)\": 64.17\\n\"Wins\": 90', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 8}, lookup_index=0), Document(page_content='Team: Angels\\n\"Payroll (millions)\": 154.49\\n\"Wins\": 89', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 9}, lookup_index=0), Document(page_content='Team: Tigers\\n\"Payroll (millions)\": 132.30\\n\"Wins\": 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 10}, lookup_index=0), Document(page_content='Team: Cardinals\\n\"Payroll (millions)\": 110.30\\n\"Wins\": 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 11}, lookup_index=0), Document(page_content='Team: Dodgers\\n\"Payroll (millions)\": 95.14\\n\"Wins\": 86', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 12}, lookup_index=0), Document(page_content='Team: White Sox\\n\"Payroll (millions)\": 96.92\\n\"Wins\": 85', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 13}, lookup_index=0), Document(page_content='Team: Brewers\\n\"Payroll (millions)\": 97.65\\n\"Wins\": 83', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 14}, lookup_index=0), Document(page_content='Team: Phillies\\n\"Payroll (millions)\": 174.54\\n\"Wins\": 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 15}, lookup_index=0), Document(page_content='Team: Diamondbacks\\n\"Payroll (millions)\": 74.28\\n\"Wins\": 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 16}, lookup_index=0), Document(page_content='Team: Pirates\\n\"Payroll (millions)\": 63.43\\n\"Wins\": 79', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 17}, lookup_index=0), Document(page_content='Team: Padres\\n\"Payroll (millions)\": 55.24\\n\"Wins\": 76', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 18}, lookup_index=0), Document(page_content='Team: Mariners\\n\"Payroll (millions)\": 81.97\\n\"Wins\": 75', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 19}, lookup_index=0), Document(page_content='Team: Mets\\n\"Payroll (millions)\": 93.35\\n\"Wins\": 74', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 20}, lookup_index=0), Document(page_content='Team: Blue Jays\\n\"Payroll (millions)\": 75.48\\n\"Wins\": 73', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'r
2023-03-10 00:35:18 +00:00
]
}
],
"source": [
"print(data)"
2023-03-19 17:32:13 +00:00
]
2023-03-10 00:35:18 +00:00
},
{
"cell_type": "markdown",
2023-03-19 17:32:13 +00:00
"metadata": {
"collapsed": false
},
2023-03-10 00:35:18 +00:00
"source": [
"## Customizing the csv parsing and loading\n",
"\n",
"See the [csv module](https://docs.python.org/3/library/csv.html) documentation for more information of what csv args are supported."
2023-03-19 17:32:13 +00:00
]
2023-03-10 00:35:18 +00:00
},
{
"cell_type": "code",
2023-03-19 17:32:13 +00:00
"execution_count": 28,
"metadata": {
"collapsed": false
},
2023-03-10 00:35:18 +00:00
"outputs": [],
"source": [
"loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv', csv_args={\n",
" 'delimiter': ',',\n",
" 'quotechar': '\"',\n",
" 'fieldnames': ['MLB Team', 'Payroll in millions', 'Wins']\n",
"})\n",
"\n",
"data = loader.load()"
2023-03-19 17:32:13 +00:00
]
},
{
"cell_type": "code",
"execution_count": 29,
2023-03-10 00:35:18 +00:00
"metadata": {
"collapsed": false
2023-03-19 17:32:13 +00:00
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='MLB Team: Team\\nPayroll in millions: \"Payroll (millions)\"\\nWins: \"Wins\"', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 0}, lookup_index=0), Document(page_content='MLB Team: Nationals\\nPayroll in millions: 81.34\\nWins: 98', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 1}, lookup_index=0), Document(page_content='MLB Team: Reds\\nPayroll in millions: 82.20\\nWins: 97', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 2}, lookup_index=0), Document(page_content='MLB Team: Yankees\\nPayroll in millions: 197.96\\nWins: 95', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 3}, lookup_index=0), Document(page_content='MLB Team: Giants\\nPayroll in millions: 117.62\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 4}, lookup_index=0), Document(page_content='MLB Team: Braves\\nPayroll in millions: 83.31\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 5}, lookup_index=0), Document(page_content='MLB Team: Athletics\\nPayroll in millions: 55.37\\nWins: 94', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 6}, lookup_index=0), Document(page_content='MLB Team: Rangers\\nPayroll in millions: 120.51\\nWins: 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 7}, lookup_index=0), Document(page_content='MLB Team: Orioles\\nPayroll in millions: 81.43\\nWins: 93', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 8}, lookup_index=0), Document(page_content='MLB Team: Rays\\nPayroll in millions: 64.17\\nWins: 90', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 9}, lookup_index=0), Document(page_content='MLB Team: Angels\\nPayroll in millions: 154.49\\nWins: 89', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 10}, lookup_index=0), Document(page_content='MLB Team: Tigers\\nPayroll in millions: 132.30\\nWins: 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 11}, lookup_index=0), Document(page_content='MLB Team: Cardinals\\nPayroll in millions: 110.30\\nWins: 88', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 12}, lookup_index=0), Document(page_content='MLB Team: Dodgers\\nPayroll in millions: 95.14\\nWins: 86', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 13}, lookup_index=0), Document(page_content='MLB Team: White Sox\\nPayroll in millions: 96.92\\nWins: 85', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 14}, lookup_index=0), Document(page_content='MLB Team: Brewers\\nPayroll in millions: 97.65\\nWins: 83', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 15}, lookup_index=0), Document(page_content='MLB Team: Phillies\\nPayroll in millions: 174.54\\nWins: 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 16}, lookup_index=0), Document(page_content='MLB Team: Diamondbacks\\nPayroll in millions: 74.28\\nWins: 81', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 17}, lookup_index=0), Document(page_content='MLB Team: Pirates\\nPayroll in millions: 63.43\\nWins: 79', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 18}, lookup_index=0), Document(page_content='MLB Team: Padres\\nPayroll in millions: 55.24\\nWins: 76', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 19}, lookup_index=0), Document(page_content='MLB Team: Mariners\\nPayroll in millions: 81.97\\nWins: 75', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 20}, lookup_index=0), Document(page_content='MLB Team: Mets\\nPayroll in millions: 93.35\\nWins: 74', lookup_str='', metadata={'source': './example_data/mlb_teams_2012.csv', 'row': 21}, lookup_index=0), Document(page_content
]
}
],
"source": [
"print(data)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Specify a column to be used identify the document source\n",
"\n",
"Use the `source_column` argument to specify a column to be set as the source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the csv file.\n",
"\n",
"This is useful when using documents loaded from CSV files for chains that answer questions using sources."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv', source_column=\"Team\")\n",
"\n",
"data = loader.load()"
]
2023-03-10 00:35:18 +00:00
},
{
"cell_type": "code",
2023-03-19 17:32:13 +00:00
"execution_count": 31,
"metadata": {},
2023-03-10 00:35:18 +00:00
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2023-03-19 17:32:13 +00:00
"[Document(page_content='Team: Nationals\\n\"Payroll (millions)\": 81.34\\n\"Wins\": 98', lookup_str='', metadata={'source': 'Nationals', 'row': 0}, lookup_index=0), Document(page_content='Team: Reds\\n\"Payroll (millions)\": 82.20\\n\"Wins\": 97', lookup_str='', metadata={'source': 'Reds', 'row': 1}, lookup_index=0), Document(page_content='Team: Yankees\\n\"Payroll (millions)\": 197.96\\n\"Wins\": 95', lookup_str='', metadata={'source': 'Yankees', 'row': 2}, lookup_index=0), Document(page_content='Team: Giants\\n\"Payroll (millions)\": 117.62\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Giants', 'row': 3}, lookup_index=0), Document(page_content='Team: Braves\\n\"Payroll (millions)\": 83.31\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Braves', 'row': 4}, lookup_index=0), Document(page_content='Team: Athletics\\n\"Payroll (millions)\": 55.37\\n\"Wins\": 94', lookup_str='', metadata={'source': 'Athletics', 'row': 5}, lookup_index=0), Document(page_content='Team: Rangers\\n\"Payroll (millions)\": 120.51\\n\"Wins\": 93', lookup_str='', metadata={'source': 'Rangers', 'row': 6}, lookup_index=0), Document(page_content='Team: Orioles\\n\"Payroll (millions)\": 81.43\\n\"Wins\": 93', lookup_str='', metadata={'source': 'Orioles', 'row': 7}, lookup_index=0), Document(page_content='Team: Rays\\n\"Payroll (millions)\": 64.17\\n\"Wins\": 90', lookup_str='', metadata={'source': 'Rays', 'row': 8}, lookup_index=0), Document(page_content='Team: Angels\\n\"Payroll (millions)\": 154.49\\n\"Wins\": 89', lookup_str='', metadata={'source': 'Angels', 'row': 9}, lookup_index=0), Document(page_content='Team: Tigers\\n\"Payroll (millions)\": 132.30\\n\"Wins\": 88', lookup_str='', metadata={'source': 'Tigers', 'row': 10}, lookup_index=0), Document(page_content='Team: Cardinals\\n\"Payroll (millions)\": 110.30\\n\"Wins\": 88', lookup_str='', metadata={'source': 'Cardinals', 'row': 11}, lookup_index=0), Document(page_content='Team: Dodgers\\n\"Payroll (millions)\": 95.14\\n\"Wins\": 86', lookup_str='', metadata={'source': 'Dodgers', 'row': 12}, lookup_index=0), Document(page_content='Team: White Sox\\n\"Payroll (millions)\": 96.92\\n\"Wins\": 85', lookup_str='', metadata={'source': 'White Sox', 'row': 13}, lookup_index=0), Document(page_content='Team: Brewers\\n\"Payroll (millions)\": 97.65\\n\"Wins\": 83', lookup_str='', metadata={'source': 'Brewers', 'row': 14}, lookup_index=0), Document(page_content='Team: Phillies\\n\"Payroll (millions)\": 174.54\\n\"Wins\": 81', lookup_str='', metadata={'source': 'Phillies', 'row': 15}, lookup_index=0), Document(page_content='Team: Diamondbacks\\n\"Payroll (millions)\": 74.28\\n\"Wins\": 81', lookup_str='', metadata={'source': 'Diamondbacks', 'row': 16}, lookup_index=0), Document(page_content='Team: Pirates\\n\"Payroll (millions)\": 63.43\\n\"Wins\": 79', lookup_str='', metadata={'source': 'Pirates', 'row': 17}, lookup_index=0), Document(page_content='Team: Padres\\n\"Payroll (millions)\": 55.24\\n\"Wins\": 76', lookup_str='', metadata={'source': 'Padres', 'row': 18}, lookup_index=0), Document(page_content='Team: Mariners\\n\"Payroll (millions)\": 81.97\\n\"Wins\": 75', lookup_str='', metadata={'source': 'Mariners', 'row': 19}, lookup_index=0), Document(page_content='Team: Mets\\n\"Payroll (millions)\": 93.35\\n\"Wins\": 74', lookup_str='', metadata={'source': 'Mets', 'row': 20}, lookup_index=0), Document(page_content='Team: Blue Jays\\n\"Payroll (millions)\": 75.48\\n\"Wins\": 73', lookup_str='', metadata={'source': 'Blue Jays', 'row': 21}, lookup_index=0), Document(page_content='Team: Royals\\n\"Payroll (millions)\": 60.91\\n\"Wins\": 72', lookup_str='', metadata={'source': 'Royals', 'row': 22}, lookup_index=0), Document(page_content='Team: Marlins\\n\"Payroll (millions)\": 118.07\\n\"Wins\": 69', lookup_str='', metadata={'source': 'Marlins', 'row': 23}, lookup_index=0), Document(page_content='Team: Red Sox\\n\"Payroll (millions)\": 173.18\\n\"Wins\": 69', lookup_str='', metadata={'source': 'Red Sox', 'row': 24}, lookup_index=0), Document(page_content='Team: Indians\\n\"Payroll (millions)
2023-03-10 00:35:18 +00:00
]
}
],
"source": [
"print(data)"
2023-03-19 17:32:13 +00:00
]
2023-03-10 00:35:18 +00:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
2023-03-19 17:32:13 +00:00
"version": 3
2023-03-10 00:35:18 +00:00
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
2023-03-19 17:32:13 +00:00
"pygments_lexer": "ipython3",
"version": "3.9.9"
2023-03-10 00:35:18 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 0
}