forked from Archives/langchain
Add minor fixes for PySpark Document Loader Docs (#5525)
# Add minor fixes for PySpark Document Loader Docs

Renamed "PySpack" to "PySpark" and executed the notebook to show outputs.
This commit is contained in:
parent
af41cdfc8b
commit
d765d77e9b
@ -1,17 +1,18 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# PySpack DataFrame Loader\n",
|
"# PySpark DataFrame Loader\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This shows how to load data from a PySpark DataFrame"
|
"This notebook goes over how to load data from a [PySpark](https://spark.apache.org/docs/latest/api/python/) DataFrame."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -20,7 +21,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -29,16 +30,26 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Setting default log level to \"WARN\".\n",
|
||||||
|
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
||||||
|
"23/05/31 14:08:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"spark = SparkSession.builder.getOrCreate()"
|
"spark = SparkSession.builder.getOrCreate()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -47,7 +58,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -56,7 +67,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -65,9 +76,56 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[Stage 8:> (0 + 1) / 1]\r"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': ' 81.34', ' \"Wins\"': ' 98'}),\n",
|
||||||
|
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': ' 82.20', ' \"Wins\"': ' 97'}),\n",
|
||||||
|
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': ' 197.96', ' \"Wins\"': ' 95'}),\n",
|
||||||
|
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': ' 117.62', ' \"Wins\"': ' 94'}),\n",
|
||||||
|
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': ' 83.31', ' \"Wins\"': ' 94'}),\n",
|
||||||
|
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': ' 55.37', ' \"Wins\"': ' 94'}),\n",
|
||||||
|
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': ' 120.51', ' \"Wins\"': ' 93'}),\n",
|
||||||
|
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': ' 81.43', ' \"Wins\"': ' 93'}),\n",
|
||||||
|
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': ' 64.17', ' \"Wins\"': ' 90'}),\n",
|
||||||
|
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': ' 154.49', ' \"Wins\"': ' 89'}),\n",
|
||||||
|
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': ' 132.30', ' \"Wins\"': ' 88'}),\n",
|
||||||
|
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': ' 110.30', ' \"Wins\"': ' 88'}),\n",
|
||||||
|
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': ' 95.14', ' \"Wins\"': ' 86'}),\n",
|
||||||
|
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': ' 96.92', ' \"Wins\"': ' 85'}),\n",
|
||||||
|
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': ' 97.65', ' \"Wins\"': ' 83'}),\n",
|
||||||
|
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': ' 174.54', ' \"Wins\"': ' 81'}),\n",
|
||||||
|
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': ' 74.28', ' \"Wins\"': ' 81'}),\n",
|
||||||
|
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': ' 63.43', ' \"Wins\"': ' 79'}),\n",
|
||||||
|
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': ' 55.24', ' \"Wins\"': ' 76'}),\n",
|
||||||
|
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': ' 81.97', ' \"Wins\"': ' 75'}),\n",
|
||||||
|
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': ' 93.35', ' \"Wins\"': ' 74'}),\n",
|
||||||
|
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': ' 75.48', ' \"Wins\"': ' 73'}),\n",
|
||||||
|
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': ' 60.91', ' \"Wins\"': ' 72'}),\n",
|
||||||
|
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': ' 118.07', ' \"Wins\"': ' 69'}),\n",
|
||||||
|
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': ' 173.18', ' \"Wins\"': ' 69'}),\n",
|
||||||
|
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': ' 78.43', ' \"Wins\"': ' 68'}),\n",
|
||||||
|
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': ' 94.08', ' \"Wins\"': ' 66'}),\n",
|
||||||
|
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': ' 78.06', ' \"Wins\"': ' 64'}),\n",
|
||||||
|
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': ' 88.19', ' \"Wins\"': ' 61'}),\n",
|
||||||
|
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': ' 60.65', ' \"Wins\"': ' 55'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"loader.load()"
|
"loader.load()"
|
||||||
]
|
]
|
||||||
@ -89,7 +147,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.1"
|
"version": "3.10.9"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
Loading…
Reference in New Issue
Block a user