From d765d77e9b42f2c50e70b3a1391a66fe26799ac0 Mon Sep 17 00:00:00 2001 From: Rithwik Ediga Lakhamsani <81988348+rithwik-db@users.noreply.github.com> Date: Wed, 31 May 2023 15:02:57 -0700 Subject: [PATCH] Add minor fixes for PySpark Document Loader Docs (#5525) # Add minor fixes for PySpark Document Loader Docs Renamed "PySpack" to "PySpark" and executed the notebook to show outputs. --- .../examples/pyspark_dataframe.ipynb | 82 ++++++++++++++++--- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/docs/modules/indexes/document_loaders/examples/pyspark_dataframe.ipynb b/docs/modules/indexes/document_loaders/examples/pyspark_dataframe.ipynb index a09383bb..6920f23f 100644 --- a/docs/modules/indexes/document_loaders/examples/pyspark_dataframe.ipynb +++ b/docs/modules/indexes/document_loaders/examples/pyspark_dataframe.ipynb @@ -1,17 +1,18 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# PySpack DataFrame Loader\n", + "# PySpark DataFrame Loader\n", "\n", - "This shows how to load data from a PySpark DataFrame" + "This notebook goes over how to load data from a [PySpark](https://spark.apache.org/docs/latest/api/python/) DataFrame." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -29,16 +30,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", + "23/05/31 14:08:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" + ] + } + ], "source": [ "spark = SparkSession.builder.getOrCreate()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -65,9 +76,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 8:> (0 + 1) / 1]\r" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': ' 81.34', ' \"Wins\"': ' 98'}),\n", + " Document(page_content='Reds', metadata={' \"Payroll (millions)\"': ' 82.20', ' \"Wins\"': ' 97'}),\n", + " Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': ' 197.96', ' \"Wins\"': ' 95'}),\n", + " Document(page_content='Giants', metadata={' \"Payroll (millions)\"': ' 117.62', ' \"Wins\"': ' 94'}),\n", + " Document(page_content='Braves', metadata={' \"Payroll (millions)\"': ' 83.31', ' \"Wins\"': ' 94'}),\n", + " Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': ' 55.37', ' \"Wins\"': ' 94'}),\n", + " Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': ' 120.51', ' \"Wins\"': ' 93'}),\n", + " Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': ' 81.43', ' \"Wins\"': ' 93'}),\n", + " Document(page_content='Rays', metadata={' \"Payroll (millions)\"': ' 64.17', ' \"Wins\"': ' 90'}),\n", + " Document(page_content='Angels', metadata={' \"Payroll (millions)\"': ' 154.49', ' \"Wins\"': ' 89'}),\n", + " Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': ' 132.30', ' \"Wins\"': ' 88'}),\n", + " Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': ' 110.30', ' \"Wins\"': ' 88'}),\n", + " Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': ' 95.14', ' \"Wins\"': ' 86'}),\n", + " Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': ' 96.92', ' \"Wins\"': ' 85'}),\n", + " Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': ' 97.65', ' \"Wins\"': ' 83'}),\n", + " Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': ' 174.54', ' \"Wins\"': ' 81'}),\n", + " Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': ' 74.28', ' \"Wins\"': ' 81'}),\n", + " Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': ' 63.43', ' \"Wins\"': ' 79'}),\n", + " Document(page_content='Padres', metadata={' \"Payroll (millions)\"': ' 55.24', ' \"Wins\"': ' 76'}),\n", + " Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': ' 81.97', ' \"Wins\"': ' 75'}),\n", + " Document(page_content='Mets', metadata={' \"Payroll (millions)\"': ' 93.35', ' \"Wins\"': ' 74'}),\n", + " Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': ' 75.48', ' \"Wins\"': ' 73'}),\n", + " Document(page_content='Royals', metadata={' \"Payroll (millions)\"': ' 60.91', ' \"Wins\"': ' 72'}),\n", + " Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': ' 118.07', ' \"Wins\"': ' 69'}),\n", + " Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': ' 173.18', ' \"Wins\"': ' 69'}),\n", + " Document(page_content='Indians', metadata={' \"Payroll (millions)\"': ' 78.43', ' \"Wins\"': ' 68'}),\n", + " Document(page_content='Twins', metadata={' \"Payroll (millions)\"': ' 94.08', ' \"Wins\"': ' 66'}),\n", + " Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': ' 78.06', ' \"Wins\"': ' 64'}),\n", + " Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': ' 88.19', ' \"Wins\"': ' 61'}),\n", + " Document(page_content='Astros', metadata={' \"Payroll (millions)\"': ' 60.65', ' \"Wins\"': ' 55'})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "loader.load()" ] @@ -89,7 +147,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.10.9" } }, "nbformat": 4,