From ed0d557ede8776921cc3c5ca1f3aef81d3d0c7b5 Mon Sep 17 00:00:00 2001 From: Tim Asp <707699+timothyasp@users.noreply.github.com> Date: Fri, 12 May 2023 12:03:01 -0700 Subject: [PATCH] docs: fix pdf docs hierarchy and formatting (#4593) # Fix pdf loader docs page ![image](https://github.com/hwchase17/langchain/assets/707699/4a11f379-00ed-4f7a-9870-71f74e0cadc6) Using h1's messes with hierarchy, this fixes that, and moves the PyPDFium2 loader out of the middle of PDFMiner docs --- .../document_loaders/examples/pdf.ipynb | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/docs/modules/indexes/document_loaders/examples/pdf.ipynb b/docs/modules/indexes/document_loaders/examples/pdf.ipynb index e1ec7035..abccc80c 100644 --- a/docs/modules/indexes/document_loaders/examples/pdf.ipynb +++ b/docs/modules/indexes/document_loaders/examples/pdf.ipynb @@ -337,75 +337,73 @@ }, { "cell_type": "markdown", - "id": "05187b33", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "id": "21998d18", + "id": "96351714", "metadata": {}, "source": [ - "## Using PDFMiner" + "## Using PyPDFium2" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "2f0cc9ff", + "execution_count": 1, + "id": "003fcc1d", "metadata": {}, "outputs": [], "source": [ - "from langchain.document_loaders import PDFMinerLoader" + "from langchain.document_loaders import PyPDFium2Loader" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "42b531e8", + "execution_count": 3, + "id": "46766e29", "metadata": {}, "outputs": [], "source": [ - "loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")" + "loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")" ] }, { "cell_type": "code", "execution_count": 9, - "id": "483720b5", - "metadata": {}, "outputs": [], "source": [ "data = loader.load()" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "markdown", - "id": "96351714", - "metadata": {}, "source": [ - "# Using PyPDFium2" - ] + "## Using PDFMiner" + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": 1, - "id": "003fcc1d", - "metadata": {}, + "execution_count": 7, "outputs": [], "source": [ - "from langchain.document_loaders import PyPDFium2Loader" - ] + "from langchain.document_loaders import PDFMinerLoader" + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", - "execution_count": 3, - "id": "46766e29", - "metadata": {}, + "execution_count": 8, "outputs": [], "source": [ - "loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")" - ] + "loader = PDFMinerLoader(\"example_data/layout-parser-paper.pdf\")" + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", @@ -422,7 +420,7 @@ "id": "c90a5fe8", "metadata": {}, "source": [ - "## Using PDFMiner to generate HTML text" + "### Using PDFMiner to generate HTML text" ] }, {