Update pdf.py comment for PyPDFLoader (#10495)

To my understanding, PyPDF does not chunk at the character level.

Description: PyPDF does not chunk at the character level, but instead
breaks up content by page. This change fixes up the docstrings accordingly.

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/6605/head
Kyle Pancamo 1 year ago committed by GitHub
parent 4236ae3851
commit 203258b4d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,7 +13,7 @@ if TYPE_CHECKING:
class PyPDFParser(BaseBlobParser):
"""Load `PDF` using `pypdf` and chunk at character level."""
"""Load `PDF` using `pypdf`"""
def __init__(self, password: Optional[Union[str, bytes]] = None):
self.password = password

@ -135,9 +135,9 @@ class OnlinePDFLoader(BasePDFLoader):
class PyPDFLoader(BasePDFLoader):
"""Load `PDF using `pypdf` and chunks at character level.
"""Load PDF using pypdf into list of documents.
Loader also stores page numbers in metadata.
Loader chunks by page and stores page numbers in metadata.
"""
def __init__(

Loading…
Cancel
Save