# Mirror of https://github.com/hwchase17/langchain
# Synced 2024-11-10 01:10:59 +00:00 (034a8c7c1b)
#
# Description:
#   - Updated the constructors of PyPDFParser and PyPDFLoader to handle
#     `extraction_mode` and additional kwargs, aligning with the capabilities
#     of `PageObject.extract_text()` from pypdf.
#   - Added `test_pypdf_loader_with_layout`, along with a corresponding
#     example text file, to validate layout extraction from PDFs.
#
# Issue: fixes #19735
#
# Dependencies: this change requires updating the pypdf dependency from
# version 3.4.0 to at least 4.0.0. Additional changes include a new test,
# `test_pypdf_loader_with_layout`, and an example text file, which ensure
# that layout extraction from PDFs aligns with the new capabilities.
#
# Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
# Co-authored-by: Bagatur <baskaryan@gmail.com>
# Co-authored-by: Erick Friis <erick@langchain.dev>
#
# File: 35 lines, 742 B, TOML
# Package metadata for the template, read by Poetry.
[tool.poetry]
name = "mongo-parent-document-retrieval"
version = "0.0.1"
description = "RAG using MongoDB and OpenAI"
authors = []
readme = "README.md"

# Runtime dependencies.
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = "^0.1"
openai = "<2"
pymongo = "^4.6.0"
pypdf = "^4.0.0"
tiktoken = "^0.5.1"
langchain-text-splitters = ">=0.0.1,<0.1"

# Development-only dependencies (Poetry "dev" group).
[tool.poetry.group.dev.dependencies]
langchain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"

# NOTE(review): presumably LangServe imports the attribute `chain` from the
# module named below to expose it as a route; confirm against langchain-cli.
[tool.langserve]
export_module = "mongo_parent_document_retrieval"
export_attr = "chain"

# Descriptive metadata for the templates listing.
[tool.templates-hub]
use-case = "rag"
author = "LangChain"
integrations = ["MongoDB", "OpenAI"]
tags = ["vectordb"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"