langchain/templates/nvidia-rag-canonical/pyproject.toml
Brice Fotzo 034a8c7c1b
community: support advanced text extraction options for pdf documents (#20265)
**Description:** 
- Updated constructors in PyPDFParser and PyPDFLoader to handle
`extraction_mode` and additional kwargs, aligning with the capabilities
of `PageObject.extract_text()` from pypdf.

- Added `test_pypdf_loader_with_layout` along with a corresponding
example text file to validate layout extraction from PDFs.

**Issue:** fixes #19735 

**Dependencies:** This change requires updating the pypdf dependency
from version 3.4.0 to at least 4.0.0.

Additional changes: a new test, `test_pypdf_loader_with_layout`, and an
example text file verify that layout extraction from PDFs works as
expected with the new capabilities.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2024-07-17 20:47:09 +00:00

32 lines
711 B
TOML

# Poetry package metadata: distribution name, version, short description,
# author contact, and the README file used as the long description.
[tool.poetry]
name = "nvidia-rag-canonical"
version = "0.1.0"
description = "RAG with NVIDIA"
authors = ["Sagar Bogadi Manjunath <sbogadimanju@nvidia.com>"]
readme = "README.md"
# Runtime requirements. The interpreter constraint is listed first; the
# packages follow in alphabetical order.
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain = "^0.1"  # caret: >=0.1.0,<0.2.0
langchain-nvidia-aiplay = "^0.0.2"  # caret on 0.0.x: >=0.0.2,<0.0.3
langchain-text-splitters = ">=0.0.1,<0.1"
pymilvus = ">=2.3.0"  # lower bound only
# NOTE(review): the 4.x floor presumably tracks the pypdf text-extraction
# options used by the PDF loaders - confirm before lowering it.
pypdf = ">=4.0.1"
# Poetry dependency group named `dev`, declared separately from the runtime
# dependencies table.
[tool.poetry.group.dev.dependencies]
langchain-cli = ">=0.0.21"
# LangServe export settings: the module to import and the attribute to read
# from it.
# NOTE(review): presumably `nvidia_rag_canonical.chain` is the runnable that
# gets served - confirm against the package's __init__.
[tool.langserve]
export_module = "nvidia_rag_canonical"
export_attr = "chain"
# Listing metadata under the `templates-hub` tool namespace: use case,
# author, the integrations involved, and tags.
[tool.templates-hub]
use-case = "rag"
author = "LangChain"
integrations = ["Milvus", "NVIDIA"]
tags = ["vectordbs"]
# Build configuration: `requires` lists what must be installed to build the
# project (poetry-core), and `build-backend` names the backend entry point
# that performs the build.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"