From 7824291252c8f99a31c416f363d6a46d7e8f63e4 Mon Sep 17 00:00:00 2001 From: Hyeongchan Kim Date: Wed, 17 Apr 2024 10:06:21 +0900 Subject: [PATCH] community[patch]: Fix not to cast to str type when `file_path` is None (#20057) From `langchain_community 0.0.30`, there's a bug that cannot send a file-like object via `file` parameter instead of `file path` due to casting the `file_path` to str type even if `file_path` is None. which means that when I call the `partition_via_api()`, exactly one of `filename` and `file` must be specified by the following error message. however, from `langchain_community 0.0.30`, `file_path` is casted into `str` type even `file_path` is None in `get_elements_from_api()` and got an error at `exactly_one(filename=filename, file=file)`. here's an error message ``` ---> 51 exactly_one(filename=filename, file=file) 53 if metadata_filename and file_filename: 54 raise ValueError( 55 "Only one of metadata_filename and file_filename is specified. " 56 "metadata_filename is preferred. file_filename is marked for deprecation.", 57 ) File /opt/homebrew/lib/python3.11/site-packages/unstructured/partition/common.py:441, in exactly_one(**kwargs) 439 else: 440 message = f"{names[0]} must be specified." --> 441 raise ValueError(message) ValueError: Exactly one of filename and file must be specified. ``` So, I simply made a change that casting to str type when `file_path` is not None. I use `UnstructuredAPIFileLoader` like below. ``` from langchain_community.document_loaders.unstructured import UnstructuredAPIFileLoader documents: list = UnstructuredAPIFileLoader( file_path=None, file=file, # file-like object, io.BytesIO type mode='elements', url='http://127.0.0.1:8000/general/v0/general', content_type='application/pdf', metadata_filename='asdf.pdf', ).load_and_split() ``` --- .../langchain_community/document_loaders/unstructured.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/unstructured.py b/libs/community/langchain_community/document_loaders/unstructured.py index bc056ca702..f990fd185e 100644 --- a/libs/community/langchain_community/document_loaders/unstructured.py +++ b/libs/community/langchain_community/document_loaders/unstructured.py @@ -213,7 +213,7 @@ def get_elements_from_api( from unstructured.partition.api import partition_via_api return partition_via_api( - filename=str(file_path), + filename=str(file_path) if file_path is not None else None, file=file, api_key=api_key, api_url=api_url,