From 5be465bd86f940cf831e3a4d2841d92ce8699ffb Mon Sep 17 00:00:00 2001 From: MIDORIBIN Date: Mon, 19 Jun 2023 08:39:57 +0900 Subject: [PATCH] Fixed PermissionError on windows (#6170) Fixed a PermissionError that occurred when downloading PDF files via HTTP in BasePDFLoader on Windows. When downloading PDF files via HTTP, BasePDFLoader uses NamedTemporaryFile. On **Windows**, a file created by this function cannot be opened a second time by name while it is still open ([Python Doc](https://docs.python.org/3.9/library/tempfile.html#tempfile.NamedTemporaryFile)). So, we create a **temporary directory** with TemporaryDirectory and place the downloaded file there. The temporary directory is deleted in the destructor (`__del__`). Fixes #2698 #### Who can review? Tag maintainers/contributors who might be interested: - @eyurtsev - @hwchase17 --- langchain/document_loaders/pdf.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/langchain/document_loaders/pdf.py b/langchain/document_loaders/pdf.py index 5f204f1f7f..1d334e097a 100644 --- a/langchain/document_loaders/pdf.py +++ b/langchain/document_loaders/pdf.py @@ -62,15 +62,17 @@ class BasePDFLoader(BaseLoader, ABC): ) self.web_path = self.file_path - self.temp_file = tempfile.NamedTemporaryFile() - self.temp_file.write(r.content) - self.file_path = self.temp_file.name + self.temp_dir = tempfile.TemporaryDirectory() + temp_pdf = Path(self.temp_dir.name) / "tmp.pdf" + with open(temp_pdf, mode="wb") as f: + f.write(r.content) + self.file_path = str(temp_pdf) elif not os.path.isfile(self.file_path): raise ValueError("File path %s is not a valid file or url" % self.file_path) def __del__(self) -> None: - if hasattr(self, "temp_file"): - self.temp_file.close() + if hasattr(self, "temp_dir"): + self.temp_dir.cleanup() @staticmethod def _is_valid_url(url: str) -> bool: