mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Fixed PermissionError on Windows (#6170)
Fixed a PermissionError that occurred on Windows when downloading PDF files via HTTP in BasePDFLoader. When downloading PDF files via HTTP, BasePDFLoader used NamedTemporaryFile. On **Windows**, a file created by this function cannot be opened a second time by name while it is still open ([Python docs](https://docs.python.org/3.9/library/tempfile.html#tempfile.NamedTemporaryFile)). So, we now create a **temporary directory** with TemporaryDirectory and place the downloaded file there. The temporary directory is deleted in the destructor (`__del__`). Fixes #2698 #### Who can review? Tag maintainers/contributors who might be interested: - @eyurtsev - @hwchase17
This commit is contained in:
parent
4fc7939848
commit
5be465bd86
@ -62,15 +62,17 @@ class BasePDFLoader(BaseLoader, ABC):
|
||||
)
|
||||
|
||||
self.web_path = self.file_path
|
||||
self.temp_file = tempfile.NamedTemporaryFile()
|
||||
self.temp_file.write(r.content)
|
||||
self.file_path = self.temp_file.name
|
||||
self.temp_dir = tempfile.TemporaryDirectory()
|
||||
temp_pdf = Path(self.temp_dir.name) / "tmp.pdf"
|
||||
with open(temp_pdf, mode="wb") as f:
|
||||
f.write(r.content)
|
||||
self.file_path = str(temp_pdf)
|
||||
elif not os.path.isfile(self.file_path):
|
||||
raise ValueError("File path %s is not a valid file or url" % self.file_path)
|
||||
|
||||
def __del__(self) -> None:
|
||||
if hasattr(self, "temp_file"):
|
||||
self.temp_file.close()
|
||||
if hasattr(self, "temp_dir"):
|
||||
self.temp_dir.cleanup()
|
||||
|
||||
@staticmethod
|
||||
def _is_valid_url(url: str) -> bool:
|
||||
|
Loading…
Reference in New Issue
Block a user