From 4766b20223c816563cc3232cff6fab74df97f519 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 20 Feb 2023 08:20:48 -0800 Subject: [PATCH] clean up loaders (#1178) --- langchain/document_loaders/azlyrics.py | 4 ---- langchain/document_loaders/college_confidential.py | 4 ---- langchain/document_loaders/imsdb.py | 4 ---- langchain/document_loaders/powerpoint.py | 11 ++++++----- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/langchain/document_loaders/azlyrics.py b/langchain/document_loaders/azlyrics.py index 538fc380..0947946c 100644 --- a/langchain/document_loaders/azlyrics.py +++ b/langchain/document_loaders/azlyrics.py @@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader class AZLyricsLoader(WebBaseLoader): """Loader that loads AZLyrics webpages.""" - def __init__(self, web_path: str): - """Initialize with webpage path.""" - self.web_path = web_path - def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() diff --git a/langchain/document_loaders/college_confidential.py b/langchain/document_loaders/college_confidential.py index 15bb7512..1eaa64bc 100644 --- a/langchain/document_loaders/college_confidential.py +++ b/langchain/document_loaders/college_confidential.py @@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader class CollegeConfidentialLoader(WebBaseLoader): """Loader that loads College Confidential webpages.""" - def __init__(self, web_path: str): - """Initialize with webpage path.""" - self.web_path = web_path - def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() diff --git a/langchain/document_loaders/imsdb.py b/langchain/document_loaders/imsdb.py index 9084895a..4589553d 100644 --- a/langchain/document_loaders/imsdb.py +++ b/langchain/document_loaders/imsdb.py @@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader class IMSDbLoader(WebBaseLoader): """Loader that loads IMSDb webpages.""" - def __init__(self, web_path: str): - """Initialize with webpage path.""" - self.web_path = web_path - def load(self) -> List[Document]: """Load webpage.""" soup = self.scrape() diff --git a/langchain/document_loaders/powerpoint.py b/langchain/document_loaders/powerpoint.py index d3443ec7..9ed230b2 100644 --- a/langchain/document_loaders/powerpoint.py +++ b/langchain/document_loaders/powerpoint.py @@ -2,18 +2,19 @@ import os from typing import List -from unstructured.__version__ import __version__ as __unstructured_version__ -from unstructured.file_utils.filetype import FileType, detect_filetype - from langchain.document_loaders.unstructured import UnstructuredFileLoader -unstructured_version = tuple([int(x) for x in __unstructured_version__.split(".")]) - class UnstructuredPowerPointLoader(UnstructuredFileLoader): """Loader that uses unstructured to load powerpoint files.""" def _get_elements(self) -> List: + from unstructured.__version__ import __version__ as __unstructured_version__ + from unstructured.file_utils.filetype import FileType, detect_filetype + + unstructured_version = tuple( + [int(x) for x in __unstructured_version__.split(".")] + ) # NOTE(MthwRobinson) - magic will raise an import error if the libmagic # system dependency isn't installed. If it's not installed, we'll just # check the file extension