clean up loaders (#1178)

searx-query-suffixy
Harrison Chase 1 year ago committed by GitHub
parent 9962bda70b
commit 4766b20223
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class AZLyricsLoader(WebBaseLoader): class AZLyricsLoader(WebBaseLoader):
"""Loader that loads AZLyrics webpages.""" """Loader that loads AZLyrics webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load webpage.""" """Load webpage."""
soup = self.scrape() soup = self.scrape()

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class CollegeConfidentialLoader(WebBaseLoader): class CollegeConfidentialLoader(WebBaseLoader):
"""Loader that loads College Confidential webpages.""" """Loader that loads College Confidential webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load webpage.""" """Load webpage."""
soup = self.scrape() soup = self.scrape()

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class IMSDbLoader(WebBaseLoader): class IMSDbLoader(WebBaseLoader):
"""Loader that loads IMSDb webpages.""" """Loader that loads IMSDb webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load webpage.""" """Load webpage."""
soup = self.scrape() soup = self.scrape()

@@ -2,18 +2,19 @@
import os import os
from typing import List from typing import List
from unstructured.__version__ import __version__ as __unstructured_version__
from unstructured.file_utils.filetype import FileType, detect_filetype
from langchain.document_loaders.unstructured import UnstructuredFileLoader from langchain.document_loaders.unstructured import UnstructuredFileLoader
unstructured_version = tuple([int(x) for x in __unstructured_version__.split(".")])
class UnstructuredPowerPointLoader(UnstructuredFileLoader): class UnstructuredPowerPointLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load powerpoint files.""" """Loader that uses unstructured to load powerpoint files."""
def _get_elements(self) -> List: def _get_elements(self) -> List:
from unstructured.__version__ import __version__ as __unstructured_version__
from unstructured.file_utils.filetype import FileType, detect_filetype
unstructured_version = tuple(
[int(x) for x in __unstructured_version__.split(".")]
)
# NOTE(MthwRobinson) - magic will raise an import error if the libmagic # NOTE(MthwRobinson) - magic will raise an import error if the libmagic
# system dependency isn't installed. If it's not installed, we'll just # system dependency isn't installed. If it's not installed, we'll just
# check the file extension # check the file extension

Loading…
Cancel
Save