clean up loaders (#1178)

searx-query-suffixy
Harrison Chase 1 year ago committed by GitHub
parent 9962bda70b
commit 4766b20223
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class AZLyricsLoader(WebBaseLoader):
"""Loader that loads AZLyrics webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]:
"""Load webpage."""
soup = self.scrape()

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class CollegeConfidentialLoader(WebBaseLoader):
"""Loader that loads College Confidential webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]:
"""Load webpage."""
soup = self.scrape()

@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
class IMSDbLoader(WebBaseLoader):
"""Loader that loads IMSDb webpages."""
def __init__(self, web_path: str):
"""Initialize with webpage path."""
self.web_path = web_path
def load(self) -> List[Document]:
"""Load webpage."""
soup = self.scrape()

@@ -2,18 +2,19 @@
import os
from typing import List
from unstructured.__version__ import __version__ as __unstructured_version__
from unstructured.file_utils.filetype import FileType, detect_filetype
from langchain.document_loaders.unstructured import UnstructuredFileLoader
unstructured_version = tuple([int(x) for x in __unstructured_version__.split(".")])
class UnstructuredPowerPointLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load powerpoint files."""
def _get_elements(self) -> List:
from unstructured.__version__ import __version__ as __unstructured_version__
from unstructured.file_utils.filetype import FileType, detect_filetype
unstructured_version = tuple(
[int(x) for x in __unstructured_version__.split(".")]
)
# NOTE(MthwRobinson) - magic will raise an import error if the libmagic
# system dependency isn't installed. If it's not installed, we'll just
# check the file extension

Loading…
Cancel
Save