2024-03-16 14:52:05 +00:00
|
|
|
from application.parser.remote.base import BaseRemote
|
|
|
|
from langchain_community.document_loaders import RedditPostsLoader
|
|
|
|
|
|
|
|
|
|
|
|
class RedditPostsLoaderRemote(BaseRemote):
    """Remote source that loads Reddit posts through ``RedditPostsLoader``."""

    @staticmethod
    def _parse_inputs(inputs):
        """Turn the raw ``inputs`` payload into a dict without executing code."""
        # Function-scope imports keep this block self-contained.
        import ast
        import json

        if isinstance(inputs, dict):
            return inputs

        # SECURITY: this used to be ``eval(inputs)``, which runs arbitrary
        # Python supplied by the caller.  ``ast.literal_eval`` accepts the same
        # dict-literal strings (single quotes, None/True/False, lists, numbers)
        # but refuses calls, attribute access and any other expression.
        try:
            data = ast.literal_eval(inputs)
        except (ValueError, SyntaxError) as literal_err:
            # Strict JSON (``null``/``true``/``false``) is not a valid Python
            # literal, so give the JSON parser a chance before giving up.
            try:
                data = json.loads(inputs)
            except (TypeError, ValueError):
                raise literal_err from None

        if not isinstance(data, dict):
            raise ValueError(
                "Reddit loader inputs must describe a dict, "
                f"got {type(data).__name__}"
            )
        return data

    def load_data(self, inputs):
        """Fetch Reddit posts according to the configuration in ``inputs``.

        Args:
            inputs: A dict, or a string holding a Python dict literal or a
                JSON object.  Keys read from it:

                * ``client_id``, ``client_secret``, ``user_agent`` - passed
                  straight to ``RedditPostsLoader`` (``None`` when absent).
                * ``categories`` - defaults to ``["new", "hot"]``.
                * ``mode`` - defaults to ``"subreddit"``.
                * ``search_queries`` - forwarded as-is (``None`` when absent).
                * ``number_posts`` - defaults to ``10``.

        Returns:
            The documents returned by ``RedditPostsLoader.load()``.

        Raises:
            ValueError: If ``inputs`` is neither a valid literal nor valid
                JSON, or does not describe a dict.
            SyntaxError: If ``inputs`` is a string that is not parseable as a
                Python literal or as JSON.
        """
        data = self._parse_inputs(inputs)

        client_id = data.get("client_id")
        client_secret = data.get("client_secret")
        user_agent = data.get("user_agent")
        categories = data.get("categories", ["new", "hot"])
        mode = data.get("mode", "subreddit")
        search_queries = data.get("search_queries")
        number_posts = data.get("number_posts", 10)

        # The loader is kept on the instance, as in the original code.
        self.loader = RedditPostsLoader(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=user_agent,
            categories=categories,
            mode=mode,
            search_queries=search_queries,
            number_posts=number_posts,
        )
        documents = self.loader.load()
        print(f"Loaded {len(documents)} documents from Reddit")
        return documents
|