mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
adds doc_content_chars_max argument to WikipediaLoader (#6645)
# Description

It adds a new initialization parameter to `WikipediaLoader` so that callers can override the `doc_content_chars_max` value that is passed to `WikipediaAPIWrapper` under the hood, e.g.:

```python
from langchain.document_loaders import WikipediaLoader

# doc_content_chars_max is the new init param
loader = WikipediaLoader(query="python", doc_content_chars_max=90000)
```

## Decisions

- The default value of `doc_content_chars_max` is 4000, because that is the current value.
- Python code comments (docstrings) have been added.

# Issue

#6639

# Dependencies

None

# Twitter handle

[@elafo](https://twitter.com/elafo)
This commit is contained in:
parent
5e5b30b74f
commit
db8b13df4c
@ -18,17 +18,36 @@ class WikipediaLoader(BaseLoader):
|
||||
lang: str = "en",
|
||||
load_max_docs: Optional[int] = 100,
|
||||
load_all_available_meta: Optional[bool] = False,
|
||||
doc_content_chars_max: Optional[int] = 4000,
|
||||
):
|
||||
"""
|
||||
Initializes a new instance of the WikipediaLoader class.
|
||||
|
||||
Args:
|
||||
query (str): The query string to search on Wikipedia.
|
||||
lang (str, optional): The language code for the Wikipedia language edition. Defaults to "en".
|
||||
load_max_docs (int, optional): The maximum number of documents to load. Defaults to 100.
|
||||
load_all_available_meta (bool, optional): Indicates whether to load all available metadata for each document. Defaults to False.
|
||||
doc_content_chars_max (int, optional): The maximum number of characters for the document content. Defaults to 4000.
|
||||
"""
|
||||
self.query = query
|
||||
self.lang = lang
|
||||
self.load_max_docs = load_max_docs
|
||||
self.load_all_available_meta = load_all_available_meta
|
||||
self.doc_content_chars_max = doc_content_chars_max
|
||||
|
||||
def load(self) -> List[Document]:
    """
    Run the stored query against Wikipedia and return the result.

    A WikipediaAPIWrapper is configured from this loader's attributes:
    ``lang`` is passed through unchanged, ``load_max_docs`` is passed as
    ``top_k_results``, and ``load_all_available_meta`` and
    ``doc_content_chars_max`` are forwarded under their own names.

    Returns:
        List[Document]: The value returned by ``WikipediaAPIWrapper.load``
        for ``self.query``.
    """
    # Collect the wrapper settings first so the mapping from loader
    # attributes to wrapper arguments is visible in one place.
    wrapper_settings = {
        "lang": self.lang,
        "top_k_results": self.load_max_docs,
        "load_all_available_meta": self.load_all_available_meta,
        "doc_content_chars_max": self.doc_content_chars_max,
    }
    wiki = WikipediaAPIWrapper(**wrapper_settings)
    return wiki.load(self.query)
|
||||
|
Loading…
Reference in New Issue
Block a user