mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Change WebBaseLoader metadata parsing to set missing metadata to a descriptive string instead of None (#8175)
Solves #8174 & #3542.
Co-authored-by: mevans <mevans@palantir.com>
This commit is contained in:
parent
1a7d8667c8
commit
72eb4fa4e8
@ -30,9 +30,9 @@ def _build_metadata(soup: Any, url: str) -> dict:
|
|||||||
if title := soup.find("title"):
|
if title := soup.find("title"):
|
||||||
metadata["title"] = title.get_text()
|
metadata["title"] = title.get_text()
|
||||||
if description := soup.find("meta", attrs={"name": "description"}):
|
if description := soup.find("meta", attrs={"name": "description"}):
|
||||||
metadata["description"] = description.get("content", None)
|
metadata["description"] = description.get("content", "No description found.")
|
||||||
if html := soup.find("html"):
|
if html := soup.find("html"):
|
||||||
metadata["language"] = html.get("lang", None)
|
metadata["language"] = html.get("lang", "No language found.")
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user