Change WebBaseLoader metadata parsing to set missing metadata to descriptive string instead of None (#8175)

Solves #8174 & #3542

Co-authored-by: mevans <mevans@palantir.com>
This commit is contained in:
Monty Evans 2023-07-24 20:17:49 +01:00 committed by GitHub
parent 1a7d8667c8
commit 72eb4fa4e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -30,9 +30,9 @@ def _build_metadata(soup: Any, url: str) -> dict:
if title := soup.find("title"): if title := soup.find("title"):
metadata["title"] = title.get_text() metadata["title"] = title.get_text()
if description := soup.find("meta", attrs={"name": "description"}): if description := soup.find("meta", attrs={"name": "description"}):
metadata["description"] = description.get("content", None) metadata["description"] = description.get("content", "No description found.")
if html := soup.find("html"): if html := soup.find("html"):
metadata["language"] = html.get("lang", None) metadata["language"] = html.get("lang", "No language found.")
return metadata return metadata