mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
community[patch]: Minor Improvement of extract hyperlinks tool output (#25728)
**Description:** Make each hyperlink appear only once in the extract_hyperlinks tool output. (On some websites the output contains meaningless '#' hyperlinks many times, which consumes context-window tokens without any benefit.) **Issue:** None **Dependencies:** None
This commit is contained in:
parent
ff0df5ea15
commit
25a6790e1a
@ -63,8 +63,9 @@ class ExtractHyperlinksTool(BaseBrowserTool):
|
||||
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
|
||||
else:
|
||||
links = [anchor.get("href", "") for anchor in anchors]
|
||||
# Return the list of links as a JSON string
|
||||
return json.dumps(links)
|
||||
# Return the list of links as a JSON string. Duplicated link
|
||||
# only appears once in the list
|
||||
return json.dumps(list(set(links)))
|
||||
|
||||
def _run(
|
||||
self,
|
||||
|
Loading…
Reference in New Issue
Block a user