forked from Archives/langchain
001b147450
# Lint sphinx documentation and fix broken links This PR lints multiple warnings shown in generation of the project documentation (using "make docs_linkcheck" and "make docs_build"). Additionally documentation internal links to (now?) non-existent files are modified to point to existing documents as it seemed the new correct target. The documentation is not updated content wise. There are no source code changes. Fixes # (issue) - broken documentation links to other files within the project - sphinx formatting (linting) ## Before submitting No source code changes, so no new tests added. --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
130 lines
4.8 KiB
Python
130 lines
4.8 KiB
Python
"""Util that calls Google Search."""
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
from pydantic import BaseModel, Extra, root_validator
|
||
|
||
from langchain.utils import get_from_dict_or_env
|
||
|
||
|
||
class GoogleSearchAPIWrapper(BaseModel):
    """Wrapper for Google Search API.

    Instructions adapted from https://stackoverflow.com/questions/
    37083058/
    programmatically-searching-google-in-python-using-custom-search

    TODO: DOCS for using it
    1. Install google-api-python-client
    - If you don't already have a Google account, sign up.
    - If you have never created a Google APIs Console project,
      read the Managing Projects page and create a project in the Google API Console.
    - Install the library using pip install google-api-python-client
      The current version of the library is 2.70.0 at this time

    2. To create an API key:
    - Navigate to the APIs & Services→Credentials panel in Cloud Console.
    - Select Create credentials, then select API key from the drop-down menu.
    - The API key created dialog box displays your newly created key.
    - You now have an API_KEY

    3. Setup Custom Search Engine so you can search the entire web
    - Create a custom search engine in this link.
    - In Sites to search, add any valid URL (i.e. www.stackoverflow.com).
    - That’s all you have to fill up, the rest doesn’t matter.
      In the left-side menu, click Edit search engine → {your search engine name}
      → Setup Set Search the entire web to ON. Remove the URL you added from
      the list of Sites to search.
    - Under Search engine ID you’ll find the search-engine-ID.

    4. Enable the Custom Search API
    - Navigate to the APIs & Services→Dashboard panel in Cloud Console.
    - Click Enable APIs and Services.
    - Search for Custom Search API and click on it.
    - Click Enable.
      URL for it: https://console.cloud.google.com/apis/library/customsearch.googleapis
      .com
    """

    # Service object; populated by ``validate_environment`` from
    # ``googleapiclient.discovery.build``, not meant to be supplied by callers.
    search_engine: Any  #: :meta private:
    # API key handed to ``build`` as ``developerKey``.
    google_api_key: Optional[str] = None
    # Custom search engine id, sent as the ``cx`` argument of every query.
    google_cse_id: Optional[str] = None
    # Number of results requested by ``run`` (sent as ``num``).
    k: int = 10
    # When true, queries go through ``cse().siterestrict()`` instead of ``cse()``.
    siterestrict: bool = False

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]:
        """Execute one search request and return its raw result items.

        Args:
            search_term: The query string, sent as ``q``.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                ``list`` call of the search resource (e.g. ``num``).

        Returns:
            The ``"items"`` entry of the response, or an empty list when the
            response has no such key.
        """
        cse = self.search_engine.cse()
        if self.siterestrict:
            cse = cse.siterestrict()
        res = cse.list(q=search_term, cx=self.google_cse_id, **kwargs).execute()
        return res.get("items", [])

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Resolves ``google_api_key`` and ``google_cse_id`` through
        ``get_from_dict_or_env`` (env names ``GOOGLE_API_KEY`` and
        ``GOOGLE_CSE_ID``), then builds the ``customsearch`` v1 service and
        stores it under ``search_engine``.

        Raises:
            ImportError: If ``googleapiclient`` cannot be imported.
        """
        google_api_key = get_from_dict_or_env(
            values, "google_api_key", "GOOGLE_API_KEY"
        )
        values["google_api_key"] = google_api_key

        google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID")
        values["google_cse_id"] = google_cse_id

        try:
            from googleapiclient.discovery import build
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "google-api-python-client is not installed. "
                "Please install it with `pip install google-api-python-client`"
            ) from err

        values["search_engine"] = build(
            "customsearch", "v1", developerKey=google_api_key
        )

        return values

    def run(self, query: str) -> str:
        """Run query through GoogleSearch and parse result.

        Args:
            query: The query to search for.

        Returns:
            The snippets of up to ``k`` results joined by single spaces, or a
            fixed "not found" message when the search returned no items.
        """
        results = self._google_search_results(query, num=self.k)
        if not results:
            return "No good Google Search Result was found"
        # Items without a snippet are skipped rather than treated as errors.
        return " ".join(
            result["snippet"] for result in results if "snippet" in result
        )

    def results(self, query: str, num_results: int) -> List[Dict]:
        """Run query through GoogleSearch and return metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.

        Returns:
            A list of dictionaries with the following keys:
                snippet - The description of the result (only when present).
                title - The title of the result.
                link - The link to the result.
            When the search returned no items, a single-element list holding
            a ``"Result"`` key with a "not found" message is returned instead.
        """
        results = self._google_search_results(query, num=num_results)
        if not results:
            return [{"Result": "No good Google Search Result was found"}]

        metadata_results = []
        for result in results:
            metadata_result = {
                "title": result["title"],
                "link": result["link"],
            }
            if "snippet" in result:
                metadata_result["snippet"] = result["snippet"]
            metadata_results.append(metadata_result)

        return metadata_results
|