You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/langchain_community/utilities/google_search.py

145 lines
5.1 KiB
Python

"""Util that calls Google Search."""
from typing import Any, Dict, List, Optional
from langchain_core._api.deprecation import deprecated
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
from langchain_core.utils import get_from_dict_or_env
@deprecated(
    since="0.0.33",
    removal="0.2.0",
    alternative_import="langchain_google_community.GoogleSearchAPIWrapper",
)
class GoogleSearchAPIWrapper(BaseModel):
    """Wrapper for Google Search API.

    Instructions adapted from the Stack Overflow question
    "Programmatically searching Google in Python using custom search"
    (https://stackoverflow.com/questions/37083058).

    TODO: DOCS for using it

    1. Install google-api-python-client
        - If you don't already have a Google account, sign up.
        - If you have never created a Google APIs Console project,
          read the Managing Projects page and create a project in the
          Google API Console.
        - Install the library using pip install google-api-python-client

    2. Enable the Custom Search API
        - Navigate to the APIs & Services → Dashboard panel in Cloud Console.
        - Click Enable APIs and Services.
        - Search for Custom Search API and click on it.
        - Click Enable.

        URL for it:
        https://console.cloud.google.com/apis/library/customsearch.googleapis.com

    3. To create an API key:
        - Navigate to the APIs & Services → Credentials panel in Cloud Console.
        - Select Create credentials, then select API key from the drop-down
          menu.
        - The API key created dialog box displays your newly created key.
        - You now have an API_KEY

        Alternatively, you can just generate an API key here:
        https://developers.google.com/custom-search/docs/paid_element#api_key

    4. Set up a Custom Search Engine so you can search the entire web
        - Create a custom search engine here:
          https://programmablesearchengine.google.com/.
        - In `What to search`, pick the `Search the entire Web` option.

        After the search engine is created, you can click on it and find
        `Search engine ID` on the Overview page.
    """

    # Google API client service object; populated by `validate_environment`.
    search_engine: Any  #: :meta private:
    # API key; resolved from the GOOGLE_API_KEY environment variable if unset.
    google_api_key: Optional[str] = None
    # Search engine ID (sent as `cx`); resolved from GOOGLE_CSE_ID if unset.
    google_cse_id: Optional[str] = None
    # Number of results requested by `run` (sent as the `num` parameter).
    k: int = 10
    # When True, queries go through `cse().siterestrict()` instead of `cse()`.
    siterestrict: bool = False

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]:
        """Execute one Custom Search request and return its result items.

        Args:
            search_term: The query string, sent as the `q` parameter.
            **kwargs: Extra keyword arguments forwarded unchanged to
                `cse.list(...)`.

        Returns:
            The value of the response's "items" entry, or an empty list when
            the response has no such entry.
        """
        cse = self.search_engine.cse()
        if self.siterestrict:
            cse = cse.siterestrict()
        res = cse.list(q=search_term, cx=self.google_cse_id, **kwargs).execute()
        return res.get("items", [])

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment.

        Resolves `google_api_key` and `google_cse_id` through
        `get_from_dict_or_env` (using the GOOGLE_API_KEY and GOOGLE_CSE_ID
        environment variable names), then builds the "customsearch" v1
        service and stores it under `search_engine`.

        Raises:
            ImportError: If `google-api-python-client` is not installed.
        """
        google_api_key = get_from_dict_or_env(
            values, "google_api_key", "GOOGLE_API_KEY"
        )
        values["google_api_key"] = google_api_key

        google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID")
        values["google_cse_id"] = google_cse_id

        try:
            from googleapiclient.discovery import build
        except ImportError as e:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "google-api-python-client is not installed. "
                "Please install it with `pip install google-api-python-client"
                ">=2.100.0`"
            ) from e

        service = build("customsearch", "v1", developerKey=google_api_key)
        values["search_engine"] = service

        return values

    def run(self, query: str) -> str:
        """Run query through GoogleSearch and parse result.

        Args:
            query: The query to search for.

        Returns:
            The snippets of all results that have one, joined by single
            spaces, or a fixed "no result" message when the search returned
            no items.
        """
        results = self._google_search_results(query, num=self.k)
        if not results:
            return "No good Google Search Result was found"
        # Items without a "snippet" key are skipped rather than raising.
        return " ".join(
            result["snippet"] for result in results if "snippet" in result
        )

    def results(
        self,
        query: str,
        num_results: int,
        search_params: Optional[Dict[str, str]] = None,
    ) -> List[Dict]:
        """Run query through GoogleSearch and return metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to return.
            search_params: Parameters to be passed on search. Must not
                contain a `num` key: `num` is already set from
                `num_results`, and a duplicate keyword raises `TypeError`.

        Returns:
            A list of dictionaries with the following keys:
                snippet - The description of the result (only when present).
                title - The title of the result.
                link - The link to the result.
            When the search returns no items, a single-element list
            `[{"Result": "No good Google Search Result was found"}]`.
        """
        results = self._google_search_results(
            query, num=num_results, **(search_params or {})
        )
        if not results:
            return [{"Result": "No good Google Search Result was found"}]

        metadata_results = []
        for result in results:
            metadata_result = {
                "title": result["title"],
                "link": result["link"],
            }
            # "snippet" is optional on an item; title and link are required.
            if "snippet" in result:
                metadata_result["snippet"] = result["snippet"]
            metadata_results.append(metadata_result)

        return metadata_results