You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/langchain/utilities/google_search.py

130 lines
4.7 KiB
Python

"""Util that calls Google Search."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
from langchain.utils import get_from_dict_or_env
class GoogleSearchAPIWrapper(BaseModel):
"""Wrapper for Google Search API.
Adapted from: Instructions adapted from https://stackoverflow.com/questions/
37083058/
programmatically-searching-google-in-python-using-custom-search
TODO: DOCS for using it
1. Install google-api-python-client
- If you don't already have a Google account, sign up.
- If you have never created a Google APIs Console project,
read the Managing Projects page and create a project in the Google API Console.
- Install the library using pip install google-api-python-client
The current version of the library is 2.70.0 at this time
2. To create an API key:
- Navigate to the APIs & ServicesCredentials panel in Cloud Console.
- Select Create credentials, then select API key from the drop-down menu.
- The API key created dialog box displays your newly created key.
- You now have an API_KEY
3. Setup Custom Search Engine so you can search the entire web
- Create a custom search engine in this link.
- In Sites to search, add any valid URL (i.e. www.stackoverflow.com).
- Thats all you have to fill up, the rest doesnt matter.
In the left-side menu, click Edit search engine {your search engine name}
Setup Set Search the entire web to ON. Remove the URL you added from
the list of Sites to search.
- Under Search engine ID youll find the search-engine-ID.
4. Enable the Custom Search API
- Navigate to the APIs & ServicesDashboard panel in Cloud Console.
- Click Enable APIs and Services.
- Search for Custom Search API and click on it.
- Click Enable.
URL for it: https://console.cloud.google.com/apis/library/customsearch.googleapis
.com
"""
search_engine: Any #: :meta private:
google_api_key: Optional[str] = None
google_cse_id: Optional[str] = None
k: int = 10
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]:
res = (
self.search_engine.cse()
.list(q=search_term, cx=self.google_cse_id, **kwargs)
.execute()
)
return res.get("items", [])
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
google_api_key = get_from_dict_or_env(
values, "google_api_key", "GOOGLE_API_KEY"
)
values["google_api_key"] = google_api_key
google_cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID")
values["google_cse_id"] = google_cse_id
try:
from googleapiclient.discovery import build
except ImportError:
raise ImportError(
"google-api-python-client is not installed. "
"Please install it with `pip install google-api-python-client`"
)
service = build("customsearch", "v1", developerKey=google_api_key)
values["search_engine"] = service
return values
def run(self, query: str) -> str:
"""Run query through GoogleSearch and parse result."""
snippets = []
results = self._google_search_results(query, num=self.k)
if len(results) == 0:
return "No good Google Search Result was found"
for result in results:
if "snippet" in result:
snippets.append(result["snippet"])
return " ".join(snippets)
def results(self, query: str, num_results: int) -> List[Dict]:
"""Run query through GoogleSearch and return metadata.
Args:
query: The query to search for.
num_results: The number of results to return.
Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
"""
metadata_results = []
results = self._google_search_results(query, num=num_results)
if len(results) == 0:
return [{"Result": "No good Google Search Result was found"}]
for result in results:
metadata_result = {
"title": result["title"],
"link": result["link"],
}
if "snippet" in result:
metadata_result["snippet"] = result["snippet"]
metadata_results.append(metadata_result)
return metadata_results