langchain/langchain/utilities/google_search.py
sseide 001b147450
Documentation fixes (linting and broken links) (#5563)
# Lint sphinx documentation and fix broken links

This PR lints multiple warnings shown in generation of the project
documentation (using "make docs_linkcheck" and "make docs_build").
Additionally, internal documentation links to (now?) non-existent files
are modified to point to existing documents that seemed to be the new
correct targets.

The documentation is not updated content-wise.
There are no source code changes.

Fixes # (issue)

- broken documentation links to other files within the project
- sphinx formatting (linting)

## Before submitting

No source code changes, so no new tests added.

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
2023-06-01 13:06:17 -07:00

130 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Util that calls Google Search."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Extra, root_validator
from langchain.utils import get_from_dict_or_env
class GoogleSearchAPIWrapper(BaseModel):
    """Wrapper around the Google Custom Search API.

    Setup instructions adapted from
    https://stackoverflow.com/questions/37083058/
    programmatically-searching-google-in-python-using-custom-search

    TODO: DOCS for using it

    1. Install google-api-python-client
       - If you don't already have a Google account, sign up.
       - If you have never created a Google APIs Console project, read the
         Managing Projects page and create a project in the Google API
         Console.
       - Install the library using ``pip install google-api-python-client``.
         The current version of the library is 2.70.0 at this time.

    2. Create an API key
       - Navigate to the "APIs & Services > Credentials" panel in Cloud
         Console.
       - Select "Create credentials", then select "API key" from the
         drop-down menu.
       - The "API key created" dialog box displays your newly created key.
       - You now have an API_KEY.

    3. Set up a Custom Search Engine so you can search the entire web
       - Create a custom search engine.
       - In "Sites to search", add any valid URL
         (e.g. www.stackoverflow.com).
       - That's all you have to fill in; the rest doesn't matter.
         In the left-side menu, click "Edit search engine >
         {your search engine name} > Setup", set "Search the entire web"
         to ON, and remove the URL you added from the list of sites to
         search.
       - Under "Search engine ID" you'll find the search-engine-ID.

    4. Enable the Custom Search API
       - Navigate to the "APIs & Services > Dashboard" panel in Cloud
         Console.
       - Click "Enable APIs and Services".
       - Search for "Custom Search API" and click on it.
       - Click "Enable".
         URL: https://console.cloud.google.com/apis/library/customsearch.googleapis.com
    """

    # Client object created by ``validate_environment`` via
    # ``googleapiclient.discovery.build``; not meant to be supplied by users.
    search_engine: Any  #: :meta private:
    # Developer key handed to ``build``; resolved in ``validate_environment``.
    google_api_key: Optional[str] = None
    # Search engine id, sent as the ``cx`` parameter of each query.
    google_cse_id: Optional[str] = None
    # Number of results requested by ``run`` (sent as ``num``).
    k: int = 10
    # When true, queries go through ``cse().siterestrict()`` instead of ``cse()``.
    siterestrict: bool = False

    class Config:
        """Pydantic settings for this model."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve credentials and build the Custom Search client.

        ``google_api_key`` and ``google_cse_id`` are looked up through
        ``get_from_dict_or_env`` using the environment variable names
        ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID`` respectively, and written
        back into ``values``. The ``customsearch`` v1 service object is then
        stored under ``values["search_engine"]``.

        Raises:
            ImportError: If ``googleapiclient`` cannot be imported.
        """
        # Keep this lookup order: the API key is resolved before the engine id.
        api_key = get_from_dict_or_env(values, "google_api_key", "GOOGLE_API_KEY")
        values["google_api_key"] = api_key

        cse_id = get_from_dict_or_env(values, "google_cse_id", "GOOGLE_CSE_ID")
        values["google_cse_id"] = cse_id

        # Imported lazily so the dependency is only needed when this wrapper
        # is actually instantiated.
        try:
            from googleapiclient.discovery import build
        except ImportError:
            raise ImportError(
                "google-api-python-client is not installed. "
                "Please install it with `pip install google-api-python-client`"
            )

        values["search_engine"] = build("customsearch", "v1", developerKey=api_key)
        return values

    def _google_search_results(self, search_term: str, **kwargs: Any) -> List[dict]:
        """Execute one search request and return its ``items`` list.

        Args:
            search_term: The query string, sent as ``q``.
            **kwargs: Extra parameters forwarded unchanged to ``list(...)``.

        Returns:
            The value under ``"items"`` in the response, or an empty list
            when the response has no such key.
        """
        resource = self.search_engine.cse()
        resource = resource.siterestrict() if self.siterestrict else resource
        request = resource.list(q=search_term, cx=self.google_cse_id, **kwargs)
        response = request.execute()
        return response.get("items", [])

    def run(self, query: str) -> str:
        """Search for ``query`` and return the result snippets as one string.

        Requests ``self.k`` results. Items without a ``"snippet"`` key are
        skipped; the remaining snippets are joined with single spaces. A
        fixed message is returned when the search yields no items.
        """
        hits = self._google_search_results(query, num=self.k)
        if not hits:
            return "No good Google Search Result was found"
        return " ".join(hit["snippet"] for hit in hits if "snippet" in hit)

    def results(self, query: str, num_results: int) -> List[Dict]:
        """Search for ``query`` and return per-result metadata.

        Args:
            query: The query to search for.
            num_results: The number of results to request.

        Returns:
            A list of dictionaries with the following keys:
                title - The title of the result.
                link - The link to the result.
                snippet - The description of the result (only present when
                    the item contains one).
            When the search yields no items, a single-element list holding
            a ``{"Result": ...}`` placeholder message is returned instead.
        """
        hits = self._google_search_results(query, num=num_results)
        if not hits:
            return [{"Result": "No good Google Search Result was found"}]

        collected = []
        for hit in hits:
            entry = {"title": hit["title"], "link": hit["link"]}
            if "snippet" in hit:
                entry["snippet"] = hit["snippet"]
            collected.append(entry)
        return collected