forked from Archives/langchain
LLMRequestsChain (#267)
parent
68666d6a22
commit
28be37f470
@ -0,0 +1,123 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "dd7ec7af",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# LLMRequestsChain\n",
|
||||||
|
"\n",
|
||||||
|
    "Using the requests library to get HTML results from a URL and then an LLM to parse results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "dd8eae75",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.llms import OpenAI\n",
|
||||||
|
"from langchain.chains import LLMRequestsChain, LLMChain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "65bf324e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.prompts import PromptTemplate\n",
|
||||||
|
"\n",
|
||||||
|
"template = \"\"\"Between >>> and <<< are the raw search result text from google.\n",
|
||||||
|
"Extract the answer to the question '{query}' or say \"not found\" if the information is not contained.\n",
|
||||||
|
"Use the format\n",
|
||||||
|
"Extracted:<answer or \"not found\">\n",
|
||||||
|
">>> {requests_result} <<<\n",
|
||||||
|
"Extracted:\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"PROMPT = PromptTemplate(\n",
|
||||||
|
" input_variables=[\"query\", \"requests_result\"],\n",
|
||||||
|
" template=template,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "f36ae0d8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"chain = LLMRequestsChain(llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=PROMPT))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "b5d22d9d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"question = \"What are the Three (3) biggest countries, and their respective sizes?\"\n",
|
||||||
|
"inputs = {\n",
|
||||||
|
" \"query\": question,\n",
|
||||||
|
" \"url\": \"https://www.google.com/search?q=\" + question.replace(\" \", \"+\")\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "2ea81168",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'query': 'What are the Three (3) biggest countries, and their respective sizes?',\n",
|
||||||
|
" 'url': 'https://www.google.com/search?q=What+are+the+Three+(3)+biggest+countries,+and+their+respective+sizes?',\n",
|
||||||
|
" 'output': ' Russia (17,098,242 sq km), Canada (9,984,670 sq km), China (9,706,961 sq km)'}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"chain(inputs)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "db8f2b6d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -0,0 +1,73 @@
|
|||||||
|
"""Chain that hits a URL and then uses an LLM to parse results."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Extra, Field, root_validator
|
||||||
|
|
||||||
|
from langchain.chains import LLMChain
|
||||||
|
from langchain.chains.base import Chain
|
||||||
|
from langchain.requests import RequestsWrapper
|
||||||
|
|
||||||
|
# Browser-like default headers; a realistic User-Agent reduces the chance
# of a site rejecting the request as an automated client.
# NOTE(review): not referenced elsewhere in this file's visible code —
# presumably consumed by callers configuring RequestsWrapper.headers; confirm.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
class LLMRequestsChain(Chain, BaseModel):
    """Chain that hits a URL and then uses an LLM to parse results."""

    # Chain that receives the page text (plus any extra inputs) as prompt vars.
    llm_chain: LLMChain
    # HTTP client; defaults to a plain wrapper with no custom headers.
    requests_wrapper: RequestsWrapper = Field(default_factory=RequestsWrapper)
    # Truncate the extracted page text to this many characters before it is
    # handed to the LLM, so the prompt stays within model context limits.
    text_length: int = 8000
    requests_key: str = "requests_result"  #: :meta private:
    input_key: str = "url"  #: :meta private:
    output_key: str = "output"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return [self.output_key]

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the bs4 python package is installed.

        Raises:
            ValueError: If ``bs4`` cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError:
            # Fixed message typo ("Please it install it" -> "Please install it").
            raise ValueError(
                "Could not import bs4 python package. "
                "Please install it with `pip install bs4`."
            )
        return values

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Fetch the URL from ``inputs``, strip HTML, and run the LLM chain.

        All input keys other than ``self.input_key`` are forwarded to the
        LLM chain unchanged; the extracted page text is added under
        ``self.requests_key``.
        """
        # Imported lazily: availability is guaranteed by validate_environment.
        from bs4 import BeautifulSoup

        # Other keys are assumed to be needed for LLM prediction
        other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
        url = inputs[self.input_key]
        res = self.requests_wrapper.run(url)
        # Extract the visible text from the HTML so the LLM is not fed raw
        # markup, then truncate to keep the prompt bounded.
        soup = BeautifulSoup(res, "html.parser")
        other_keys[self.requests_key] = soup.get_text()[: self.text_length]
        result = self.llm_chain.predict(**other_keys)
        return {self.output_key: result}
|
@ -0,0 +1,15 @@
|
|||||||
|
"""Lightweight wrapper around request library."""
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
class RequestsWrapper(BaseModel):
    """Lightweight wrapper to partial out everything except the url to hit."""

    # Optional HTTP headers to send with the request (e.g. a User-Agent).
    headers: Optional[dict] = None
    # Seconds to wait before the request times out. Defaults to None (wait
    # indefinitely), which matches the previous behavior of calling
    # requests.get without a timeout; set a value in production to avoid
    # hanging forever on an unresponsive host.
    timeout: Optional[float] = None

    def run(self, url: str) -> str:
        """Hit the URL and return the text."""
        return requests.get(url, headers=self.headers, timeout=self.timeout).text
|
Loading…
Reference in New Issue