forked from Archives/langchain
LLMRequestsChain (#267)
parent
68666d6a22
commit
28be37f470
@ -0,0 +1,123 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd7ec7af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLMRequestsChain\n",
|
||||
"\n",
|
||||
"Using the requests library to fetch the HTML from a URL, and then an LLM to parse the results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dd8eae75",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import LLMRequestsChain, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "65bf324e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Between >>> and <<< are the raw search result text from google.\n",
|
||||
"Extract the answer to the question '{query}' or say \"not found\" if the information is not contained.\n",
|
||||
"Use the format\n",
|
||||
"Extracted:<answer or \"not found\">\n",
|
||||
">>> {requests_result} <<<\n",
|
||||
"Extracted:\"\"\"\n",
|
||||
"\n",
|
||||
"PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"query\", \"requests_result\"],\n",
|
||||
" template=template,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f36ae0d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = LLMRequestsChain(llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=PROMPT))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b5d22d9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"What are the Three (3) biggest countries, and their respective sizes?\"\n",
|
||||
"inputs = {\n",
|
||||
" \"query\": question,\n",
|
||||
" \"url\": \"https://www.google.com/search?q=\" + question.replace(\" \", \"+\")\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2ea81168",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What are the Three (3) biggest countries, and their respective sizes?',\n",
|
||||
" 'url': 'https://www.google.com/search?q=What+are+the+Three+(3)+biggest+countries,+and+their+respective+sizes?',\n",
|
||||
" 'output': ' Russia (17,098,242 sq km), Canada (9,984,670 sq km), China (9,706,961 sq km)'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db8f2b6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -0,0 +1,73 @@
|
||||
"""Chain that hits a URL and then uses an LLM to parse results."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.requests import RequestsWrapper
|
||||
|
||||
# Browser-like request headers (a desktop Chrome User-Agent string).
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/87.0.4280.88 Safari/537.36"
    )
}
|
||||
|
||||
|
||||
class LLMRequestsChain(Chain, BaseModel):
    """Chain that hits a URL and then uses an LLM to parse results.

    The page at ``inputs[input_key]`` is fetched through ``requests_wrapper``,
    converted to text with BeautifulSoup's ``get_text()``, truncated to
    ``text_length`` characters and passed to ``llm_chain`` under the
    ``requests_key`` variable. Every other input is forwarded to the LLM
    chain unchanged.
    """

    llm_chain: LLMChain
    # Default to the browser-like DEFAULT_HEADERS; a copy is passed so the
    # module-level constant cannot be mutated through an instance.
    requests_wrapper: RequestsWrapper = Field(
        default_factory=lambda: RequestsWrapper(headers=dict(DEFAULT_HEADERS))
    )
    # Maximum number of characters of page text handed to the LLM chain.
    text_length: int = 8000
    requests_key: str = "requests_result"  #: :meta private:
    input_key: str = "url"  #: :meta private:
    output_key: str = "output"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Return the single key that holds the URL to fetch.

        Any additional keys supplied by the caller are passed through to
        the LLM chain by ``_call``.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """Return the single key under which the LLM result is stored.

        :meta private:
        """
        return [self.output_key]

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the bs4 python package exists in environment.

        Raises:
            ValueError: if ``bs4`` cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as exc:
            raise ValueError(
                "Could not import bs4 python package. "
                "Please install it with `pip install bs4`."
            ) from exc
        return values

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Fetch the URL, extract its text and run the LLM chain on it.

        Args:
            inputs: must contain ``input_key`` (the URL); all remaining
                entries are forwarded to ``llm_chain.predict``.

        Returns:
            A dict mapping ``output_key`` to the LLM chain's prediction.
        """
        from bs4 import BeautifulSoup

        # Other keys are assumed to be needed for LLM prediction
        other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
        url = inputs[self.input_key]
        res = self.requests_wrapper.run(url)
        # extract the text from the html
        soup = BeautifulSoup(res, "html.parser")
        # Truncate so the scraped text stays within text_length characters
        other_keys[self.requests_key] = soup.get_text()[: self.text_length]
        result = self.llm_chain.predict(**other_keys)
        return {self.output_key: result}
|
@ -0,0 +1,15 @@
|
||||
"""Lightweight wrapper around the requests library."""
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class RequestsWrapper(BaseModel):
    """Thin model that fixes request options so callers only supply a URL.

    ``headers``, when set, is forwarded with every request made by ``run``.
    """

    headers: Optional[dict] = None

    def run(self, url: str) -> str:
        """Issue a GET request to ``url`` and return the response body text.

        Only the configured headers are passed to ``requests.get``; no
        timeout argument is supplied and the status code is not checked.
        """
        response = requests.get(url, headers=self.headers)
        return response.text
|
Loading…
Reference in New Issue