2023-11-14 01:22:39 +00:00
|
|
|
import os
|
|
|
|
from typing import List, Tuple
|
|
|
|
|
|
|
|
from langchain.agents import AgentExecutor
|
|
|
|
from langchain.agents.format_scratchpad import format_to_openai_function_messages
|
|
|
|
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
|
2023-11-14 04:54:03 +00:00
|
|
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
|
|
|
from langchain.tools.retriever import create_retriever_tool
|
2024-01-02 23:23:34 +00:00
|
|
|
from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
|
2024-01-03 07:18:15 +00:00
|
|
|
from langchain_community.utilities.arxiv import ArxivAPIWrapper
|
2024-02-22 23:58:44 +00:00
|
|
|
from langchain_core.documents import Document
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran the
following:
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_history' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.messages import AIMessage, HumanMessage
|
2024-01-03 21:28:05 +00:00
|
|
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
docs[patch], templates[patch]: Import from core (#14575)
Update imports to use core for the low-hanging fruit changes. Ran the
following:
```bash
git grep -l 'langchain.schema.runnable' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.runnable/langchain_core.runnables/g'
git grep -l 'langchain.schema.output_parser' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.output_parser/langchain_core.output_parsers/g'
git grep -l 'langchain.schema.messages' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.messages/langchain_core.messages/g'
git grep -l 'langchain.schema.chat_history' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.chat_history/langchain_core.chat_history/g'
git grep -l 'langchain.schema.prompt_template' {docs,templates,cookbook} | xargs sed -i '' 's/langchain\.schema\.prompt_template/langchain_core.prompts/g'
git grep -l 'from langchain.pydantic_v1' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.pydantic_v1/from langchain_core.pydantic_v1/g'
git grep -l 'from langchain.tools.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.tools\.base/from langchain_core.tools/g'
git grep -l 'from langchain.chat_models.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.chat_models.base/from langchain_core.language_models.chat_models/g'
git grep -l 'from langchain.llms.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.llms\.base\ /from langchain_core.language_models.llms\ /g'
git grep -l 'from langchain.embeddings.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.embeddings\.base/from langchain_core.embeddings/g'
git grep -l 'from langchain.vectorstores.base' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.vectorstores\.base/from langchain_core.vectorstores/g'
git grep -l 'from langchain.agents.tools' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.agents\.tools/from langchain_core.tools/g'
git grep -l 'from langchain.schema.output' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.output\ /from langchain_core.outputs\ /g'
git grep -l 'from langchain.schema.embeddings' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.embeddings/from langchain_core.embeddings/g'
git grep -l 'from langchain.schema.document' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.document/from langchain_core.documents/g'
git grep -l 'from langchain.schema.agent' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.agent/from langchain_core.agents/g'
git grep -l 'from langchain.schema.prompt ' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.prompt\ /from langchain_core.prompt_values /g'
git grep -l 'from langchain.schema.language_model' {docs,templates,cookbook} | xargs sed -i '' 's/from langchain\.schema\.language_model/from langchain_core.language_models/g'
```
2023-12-12 00:49:10 +00:00
|
|
|
from langchain_core.pydantic_v1 import BaseModel, Field
|
2024-02-22 23:58:44 +00:00
|
|
|
from langchain_core.retrievers import BaseRetriever
|
2024-01-23 21:58:06 +00:00
|
|
|
from langchain_openai import AzureChatOpenAI
|
2023-11-14 01:22:39 +00:00
|
|
|
|
|
|
|
|
2023-11-14 04:54:03 +00:00
|
|
|
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
    """`Arxiv` retriever.

    Runs an arXiv search and returns one `Document` per result, with the
    result summary as the page content.
    It uses all ArxivAPIWrapper arguments without any change.
    """

    # NOTE(review): this flag is not read anywhere in the class as shown.
    get_full_documents: bool = False

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search arXiv for `query` and return one document per result.

        Args:
            query: Either whitespace-separated arXiv identifiers or a
                free-text query. Free-text queries are truncated to
                `ARXIV_MAX_QUERY_LENGTH` characters before searching.
            run_manager: Callback manager for this run; not used here.

        Returns:
            Documents whose `page_content` is the result summary and whose
            metadata carries "Published", "Title" and "Authors". If the
            search raises one of `self.arxiv_exceptions`, a single document
            containing the exception text is returned instead.
        """
        try:
            if self.is_arxiv_identifier(query):
                search = self.arxiv_search(
                    id_list=query.split(),
                    max_results=self.top_k_results,
                )
            else:
                search = self.arxiv_search(  # type: ignore
                    query[: self.ARXIV_MAX_QUERY_LENGTH],
                    max_results=self.top_k_results,
                )
            # `results()` can be a lazy iterator that only talks to the API
            # while being consumed. Materialize it here so errors raised
            # during fetching are handled by the `except` below rather than
            # escaping from the list comprehension.
            results = list(search.results())
        except self.arxiv_exceptions as ex:
            return [Document(page_content=f"Arxiv exception: {ex}")]

        return [
            Document(
                page_content=result.summary,
                metadata={
                    # Date part of the result's `updated` timestamp.
                    "Published": result.updated.date(),
                    "Title": result.title,
                    "Authors": ", ".join(a.name for a in result.authors),
                },
            )
            for result in results
        ]
|
|
|
|
|
|
|
|
|
|
|
|
# Description text for the arXiv tool. The fragments are joined with single
# spaces, so the resulting string is one continuous line.
description = " ".join(
    (
        "A wrapper around Arxiv.org",
        "Useful for when you need to answer questions about Physics, Mathematics,",
        "Computer Science, Quantitative Biology, Quantitative Finance, Statistics,",
        "Electrical Engineering, and Economics",
        "from scientific articles on arxiv.org.",
        "Input should be a search query.",
    )
)
|
2023-11-14 01:22:39 +00:00
|
|
|
|
|
|
|
# Create the tool: wrap a fresh ArxivRetriever as a tool named "arxiv".
arxiv_tool = create_retriever_tool(
    retriever=ArxivRetriever(),
    name="arxiv",
    description=description,
)
# The agent's complete tool set.
tools = [arxiv_tool]
|
|
|
|
# Azure OpenAI chat model. Endpoint, key and API version are read from the
# environment; a missing variable is passed through as None.
llm = AzureChatOpenAI(
    api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    temperature=0,
)
|
|
|
|
# System instructions for the assistant, as a single-line string.
assistant_system_message = (
    "You are a helpful research assistant. "
    "Lookup relevant information as needed."
)
|
|
|
|
# Chat prompt for the agent. Messages appear in this order: the system
# instructions, the earlier conversation, the current user input, and finally
# the agent scratchpad.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", assistant_system_message),
        # Slot filled from the "chat_history" prompt variable.
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        # Slot filled from the "agent_scratchpad" prompt variable.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)
|
|
|
|
|
|
|
|
# Bind every tool to the model as an OpenAI function definition.
llm_with_tools = llm.bind(
    functions=list(map(format_tool_to_openai_function, tools))
)
|
|
|
|
|
|
|
|
|
|
|
|
def _format_chat_history(chat_history: List[Tuple[str, str]]):
|
|
|
|
buffer = []
|
|
|
|
for human, ai in chat_history:
|
|
|
|
buffer.append(HumanMessage(content=human))
|
|
|
|
buffer.append(AIMessage(content=ai))
|
|
|
|
return buffer
|
|
|
|
|
|
|
|
|
|
|
|
# Agent pipeline. It maps the incoming dict onto the prompt variables, renders
# the prompt, calls the function-enabled model, and passes the model output
# through OpenAIFunctionsAgentOutputParser.
agent = (
    {
        # The user's message is passed through unchanged.
        "input": lambda inputs: inputs["input"],
        # (human, ai) pairs are converted into message objects.
        "chat_history": lambda inputs: _format_chat_history(inputs["chat_history"]),
        # Intermediate steps are converted into scratchpad messages.
        "agent_scratchpad": lambda inputs: format_to_openai_function_messages(
            inputs["intermediate_steps"]
        ),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)
|
|
|
|
|
|
|
|
|
|
|
|
class AgentInput(BaseModel):
    # Input schema handed to the executor through `with_types(input_type=...)`.

    # The user's current message.
    input: str
    # Earlier turns as (human, ai) string pairs; required (`...` means there is
    # no default). The `extra` widget metadata describes a "chat" widget keyed
    # on "input" and "output".
    # NOTE(review): presumably consumed by a serving/playground UI; confirm.
    chat_history: List[Tuple[str, str]] = Field(
        ..., extra={"widget": {"type": "chat", "input": "input", "output": "output"}}
    )
|
|
|
|
|
|
|
|
|
|
|
|
# Executor that runs the agent with its tools (verbose output enabled) and
# declares AgentInput as its input type.
agent_executor = AgentExecutor(
    verbose=True,
    tools=tools,
    agent=agent,
).with_types(input_type=AgentInput)
|