forked from Archives/langchain
Harrison/retrieval code (#1916)
parent
eb80d6e0e4
commit
fab7994b74
@ -0,0 +1,20 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
|
||||
|
||||
Chat History:
|
||||
{chat_history}
|
||||
Follow Up Input: {question}
|
||||
Standalone question:"""
|
||||
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
|
||||
|
||||
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
||||
|
||||
{context}
|
||||
|
||||
Question: {question}
|
||||
Helpful Answer:"""
|
||||
QA_PROMPT = PromptTemplate(
|
||||
template=prompt_template, input_variables=["context", "question"]
|
||||
)
|
@ -0,0 +1,46 @@
|
||||
"""Question-answering with sources over an index."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
|
||||
from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.schema import BaseRetriever
|
||||
|
||||
|
||||
class RetrievalQAWithSourcesChain(BaseQAWithSourcesChain, BaseModel):
    """Question-answering with sources over an index."""

    retriever: BaseRetriever = Field(exclude=True)
    """Index to connect to."""
    reduce_k_below_max_tokens: bool = False
    """Reduce the number of results to return from store based on tokens limit"""
    max_tokens_limit: int = 3375
    """Restrict the docs to return from store based on tokens,
    enforced only for StuffDocumentChain and if reduce_k_below_max_tokens is to true"""

    def _reduce_tokens_below_limit(self, docs: List[Document]) -> List[Document]:
        """Drop trailing docs until the total token count fits the limit.

        Trimming only applies when ``reduce_k_below_max_tokens`` is set and the
        combine chain is a StuffDocumentsChain (the only chain that stuffs all
        docs into a single prompt); otherwise the docs pass through unchanged.
        """
        trimming_enabled = self.reduce_k_below_max_tokens and isinstance(
            self.combine_documents_chain, StuffDocumentsChain
        )
        if not trimming_enabled:
            return docs[:]

        # Hoist the token counter out of the loop; one count per document.
        count_tokens = self.combine_documents_chain.llm_chain.llm.get_num_tokens
        token_counts = [count_tokens(doc.page_content) for doc in docs]

        keep = len(docs)
        total = sum(token_counts)
        # Shed documents from the end until the running total fits.
        while total > self.max_tokens_limit:
            keep -= 1
            total -= token_counts[keep]
        return docs[:keep]

    def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:
        """Fetch docs for the question, trimmed to the token limit."""
        question = inputs[self.question_key]
        # NOTE(review): later langchain versions call this method
        # `get_relevant_documents`; confirm against schema.BaseRetriever.
        docs = self.retriever.get_relevant_texts(question)
        return self._reduce_tokens_below_limit(docs)
|
@ -1,39 +1,3 @@
|
||||
"""Interface for interacting with a document."""
|
||||
from typing import List
|
||||
from langchain.schema import Document
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Document(BaseModel):
    """Interface for interacting with a document."""

    page_content: str
    lookup_str: str = ""
    lookup_index = 0
    metadata: dict = Field(default_factory=dict)

    @property
    def paragraphs(self) -> List[str]:
        """Paragraphs of the page (split on blank lines)."""
        return self.page_content.split("\n\n")

    @property
    def summary(self) -> str:
        """Summary of the page (the first paragraph)."""
        first, *_ = self.paragraphs
        return first

    def lookup(self, string: str) -> str:
        """Lookup a term in the page, imitating cmd-F functionality.

        Repeating the same (case-insensitive) term advances to the next
        match; a new term restarts the search from the first match.
        """
        needle = string.lower()
        if needle == self.lookup_str:
            # Same term as last time: step to the next occurrence.
            self.lookup_index += 1
        else:
            self.lookup_str = needle
            self.lookup_index = 0
        matches = [p for p in self.paragraphs if self.lookup_str in p.lower()]
        if not matches:
            return "No Results"
        if self.lookup_index >= len(matches):
            return "No More Results"
        result_prefix = f"(Result {self.lookup_index + 1}/{len(matches)})"
        return f"{result_prefix} {matches[self.lookup_index]}"
|
||||
__all__ = ["Document"]
|
||||
|
Loading…
Reference in New Issue