bump version to 0.0.95 (#1324)

docker-utility-pexpect
Harrison Chase committed by GitHub
parent 0824d65a5c
commit f61858163d

@@ -95,7 +95,7 @@
"id": "f3493fa4",
"metadata": {},
"source": [
"Now that the index is created, we can use it in a VectorDBQAChain to ask questions of the data!"
"Now that the index is created, we can use it to ask questions of the data! Note that under the hood this is actually doing a few steps as well, which we will cover later in this guide."
]
},
{
@@ -107,7 +107,7 @@
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a consensus builder, and has gained a broad range of support. He also said that she is a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers.\""
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
]
},
"execution_count": 5,
@@ -116,9 +116,61 @@
}
],
"source": [
"qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=index)\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"qa.run(query)"
"index.query(query)"
]
},
+{
+"cell_type": "code",
+"execution_count": 6,
+"id": "ae46b239",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"{'question': 'What did the president say about Ketanji Brown Jackson',\n",
+" 'answer': \" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, one of the nation's top legal minds, to continue Justice Breyer's legacy of excellence, and that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\\n\",\n",
+" 'sources': '../state_of_the_union.txt'}"
+]
+},
+"execution_count": 6,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"query = \"What did the president say about Ketanji Brown Jackson\"\n",
+"index.query_with_sources(query)"
+]
+},
+{
+"cell_type": "markdown",
+"id": "ff100212",
+"metadata": {},
+"source": [
"What is returned from the `VectorstoreIndexCreator` is `VectorStoreIndexWrapper`, which provides these nice `query` and `query_with_sources` functionality. If we just wanted to access the vectorstore directly, we can also do that."
+]
+},
+{
+"cell_type": "code",
+"execution_count": 7,
+"id": "b04f3c10",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"<langchain.vectorstores.chroma.Chroma at 0x113a3a700>"
+]
+},
+"execution_count": 7,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"index.vectorstore"
+]
+},
{
@@ -223,6 +275,7 @@
"id": "30c4e5c6",
"metadata": {},
"source": [
"So that's creating the index.\n",
"Then, as before, we create a chain and use it to answer questions!"
]
},
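The chain-creation step that sentence refers to presumably follows the `VectorDBQA` pattern removed earlier in this diff; a minimal sketch, assuming `vectorstore` is the index's underlying store:

```python
from langchain.chains.vector_db_qa.base import VectorDBQA
from langchain.llms.openai import OpenAI

# "As before": wrap the vectorstore in a question-answering chain, then query it
# (mirrors the qa = VectorDBQA.from_chain_type(...) call removed above).
qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=vectorstore)
qa.run("What did the president say about Ketanji Brown Jackson")
```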

@@ -1,5 +1,41 @@
# Question Answering
Question answering in this context refers to question answering over your document data.
For question answering over other types of data, like [SQL databases](../modules/chains/examples/sqlite.html) or [APIs](../modules/chains/examples/api.html), please see [here](../modules/chains/utility_how_to.html).
+For question answering over many documents, you almost always want to create an index over the data.
+This can be used to smartly access the most relevant documents for a given question, allowing you to avoid having to pass all the documents to the LLM (saving you time and money).
+See [this notebook](../modules/indexes/getting_started.ipynb) for a more detailed introduction to this, but for a super quick start the steps involved are:
+**Load Your Documents**
+```python
+from langchain.document_loaders import TextLoader
+loader = TextLoader('../state_of_the_union.txt')
+```
+See [here](../modules/document_loaders/how_to_guides.rst) for more information on how to get started with document loading.
+**Create Your Index**
+```python
+from langchain.indexes import VectorstoreIndexCreator
+index = VectorstoreIndexCreator().from_loaders([loader])
+```
+The best and most popular index by far at the moment is the VectorStore index.
+**Query Your Index**
+```python
+query = "What did the president say about Ketanji Brown Jackson"
+index.query(query)
+```
+Alternatively, use `query_with_sources` to also get back the sources involved:
+```python
+query = "What did the president say about Ketanji Brown Jackson"
+index.query_with_sources(query)
+```
+Again, these high-level interfaces hide a lot of what is going on under the hood, so please see [this notebook](../modules/indexes/getting_started.ipynb) for a lower-level walkthrough.
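For orientation, here is a rough, minimal sketch of what that one-liner does under the hood, pieced together from the `VectorstoreIndexCreator` and `VectorStoreIndexWrapper` code later in this commit; treating `Chroma` and `OpenAIEmbeddings` as the creator's defaults is an assumption based on the imports there:

```python
from langchain.chains.vector_db_qa.base import VectorDBQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma

# Load the raw documents and split them with the creator's default splitter.
docs = TextLoader('../state_of_the_union.txt').load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
sub_docs = splitter.split_documents(docs)

# Embed the chunks into a vectorstore (Chroma + OpenAIEmbeddings assumed as defaults).
vectorstore = Chroma.from_documents(sub_docs, OpenAIEmbeddings())

# index.query(...) wraps the store in a VectorDBQA chain and runs the question.
chain = VectorDBQA.from_chain_type(OpenAI(temperature=0), vectorstore=vectorstore)
chain.run("What did the president say about Ketanji Brown Jackson")
```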
## Document Question Answering
Question answering involves fetching multiple documents, and then asking a question of them.
The LLM response will contain the answer to your question, based on the content of the documents.
@@ -15,7 +51,7 @@ The following resources exist:
- [Question Answering Notebook](/modules/indexes/chain_examples/question_answering.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB Question Answering Notebook](/modules/indexes/chain_examples/vector_db_qa.ipynb): A notebook walking through how to do question answering over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
-### Adding in sources
+## Adding in sources
There is also a variant of this, where in addition to responding with the answer the language model will also cite its sources (e.g. which of the documents passed in it used).
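Concretely, the `VectorDBQAWithSourcesChain` used by this commit's `query_with_sources` can be driven directly; a minimal sketch, assuming `vectorstore` is an existing vectorstore as above:

```python
from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
from langchain.llms.openai import OpenAI

# Sources-citing variant over an existing vectorstore; this mirrors what
# VectorStoreIndexWrapper.query_with_sources does in the code changes below.
chain = VectorDBQAWithSourcesChain.from_chain_type(
    OpenAI(temperature=0), vectorstore=vectorstore
)
# The result is a dict with 'question', 'answer', and 'sources' keys,
# matching the notebook output shown earlier.
result = chain({chain.question_key: "What did the president say about Ketanji Brown Jackson"})
```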
@@ -31,7 +67,7 @@ The following resources exist:
- [QA With Sources Notebook](/modules/indexes/chain_examples/qa_with_sources.ipynb): A notebook walking through how to accomplish this task.
- [VectorDB QA With Sources Notebook](/modules/indexes/chain_examples/vector_db_qa_with_sources.ipynb): A notebook walking through how to do question answering with sources over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
-### Additional Related Resources
+## Additional Related Resources
Additional related resources include:
- [Utilities for working with Documents](/modules/utils/how_to_guides.rst): Guides on how to use several of the utilities which will prove helpful for this task, including Text Splitters (for splitting up long documents) and Embeddings & Vectorstores (useful for the above Vector DB example).

@@ -1,10 +1,14 @@
-from typing import List, Type
+from typing import Any, List, Optional, Type
from pydantic import BaseModel, Extra, Field
+from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
+from langchain.chains.vector_db_qa.base import VectorDBQA
from langchain.document_loaders.base import BaseLoader
from langchain.embeddings.base import Embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms.base import BaseLLM
+from langchain.llms.openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.chroma import Chroma
@@ -14,6 +18,34 @@ def _get_default_text_splitter() -> TextSplitter:
    return RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+
+
+class VectorStoreIndexWrapper(BaseModel):
+    """Wrapper around a vectorstore for easy access."""
+
+    vectorstore: VectorStore
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+        arbitrary_types_allowed = True
+
+    def query(self, question: str, llm: Optional[BaseLLM] = None, **kwargs: Any) -> str:
+        """Query the vectorstore."""
+        llm = llm or OpenAI(temperature=0)
+        chain = VectorDBQA.from_chain_type(llm, vectorstore=self.vectorstore, **kwargs)
+        return chain.run(question)
+
+    def query_with_sources(
+        self, question: str, llm: Optional[BaseLLM] = None, **kwargs: Any
+    ) -> dict:
+        """Query the vectorstore and get back sources."""
+        llm = llm or OpenAI(temperature=0)
+        chain = VectorDBQAWithSourcesChain.from_chain_type(
+            llm, vectorstore=self.vectorstore, **kwargs
+        )
+        return chain({chain.question_key: question})


class VectorstoreIndexCreator(BaseModel):
    """Logic for creating indexes."""
@@ -27,11 +59,11 @@ class VectorstoreIndexCreator(BaseModel):
        extra = Extra.forbid
        arbitrary_types_allowed = True

-    def from_loaders(self, loaders: List[BaseLoader]) -> VectorStore:
+    def from_loaders(self, loaders: List[BaseLoader]) -> VectorStoreIndexWrapper:
        """Create a vectorstore index from loaders."""
        docs = []
        for loader in loaders:
            docs.extend(loader.load())
        sub_docs = self.text_splitter.split_documents(docs)
        vectorstore = self.vectorstore_cls.from_documents(sub_docs, self.embedding)
-        return vectorstore
+        return VectorStoreIndexWrapper(vectorstore=vectorstore)
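With this change, `from_loaders` hands back the wrapper instead of a raw vectorstore. A minimal sketch of the new call pattern, reusing the path and query from the docs above; `similarity_search` is assumed here as the base `VectorStore` method for direct access:

```python
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator

loader = TextLoader('../state_of_the_union.txt')
index = VectorstoreIndexCreator().from_loaders([loader])  # now a VectorStoreIndexWrapper

# High-level helpers provided by the wrapper.
answer = index.query("What did the president say about Ketanji Brown Jackson")
result = index.query_with_sources("What did the president say about Ketanji Brown Jackson")

# The underlying store is still reachable for direct use.
docs = index.vectorstore.similarity_search("Ketanji Brown Jackson")
```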

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.94"
version = "0.0.95"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
