From b1b6b27c5fca99b7ce49c118cd88f718e034a7d1 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Sun, 13 Nov 2022 20:13:23 -0800 Subject: [PATCH] Harrison/redo docs (#130) Co-authored-by: Eugene Yurtsev --- README.md | 45 +---- docs/conf.py | 4 + docs/core_concepts.md | 25 +++ docs/examples/demos.rst | 10 + .../examples/demos}/llm_math.ipynb | 16 +- .../examples/demos}/map reduce.ipynb | 16 +- {examples => docs/examples/demos}/mrkl.ipynb | 36 ++-- {examples => docs/examples/demos}/natbot.py | 0 docs/examples/demos/react.ipynb | 98 ++++++++++ .../demos}/self_ask_with_search.ipynb | 30 ++- .../examples/demos}/simple_prompts.ipynb | 16 +- .../examples/demos}/sqlite.ipynb | 55 +++++- .../examples/demos}/vector_db_qa.ipynb | 22 ++- docs/examples/integrations.rst | 10 + .../examples/integrations}/embeddings.ipynb | 78 ++++++-- .../integrations}/huggingface_hub.ipynb | 14 +- .../huggingface_tokenizer_text_splitter.ipynb | 180 ++++++++++++++++++ .../examples/integrations}/manifest.ipynb | 12 +- .../examples}/model_laboratory.ipynb | 16 +- docs/examples/prompts.rst | 10 + .../examples/prompts}/generate_examples.ipynb | 75 +++++--- .../prompts}/prompt_optimization.ipynb | 68 +++---- .../examples}/state_of_the_union.txt | 0 docs/getting_started/chains.md | 38 ++++ docs/getting_started/environment.md | 37 ++++ docs/getting_started/installation.md | 11 ++ docs/getting_started/llm.md | 25 +++ docs/index.rst | 68 ++++++- docs/installation.md | 24 +++ docs/integrations.md | 33 ++++ docs/modules/text_splitter.rst | 6 + docs/modules/vectorstore.rst | 6 + docs/requirements.txt | 6 +- .../huggingface_tokenizer_text_splitter.ipynb | 104 ---------- examples/react.ipynb | 83 -------- 35 files changed, 909 insertions(+), 368 deletions(-) create mode 100644 docs/core_concepts.md create mode 100644 docs/examples/demos.rst rename {examples => docs/examples/demos}/llm_math.ipynb (81%) rename {examples => docs/examples/demos}/map reduce.ipynb (86%) rename {examples => docs/examples/demos}/mrkl.ipynb (88%) rename {examples => docs/examples/demos}/natbot.py (100%) create mode 100644 docs/examples/demos/react.ipynb rename {examples => docs/examples/demos}/self_ask_with_search.ipynb (63%) rename {examples => docs/examples/demos}/simple_prompts.ipynb (83%) rename {examples => docs/examples/demos}/sqlite.ipynb (63%) rename {examples => docs/examples/demos}/vector_db_qa.ipynb (80%) create mode 100644 docs/examples/integrations.rst rename {examples => docs/examples/integrations}/embeddings.ipynb (83%) rename {examples => docs/examples/integrations}/huggingface_hub.ipynb (84%) create mode 100644 docs/examples/integrations/huggingface_tokenizer_text_splitter.ipynb rename {examples => docs/examples/integrations}/manifest.ipynb (96%) rename {examples => docs/examples}/model_laboratory.ipynb (88%) create mode 100644 docs/examples/prompts.rst rename {examples => docs/examples/prompts}/generate_examples.ipynb (70%) rename {examples => docs/examples/prompts}/prompt_optimization.ipynb (74%) rename {examples => docs/examples}/state_of_the_union.txt (100%) create mode 100644 docs/getting_started/chains.md create mode 100644 docs/getting_started/environment.md create mode 100644 docs/getting_started/installation.md create mode 100644 docs/getting_started/llm.md create mode 100644 docs/installation.md create mode 100644 docs/integrations.md create mode 100644 docs/modules/text_splitter.rst create mode 100644 docs/modules/vectorstore.rst delete mode 100644 examples/huggingface_tokenizer_text_splitter.ipynb delete mode 100644 
examples/react.ipynb diff --git a/README.md index af99dfda..b195840a 100644 --- a/README.md +++ b/README.md @@ -23,39 +23,13 @@ It aims to create: 2. a flexible interface for combining pieces into a single comprehensive "chain" 3. a schema for easily saving and sharing those chains -## 🔧 Setting up your environment - -Besides the installation of this python package, you will also need to install packages and set environment variables depending on which chains you want to use. - -Note: the reason these packages are not included in the dependencies by default is that as we imagine scaling this package, we do not want to force dependencies that are not needed. - -The following use cases require specific installs and api keys: - -- _OpenAI_: - - Install requirements with `pip install openai` - - Get an OpenAI api key and either set it as an environment variable (`OPENAI_API_KEY`) or pass it to the LLM constructor as `openai_api_key`. -- _Cohere_: - - Install requirements with `pip install cohere` - - Get a Cohere api key and either set it as an environment variable (`COHERE_API_KEY`) or pass it to the LLM constructor as `cohere_api_key`. -- _HuggingFace Hub_ - - Install requirements with `pip install huggingface_hub` - - Get a HuggingFace Hub api token and either set it as an environment variable (`HUGGINGFACEHUB_API_TOKEN`) or pass it to the LLM constructor as `huggingfacehub_api_token`. -- _SerpAPI_: - - Install requirements with `pip install google-search-results` - - Get a SerpAPI api key and either set it as an environment variable (`SERPAPI_API_KEY`) or pass it to the LLM constructor as `serpapi_api_key`. -- _NatBot_: - - Install requirements with `pip install playwright` -- _Wikipedia_: - - Install requirements with `pip install wikipedia` -- _Elasticsearch_: - - Install requirements with `pip install elasticsearch` - - Set up Elasticsearch backend. If you want to do locally, [this](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/getting-started.html) is a good guide. -- _FAISS_: - - Install requirements with `pip install faiss` for Python 3.7 and `pip install faiss-cpu` for Python 3.10+. -- _Manifest_: - - Install requirements with `pip install manifest-ml` (Note: this is only available in Python 3.8+ currently). - -If you are using the `NLTKTextSplitter` or the `SpacyTextSplitter`, you will also need to install the appropriate models. For example, if you want to use the `SpacyTextSplitter`, you will need to install the `en_core_web_sm` model with `python -m spacy download en_core_web_sm`. Similarly, if you want to use the `NLTKTextSplitter`, you will need to install the `punkt` model with `python -m nltk.downloader punkt`. +## 📖 Documentation + +Please see [here](https://langchain.readthedocs.io/en/latest/?) for full documentation on: +- Getting started (installation, setting up environment, simple examples) +- How-To examples (demos, integrations, helper functions) +- Reference (full API docs) +- Resources (high level explanation of core concepts) ## 🚀 What can I do with this @@ -130,11 +104,6 @@ query = "What did the president say about Ketanji Brown Jackson" docs = docsearch.similarity_search(query) ``` -## 📖 Documentation - -The above examples are probably the most user friendly documentation that exists, -but full API docs can be found [here](https://langchain.readthedocs.io/en/latest/?). - ## 🤖 Developer Guide To begin developing on this project, first clone the repo locally. 
diff --git a/docs/conf.py b/docs/conf.py index 17b2c4b1..f5e6b1fc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -37,10 +37,14 @@ extensions = [ "sphinx.ext.autodoc.typehints", "sphinx.ext.autosummary", "sphinx.ext.napoleon", + "sphinx.ext.viewcode", "sphinxcontrib.autodoc_pydantic", "myst_parser", + "nbsphinx", + "sphinx_panels", ] + autodoc_pydantic_model_show_json = False autodoc_pydantic_field_list_validators = False autodoc_pydantic_config_members = False diff --git a/docs/core_concepts.md b/docs/core_concepts.md new file mode 100644 index 00000000..e309cea0 --- /dev/null +++ b/docs/core_concepts.md @@ -0,0 +1,25 @@ +# Core Concepts + +This section goes over the core concepts of LangChain. +Understanding these will go a long way in helping you understand the codebase and how to construct chains. + +## Prompts +Prompts generically have a `format` method that takes in variables and returns a formatted string. +The simplest implementation of this is to have a template string with some variables in it, and then format it with the incoming variables. +More complex iterations dynamically construct the template string from few-shot examples, etc. + +## LLMs +Wrappers around Large Language Models (in particular, the `generate` ability of large language models) provide some of the core functionality of LangChain. +These wrappers are classes that are callable: they take in an input string, and return the generated output string. + +## Embeddings +These classes are very similar to the LLM classes in that they are wrappers around models, +but rather than return a string they return an embedding (list of floats). These are particularly useful when +implementing semantic search functionality. They expose separate methods for embedding queries versus embedding documents. + +## Vectorstores +These are datastores that store documents. They expose a method for passing in a string and finding similar documents. + +## Chains +These are pipelines that combine multiple of the above ideas. +They vary greatly in complexity and are a combination of generic, highly configurable pipelines and narrower (but usually more complex) pipelines. diff --git a/docs/examples/demos.rst b/docs/examples/demos.rst new file mode 100644 index 00000000..e9518f1a --- /dev/null +++ b/docs/examples/demos.rst @@ -0,0 +1,10 @@ +Demos +===== + +The examples here are all end-to-end chains of specific applications. + +.. toctree:: + :maxdepth: 1 + :glob: + + demos/* diff --git a/examples/llm_math.ipynb b/docs/examples/demos/llm_math.ipynb similarity index 81% rename from examples/llm_math.ipynb rename to docs/examples/demos/llm_math.ipynb index 90b9cebc..cd47a7fd 100644 --- a/examples/llm_math.ipynb +++ b/docs/examples/demos/llm_math.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "e71e720f", + "metadata": {}, + "source": [ + "# LLM Math\n", + "\n", + "This notebook showcases using LLMs and Python REPLs to do complex word math problems." 
+ ] + }, { "cell_type": "code", "execution_count": 1, @@ -10,6 +20,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", "How many of the integers between 0 and 99 inclusive are divisible by 8?\u001b[102m\n", "\n", "```python\n", @@ -21,7 +34,8 @@ "```\n", "\u001b[0m\n", "Answer: \u001b[103m13\n", - "\u001b[0m" + "\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { diff --git a/examples/map reduce.ipynb b/docs/examples/demos/map reduce.ipynb similarity index 86% rename from examples/map reduce.ipynb rename to docs/examples/demos/map reduce.ipynb index b10bac35..18dcd617 100644 --- a/examples/map reduce.ipynb +++ b/docs/examples/demos/map reduce.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "d9a0131f", + "metadata": {}, + "source": [ + "# Map Reduce\n", + "\n", + "This notebook showcases an example of map-reduce chains: recursive summarization." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -29,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "99bbe19b", "metadata": {}, "outputs": [ @@ -39,13 +49,13 @@ "\"\\n\\nThe President discusses the recent aggression by Russia, and the response by the United States and its allies. He announces new sanctions against Russia, and says that the free world is united in holding Putin accountable. The President also discusses the American Rescue Plan, the Bipartisan Infrastructure Law, and the Bipartisan Innovation Act. Finally, the President addresses the need for women's rights and equality for LGBTQ+ Americans.\"" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open('../state_of_the_union.txt') as f:\n", " state_of_the_union = f.read()\n", "mp_chain.run(state_of_the_union)" ] diff --git a/examples/mrkl.ipynb b/docs/examples/demos/mrkl.ipynb similarity index 88% rename from examples/mrkl.ipynb rename to docs/examples/demos/mrkl.ipynb index 7d335b19..b2462aea 100644 --- a/examples/mrkl.ipynb +++ b/docs/examples/demos/mrkl.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "f1390152", + "metadata": {}, + "source": [ + "# MRKL\n", + "\n", + "This notebook showcases using the MRKL chain to route between tasks." + ] + }, { "cell_type": "markdown", "id": "39ea3638", @@ -22,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "id": "07e96d99", "metadata": {}, "outputs": [], @@ -30,12 +40,12 @@ "llm = OpenAI(temperature=0)\n", "search = SerpAPIChain()\n", "llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n", - "db = SQLDatabase.from_uri(\"sqlite:///../notebooks/Chinook.db\")\n", + "db = SQLDatabase.from_uri(\"sqlite:///../../../notebooks/Chinook.db\")\n", "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)\n", "chains = [\n", " ChainConfig(\n", " action_name = \"Search\",\n", - " action=search.search,\n", + " action=search.run,\n", " action_description=\"useful for when you need to answer questions about current events\"\n", " ),\n", " ChainConfig(\n", @@ -46,7 +56,7 @@ " \n", " ChainConfig(\n", " action_name=\"FooBar DB\",\n", - " action=db_chain.query,\n", + " action=db_chain.run,\n", " action_description=\"useful for when you need to answer questions about FooBar. 
Input should be in the form of a question\"\n", " )\n", "]" @@ -54,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "id": "a069c4b6", "metadata": {}, "outputs": [], @@ -64,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "e603cd7d", "metadata": {}, "outputs": [ @@ -112,7 +122,7 @@ "'2.1520202182226886'" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -123,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "a5c07010", "metadata": {}, "outputs": [ @@ -159,22 +169,22 @@ "What albums by Alanis Morissette are in the FooBar database?\n", "SQLQuery:\u001b[102m SELECT Title FROM Album WHERE ArtistId = (SELECT ArtistId FROM Artist WHERE Name = 'Alanis Morissette')\u001b[0m\n", "SQLResult: \u001b[103m[('Jagged Little Pill',)]\u001b[0m\n", - "Answer:\u001b[102m The album \"Jagged Little Pill\" by Alanis Morissette is in the FooBar database.\u001b[0m\n", + "Answer:\u001b[102m Jagged Little Pill\u001b[0m\n", "\u001b[1m> Finished chain.\u001b[0m\n", "\n", - "Observation: \u001b[101m The album \"Jagged Little Pill\" by Alanis Morissette is in the FooBar database.\u001b[0m\n", + "Observation: \u001b[101m Jagged Little Pill\u001b[0m\n", "Thought:\u001b[102m I now know the final answer\n", - "Final Answer: The album \"Jagged Little Pill\" by Alanis Morissette is the only album by Alanis Morissette in the FooBar database.\u001b[0m\n", + "Final Answer: The album is by Alanis Morissette and the albums in the FooBar database by her are Jagged Little Pill\u001b[0m\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { "data": { "text/plain": [ - "'The album \"Jagged Little Pill\" by Alanis Morissette is the only album by Alanis Morissette in the FooBar database.'" + "'The album is by Alanis Morissette and the albums in the FooBar database by her are Jagged Little Pill'" ] }, - "execution_count": 5, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } diff --git a/examples/natbot.py b/docs/examples/demos/natbot.py similarity index 100% rename from examples/natbot.py rename to docs/examples/demos/natbot.py diff --git a/docs/examples/demos/react.ipynb b/docs/examples/demos/react.ipynb new file mode 100644 index 00000000..d2c120f3 --- /dev/null +++ b/docs/examples/demos/react.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "82140df0", + "metadata": {}, + "source": [ + "# ReAct\n", + "\n", + "This notebook showcases the implementation of the ReAct chain logic." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4e272b47", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain import OpenAI, ReActChain, Wikipedia\n", + "\n", + "llm = OpenAI(temperature=0)\n", + "react = ReActChain(llm=llm, docstore=Wikipedia(), verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8078c8f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\n", + "Thought 1:\u001b[102m I need to search David Chanoff and find the U.S. Navy admiral he\n", + "collaborated with.\n", + "Action 1: Search[David Chanoff]\u001b[0m\n", + "Observation 1: \u001b[103mDavid Chanoff is a noted author of non-fiction work. 
His work has typically involved collaborations with the principal protagonist of the work concerned. His collaborators have included; Augustus A. White, Joycelyn Elders, Đoàn Văn Toại, William J. Crowe, Ariel Sharon, Kenneth Good and Felix Zandman. He has also written about a wide range of subjects including literary history, education and foreign for The Washington Post, The New Republic and The New York Times Magazine. He has published more than twelve books.\u001b[0m\n", + "Thought 2:\u001b[102m The U.S. Navy admiral David Chanoff collaborated with is William J. Crowe.\n", + "Action 2: Search[William J. Crowe]\u001b[0m\n", + "Observation 2: \u001b[103mWilliam James Crowe Jr. (January 2, 1925 – October 18, 2007) was a United States Navy admiral and diplomat who served as the 11th chairman of the Joint Chiefs of Staff under Presidents Ronald Reagan and George H. W. Bush, and as the ambassador to the United Kingdom and Chair of the Intelligence Oversight Board under President Bill Clinton.\u001b[0m\n", + "Thought 3:\u001b[102m William J. Crowe served as the ambassador to the United Kingdom under President Bill Clinton. So the answer is Bill Clinton.\n", + "Action 3: Finish[Bill Clinton]\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Bill Clinton'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\"\n", + "react.run(question)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0a6bd3b4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/self_ask_with_search.ipynb b/docs/examples/demos/self_ask_with_search.ipynb similarity index 63% rename from examples/self_ask_with_search.ipynb rename to docs/examples/demos/self_ask_with_search.ipynb index 9dc71366..07135e58 100644 --- a/examples/self_ask_with_search.ipynb +++ b/docs/examples/demos/self_ask_with_search.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "0c3f1df8", + "metadata": {}, + "source": [ + "# Self Ask With Search\n", + "\n", + "This notebook showcases the Self Ask With Search chain." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -10,13 +20,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[49m\n", - "Are follow up questions needed here:\u001b[0m\u001b[102m Yes.\n", - "Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\u001b[49m\n", - "Intermediate answer: \u001b[0m\u001b[103mCarlos Alcaraz.\u001b[0m\u001b[102m\n", - "Follow up: Where is Carlos Alcaraz from?\u001b[0m\u001b[49m\n", - "Intermediate answer: \u001b[0m\u001b[103mEl Palmar, Murcia, Spain.\u001b[0m\u001b[102m\n", - "So the final answer is: El Palmar, Murcia, Spain\u001b[0m" + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "What is the hometown of the reigning men's U.S. 
Open champion?\n", + "Are follow up questions needed here:\u001b[102m Yes.\n", + "Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n", + "Intermediate answer: \u001b[103mCarlos Alcaraz won the 2022 Men's single title while Poland's Iga Swiatek won the Women's single title defeating Tunisian's Ons Jabeur..\u001b[0m\u001b[102m\n", + "Follow up: Where is Carlos Alcaraz from?\u001b[0m\n", + "Intermediate answer: \u001b[103mEl Palmar, Murcia, Spain.\u001b[0m\u001b[102m\n", + "So the final answer is: El Palmar, Murcia, Spain\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { @@ -44,7 +58,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6195fc82", + "id": "683d69e7", "metadata": {}, "outputs": [], "source": [] diff --git a/examples/simple_prompts.ipynb b/docs/examples/demos/simple_prompts.ipynb similarity index 83% rename from examples/simple_prompts.ipynb rename to docs/examples/demos/simple_prompts.ipynb index b0800b0a..4fb65a18 100644 --- a/examples/simple_prompts.ipynb +++ b/docs/examples/demos/simple_prompts.ipynb @@ -1,8 +1,18 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "d8a5c5d4", + "metadata": {}, + "source": [ + "# Simple Example\n", + "\n", + "This notebook showcases a simple chain." + ] + }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "51a54c4d", "metadata": {}, "outputs": [ @@ -12,7 +22,7 @@ "' The year Justin Beiber was born was 1994. In 1994, the Dallas Cowboys won the Super Bowl.'" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -28,7 +38,7 @@ "\n", "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", "\n", - "llm_chain.predict(question=question)" + "llm_chain.run(question)" ] }, { diff --git a/examples/sqlite.ipynb b/docs/examples/demos/sqlite.ipynb similarity index 63% rename from examples/sqlite.ipynb rename to docs/examples/demos/sqlite.ipynb index 9f8be5a4..4f12800a 100644 --- a/examples/sqlite.ipynb +++ b/docs/examples/demos/sqlite.ipynb @@ -1,9 +1,27 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "0ed6aab1", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# SQLite example\n", + "\n", + "This example showcases hooking up an LLM to answer questions over a database." + ] + }, { "cell_type": "markdown", "id": "b2f66479", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "This uses the example Chinook database.\n", "To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository." 
@@ -13,7 +31,11 @@ "cell_type": "code", "execution_count": 1, "id": "d0e27d88", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from langchain import OpenAI, SQLDatabase, SQLDatabaseChain" @@ -23,10 +45,14 @@ "cell_type": "code", "execution_count": 2, "id": "72ede462", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "db = SQLDatabase.from_uri(\"sqlite:///../notebooks/Chinook.db\")\n", + "db = SQLDatabase.from_uri(\"sqlite:///../../../notebooks/Chinook.db\")\n", "llm = OpenAI(temperature=0)\n", "db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)" ] @@ -35,21 +61,30 @@ "cell_type": "code", "execution_count": 3, "id": "15ff81df", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[102m SELECT COUNT(*) FROM Employee\u001b[0m\u001b[49m\n", - "SQLResult: \u001b[0m\u001b[103m[(8,)]\u001b[0m\u001b[49m\n", - "Answer:\u001b[0m\u001b[102m There are 8 employees.\u001b[0m" + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "How many employees are there?\n", + "SQLQuery:\u001b[102m SELECT COUNT(*) FROM Employee\u001b[0m\n", + "SQLResult: \u001b[103m[(8,)]\u001b[0m\n", + "Answer:\u001b[102m 8\u001b[0m\n", + "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { "data": { "text/plain": [ - "' There are 8 employees.'" + "' 8'" ] }, "execution_count": 3, @@ -64,7 +99,7 @@ { "cell_type": "code", "execution_count": null, - "id": "146fa162", + "id": "61d91b85", "metadata": {}, "outputs": [], "source": [] diff --git a/examples/vector_db_qa.ipynb b/docs/examples/demos/vector_db_qa.ipynb similarity index 80% rename from examples/vector_db_qa.ipynb rename to docs/examples/demos/vector_db_qa.ipynb index 9e774d26..bf1fcbf9 100644 --- a/examples/vector_db_qa.ipynb +++ b/docs/examples/demos/vector_db_qa.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "07c1e3b9", + "metadata": {}, + "source": [ + "# Vector DB Question/Answering\n", + "\n", + "This example showcases question answering over a vector database." 
+ ] + }, { "cell_type": "code", "execution_count": 1, @@ -15,12 +25,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "5c7049db", "metadata": {}, "outputs": [], "source": [ - "with open('state_of_the_union.txt') as f:\n", + "with open('../state_of_the_union.txt') as f:\n", " state_of_the_union = f.read()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_text(state_of_the_union)\n", @@ -31,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "3018f865", "metadata": {}, "outputs": [], @@ -41,17 +51,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "032a47f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\" The president said that Ketanji Brown Jackson is one of our nation's top legal minds, who will continue Justice Breyer’s legacy of excellence.\"" + "' The President said that Ketanji Brown Jackson is a consensus builder and has received a broad range of support since she was nominated.'" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/examples/integrations.rst b/docs/examples/integrations.rst new file mode 100644 index 00000000..fb2b3cae --- /dev/null +++ b/docs/examples/integrations.rst @@ -0,0 +1,10 @@ +Integrations +============ + +The examples here all highlight a specific type of integration. + +.. toctree:: + :maxdepth: 1 + :glob: + + integrations/* \ No newline at end of file diff --git a/examples/embeddings.ipynb b/docs/examples/integrations/embeddings.ipynb similarity index 83% rename from examples/embeddings.ipynb rename to docs/examples/integrations/embeddings.ipynb index fb714b4d..e80a675f 100644 --- a/examples/embeddings.ipynb +++ b/docs/examples/integrations/embeddings.ipynb @@ -1,10 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "7ef4d402-6662-4a26-b612-35b542066487", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Embeddings & VectorStores\n", + "\n", + "This notebook showcases how to use embeddings to create a VectorStore." + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "965eecee", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from langchain.embeddings.openai import OpenAIEmbeddings\n", @@ -15,12 +33,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "68481687", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "with open('state_of_the_union.txt') as f:\n", + "with open('../state_of_the_union.txt') as f:\n", " state_of_the_union = f.read()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_text(state_of_the_union)\n", @@ -30,9 +52,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "015f4ff5", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "docsearch = FAISS.from_texts(texts, embeddings)\n", @@ -43,9 +69,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "67baf32e", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -67,11 +97,23 @@ "print(docs[0].page_content)" ] }, + { + "cell_type": "markdown", + "id": "eea6e627", + "metadata": {}, + "source": [ 
"## Requires having ElasticSearch setup" + ] + }, { "cell_type": "code", "execution_count": 6, "id": "4906b8a3", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "docsearch = ElasticVectorSearch.from_texts(texts, embeddings, elasticsearch_url=\"http://localhost:9200\")\n", @@ -84,7 +126,11 @@ "cell_type": "code", "execution_count": 7, "id": "95f9eee9", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -105,14 +151,6 @@ "source": [ "print(docs[0].page_content)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70a253c4", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -131,7 +169,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/examples/huggingface_hub.ipynb b/docs/examples/integrations/huggingface_hub.ipynb similarity index 84% rename from examples/huggingface_hub.ipynb rename to docs/examples/integrations/huggingface_hub.ipynb index 06245be9..b2e59376 100644 --- a/examples/huggingface_hub.ipynb +++ b/docs/examples/integrations/huggingface_hub.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "959300d4", + "metadata": {}, + "source": [ + "# HuggingFace Hub\n", + "\n", + "This example showcases how to connect to the HuggingFace Hub." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -25,7 +35,7 @@ "\n", "question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n", "\n", - "print(llm_chain.predict(question=question))" + "print(llm_chain.run(question))" ] }, { @@ -53,7 +63,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.7" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/docs/examples/integrations/huggingface_tokenizer_text_splitter.ipynb b/docs/examples/integrations/huggingface_tokenizer_text_splitter.ipynb new file mode 100644 index 00000000..2ee0541e --- /dev/null +++ b/docs/examples/integrations/huggingface_tokenizer_text_splitter.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b118c9dc", + "metadata": {}, + "source": [ + "# HuggingFace Tokenizers\n", + "\n", + "This notebook show cases how to use HuggingFace tokenizers to split text." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e82c4685", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.text_splitter import CharacterTextSplitter" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a8ce51d5", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import GPT2TokenizerFast\n", + "\n", + "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ca5e72c0", + "metadata": {}, + "outputs": [], + "source": [ + "with open('../state_of_the_union.txt') as f:\n", + " state_of_the_union = f.read()\n", + "text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_text(state_of_the_union)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "37cdfbeb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. 
My fellow Americans. \n", + "\n", + "Last year COVID-19 kept us apart. This year we are finally together again. \n", + "\n", + "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n", + "\n", + "With a duty to one another to the American people to the Constitution. \n", + "\n", + "And with an unwavering resolve that freedom will always triumph over tyranny. \n", + "\n", + "Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n", + "\n", + "He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n", + "\n", + "He met the Ukrainian people. \n", + "\n", + "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n", + "\n", + "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n", + "\n", + "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \n", + "\n", + "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \n", + "\n", + "Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n", + "\n", + "Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n", + "\n", + "They keep moving. \n", + "\n", + "And the costs and the threats to America and the world keep rising. \n", + "\n", + "That’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \n", + "\n", + "The United States is a member along with 29 other nations. \n", + "\n", + "It matters. American diplomacy matters. American resolve matters. \n", + "\n", + "Putin’s latest attack on Ukraine was premeditated and unprovoked. \n", + "\n", + "He rejected repeated efforts at diplomacy. \n", + "\n", + "He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \n", + "\n", + "We prepared extensively and carefully. \n", + "\n", + "We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \n", + "\n", + "I spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \n", + "\n", + "We countered Russia’s lies with truth. \n", + "\n", + "And now that he has acted the free world is holding him accountable. \n", + "\n", + "Along with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \n", + "\n", + "We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \n", + "\n", + "Together with our allies –we are right now enforcing powerful economic sanctions. \n", + "\n", + "We are cutting off Russia’s largest banks from the international financial system. 
\n", + "\n", + "Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \n", + "\n", + "We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. \n", + "\n", + "Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n", + "\n", + "The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n", + "\n", + "We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. \n", + "\n", + "And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \n", + "\n", + "The Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame. \n", + "\n", + "Together with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \n", + "\n", + "We are giving more than $1 Billion in direct assistance to Ukraine. \n", + "\n", + "And we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering. \n", + "\n", + "Let me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine. \n", + "\n", + "Our forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies – in the event that Putin decides to keep moving west. \n" + ] + } + ], + "source": [ + "print(texts[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d214aec2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/manifest.ipynb b/docs/examples/integrations/manifest.ipynb similarity index 96% rename from examples/manifest.ipynb rename to docs/examples/integrations/manifest.ipynb index 18e80a87..2c809487 100644 --- a/examples/manifest.ipynb +++ b/docs/examples/integrations/manifest.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "b4462a94", + "metadata": {}, + "source": [ + "# Manifest\n", + "\n", + "This notebook goes over how to use Manifest and LangChain." 
+ ] + }, { "cell_type": "markdown", "id": "59fcaebc", @@ -96,7 +106,7 @@ } ], "source": [ - "with open('state_of_the_union.txt') as f:\n", + "with open('../state_of_the_union.txt') as f:\n", " state_of_the_union = f.read()\n", "mp_chain.run(state_of_the_union)" ] diff --git a/examples/model_laboratory.ipynb b/docs/examples/model_laboratory.ipynb similarity index 88% rename from examples/model_laboratory.ipynb rename to docs/examples/model_laboratory.ipynb index b648ca49..0646386e 100644 --- a/examples/model_laboratory.ipynb +++ b/docs/examples/model_laboratory.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "920a3c1a", + "metadata": {}, + "source": [ + "# Model Laboratory\n", + "\n", + "This example goes over how to use the ModelLaboratory to test out and compare different models." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -18,7 +28,11 @@ "metadata": {}, "outputs": [], "source": [ - "llms = [OpenAI(temperature=0), Cohere(model=\"command-xlarge-20221108\", max_tokens=20, temperature=0), HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":1})]" + "llms = [\n", + " OpenAI(temperature=0), \n", + " Cohere(model=\"command-xlarge-20221108\", max_tokens=20, temperature=0), \n", + " HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":1})\n", + "]" ] }, { diff --git a/docs/examples/prompts.rst b/docs/examples/prompts.rst new file mode 100644 index 00000000..6fe34382 --- /dev/null +++ b/docs/examples/prompts.rst @@ -0,0 +1,10 @@ +Prompts +======= + +The examples here all highlight how to work with prompts. + +.. toctree:: + :maxdepth: 1 + :glob: + + prompts/* diff --git a/examples/generate_examples.ipynb b/docs/examples/prompts/generate_examples.ipynb similarity index 70% rename from examples/generate_examples.ipynb rename to docs/examples/prompts/generate_examples.ipynb index 2c334ba0..db28cdec 100644 --- a/examples/generate_examples.ipynb +++ b/docs/examples/prompts/generate_examples.ipynb @@ -1,10 +1,28 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "f5d249ee", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Generate Examples\n", + "\n", + "This notebook shows how to use LangChain to generate more examples similar to the ones you already have." + ] + }, { "cell_type": "code", "execution_count": 1, "id": "1685fa2f", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from langchain.chains.react.prompt import EXAMPLES\n", @@ -14,9 +32,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "334ef4f7", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { "text/plain": [ "'Question: What is the elevation range for the area that the eastern sector of the\\nColorado orogeny extends into?\\nThought 1: I need to search Colorado orogeny, find the area that the eastern sector\\nof the Colorado orogeny extends into, then find the elevation range of the\\narea.\\nAction 1: Search[Colorado orogeny]\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in\\nColorado and surrounding areas.\\nThought 2: It does not mention the eastern sector. 
So I need to look up eastern\\nsector.\\nAction 2: Lookup[eastern sector]\\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called\\nthe Central Plains orogeny.\\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I\\nneed to search High Plains and find its elevation range.\\nAction 3: Search[High Plains]\\nObservation 3: High Plains refers to one of two distinct land regions\\nThought 4: I need to instead search High Plains (United States).\\nAction 4: Search[High Plains (United States)]\\nObservation 4: The High Plains are a subregion of the Great Plains. From east to west, the\\nHigh Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130\\nm).[3]\\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer\\nis 1,800 to 7,000 ft.\\nAction 5: Finish[1,800 to 7,000 ft]'" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -36,9 +58,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "a7bd36bc", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "new_example = generate_example(EXAMPLES, OpenAI())" @@ -46,40 +72,35 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "e1efb008", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { "text/plain": [ "['',\n", " '',\n", - " 'Question: Is the Mount Everest taller than the Mount Kilimanjaro?',\n", - " '',\n", - " 'Thought 1: I need to search Mount Everest and Mount Kilimanjaro, find their',\n", - " 'heights, then compare them.',\n", + " 'Question: Which ocean is the world’s smallest?',\n", " '',\n", - " 'Action 1: Search[Mount Everest]',\n", + " 'Thought 1: I need to search for oceans and find which one is the world’s smallest.',\n", " '',\n", - " \"Observation 1: Mount Everest, at 8,848 metres (29,029 ft), is the world's highest mountain\",\n", - " 'and a particularly popular goal for mountaineers.',\n", + " 'Action 1: Search[oceans]',\n", " '',\n", - " 'Thought 2: Mount Everest is 8,848 metres tall. I need to search Mount Kilimanjaro',\n", - " 'next.',\n", + " 'Observation 1: There are five oceans: the Pacific, Atlantic, Indian, Southern, and Arctic.',\n", " '',\n", - " 'Action 2: Search[Mount Kilimanjaro]',\n", + " 'Thought 2: I need to compare the sizes of the oceans and find which one is the smallest.',\n", " '',\n", - " 'Observation 2: Mount Kilimanjaro, with its three volcanic cones, Kibo, Mawenzi, and',\n", - " 'Shira, is a freestanding mountain in Tanzania. It is the highest mountain in',\n", - " 'Africa, and rises approximately 4,900 metres (16,100 ft) from its base to 5,895',\n", - " 'metres (19,341 ft) above sea level.',\n", + " 'Action 2: Compare[Pacific, Atlantic, Indian, Southern, Arctic]',\n", " '',\n", - " 'Thought 3: Mount Kilimanjaro is 5,895 metres tall. 
8,848 metres (Mount Everest) >',\n", - " '5,895 metres (Mount Kil']" + " 'Observation 2: The Arctic is the smallest ocean.']" ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -91,7 +112,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d8843d7b", + "id": "1ed01ba2", "metadata": {}, "outputs": [], "source": [] @@ -113,7 +134,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/examples/prompt_optimization.ipynb b/docs/examples/prompts/prompt_optimization.ipynb similarity index 74% rename from examples/prompt_optimization.ipynb rename to docs/examples/prompts/prompt_optimization.ipynb index 2c8d1358..c06d3a98 100644 --- a/examples/prompt_optimization.ipynb +++ b/docs/examples/prompts/prompt_optimization.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "d7467b67", + "metadata": {}, + "source": [ + "# Optimized Prompts\n", + "\n", + "This example showcases how using the OptimizedPrompt class enables selection of the most relevant examples to include as few-shot examples in the prompt." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -13,7 +23,7 @@ "from langchain.llms.openai import OpenAI\n", "from langchain.prompts.optimized import OptimizedPrompt\n", "from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n", - "from langchain.vectorstores.faiss import FAISS" + "from langchain.vectorstores.faiss_search import FAISS" ] }, { @@ -101,10 +111,18 @@ "print(prompt.format(k=1, input=\"What is the highest mountain peak in Asia?\"))" ] }, + { + "cell_type": "markdown", + "id": "a5dc3525", + "metadata": {}, + "source": [ + "## Requires having ElasticSearch setup" + ] + }, { "cell_type": "code", - "execution_count": 7, - "id": "f7f06820", + "execution_count": null, + "id": "bbd92d08", "metadata": {}, "outputs": [], "source": [ @@ -120,48 +138,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "bd91f408", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "Question: What is the elevation range for the area that the eastern sector of the\n", - "Colorado orogeny extends into?\n", - "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector\n", - "of the Colorado orogeny extends into, then find the elevation range of the\n", - "area.\n", - "Action 1: Search[Colorado orogeny]\n", - "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in\n", - "Colorado and surrounding areas.\n", - "Thought 2: It does not mention the eastern sector. So I need to look up eastern\n", - "sector.\n", - "Action 2: Lookup[eastern sector]\n", - "Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called\n", - "the Central Plains orogeny.\n", - "Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I\n", - "need to search High Plains and find its elevation range.\n", - "Action 3: Search[High Plains]\n", - "Observation 3: High Plains refers to one of two distinct land regions\n", - "Thought 4: I need to instead search High Plains (United States).\n", - "Action 4: Search[High Plains (United States)]\n", - "Observation 4: The High Plains are a subregion of the Great Plains. 
From east to west, the\n", - "High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130\n", - "m).[3]\n", - "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer\n", - "is 1,800 to 7,000 ft.\n", - "Action 5: Finish[1,800 to 7,000 ft]\n", - "\n", - "\n", - "\n", - "Question: What is the highest mountain peak in Asia?\n" - ] - } - ], "source": [ "print(prompt.format(k=1, input=\"What is the highest mountain peak in Asia?\"))" ] @@ -191,7 +171,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/examples/state_of_the_union.txt b/docs/examples/state_of_the_union.txt similarity index 100% rename from examples/state_of_the_union.txt rename to docs/examples/state_of_the_union.txt diff --git a/docs/getting_started/chains.md b/docs/getting_started/chains.md new file mode 100644 index 00000000..68013b6c --- /dev/null +++ b/docs/getting_started/chains.md @@ -0,0 +1,38 @@ +# Using Chains + +Calling an LLM is a great first step, but it's just the beginning. +Normally when you use an LLM in an application, you are not sending user input directly to the LLM. +Instead, you are probably taking user input and constructing a prompt, and then sending that to the LLM. + +For example, in the previous example, the text we passed in was hardcoded to ask for a name for a company that made colorful socks. +In this imaginary service, what we would want to do is take only the user input describing what the company does, and then format the prompt with that information. + +This is easy to do with LangChain! + +First, let's define the prompt: + +```python +from langchain.prompts import Prompt +prompt = Prompt( + input_variables=["product"], + template="What is a good name for a company that makes {product}?", +) +``` + +We can now create a very simple chain that will take user input, format the prompt with it, and then send it to the LLM: + +```python +from langchain.chains import LLMChain +chain = LLMChain(llm=llm, prompt=prompt) +``` + +Now we can run that chain, only specifying the product! + +```python +chain.run("colorful socks") +``` + +There we go! There's the first chain. + +That is it for the Getting Started example. +As a next step, we would suggest checking out the more complex chains in the [Demos section](/examples/demos.rst). diff --git a/docs/getting_started/environment.md b/docs/getting_started/environment.md new file mode 100644 index 00000000..15a25c46 --- /dev/null +++ b/docs/getting_started/environment.md @@ -0,0 +1,37 @@ +# Setting up your environment + +Using LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. +There are two components to setting this up: installing the correct python packages and setting the right environment variables. + +## Python packages +The python package needed varies based on the integration. See the list of integrations for details. +There should also be helpful error messages raised if you try to run an integration and are missing any required python packages. + +## Environment Variables +The environment variable needed varies based on the integration. See the list of integrations for details. +There should also be helpful error messages raised if you try to run an integration and are missing any required environment variables. + +You can set the environment variable in a few ways. 
+If you are trying to set the environment variable `FOO` to value `bar`, here are the ways you could do so: +- From the command line: +``` +export FOO=bar +``` +- From the python notebook/script: +```python +import os +os.environ["FOO"] = "bar" +``` + +For the Getting Started example, we will be using OpenAI's APIs, so we will first need to install their SDK: + +``` +pip install openai +``` + +We will then need to set the environment variable. Let's do this from inside the Jupyter notebook (or Python script). + +```python +import os +os.environ["OPENAI_API_KEY"] = "..." +``` diff --git a/docs/getting_started/installation.md b/docs/getting_started/installation.md new file mode 100644 index 00000000..d098234f --- /dev/null +++ b/docs/getting_started/installation.md @@ -0,0 +1,11 @@ +# Installation + +LangChain is available on PyPI, so it is easily installable with: + +``` +pip install langchain +``` + +For more involved installation options, see the [Installation Reference](/installation.md) section. + +That's it! LangChain is now installed. You can now use LangChain from a python script or Jupyter notebook. diff --git a/docs/getting_started/llm.md b/docs/getting_started/llm.md new file mode 100644 index 00000000..a4ac89cc --- /dev/null +++ b/docs/getting_started/llm.md @@ -0,0 +1,25 @@ +# Calling an LLM + +The most basic building block of LangChain is calling an LLM on some input. +Let's walk through a simple example of how to do this. +For this purpose, let's pretend we are building a service that generates a company name based on what the company makes. + +In order to do this, we first need to import the LLM wrapper. + +```python +from langchain.llms import OpenAI +``` + +We can then initialize the wrapper with any arguments. +In this example, we probably want the outputs to be MORE random, so we'll initialize it with a HIGH temperature. + +```python +llm = OpenAI(temperature=0.9) +``` + +We can now call it on some input! + +```python +text = "What would be a good company name for a company that makes colorful socks?" +llm(text) +``` diff --git a/docs/index.rst b/docs/index.rst index 2acf057f..016b72a7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,18 +1,82 @@ Welcome to LangChain ========================== +Large language models (LLMs) are emerging as a transformative technology, enabling +developers to build applications that they previously could not. +But using these LLMs in isolation is often not enough to +create a truly powerful app - the real power comes when you are able to +combine them with other sources of computation or knowledge. + +This library is aimed at assisting in the development of those types of applications. +It aims to create: + +1. a comprehensive collection of pieces you would ever want to combine +2. a flexible interface for combining pieces into a single comprehensive "chain" +3. a schema for easily saving and sharing those chains + +The documentation is structured into the following sections: + + +.. toctree:: + :maxdepth: 1 + :caption: Getting Started + :name: getting_started + + getting_started/installation.md + getting_started/environment.md + getting_started/llm.md + getting_started/chains.md + +Goes over a simple walkthrough and tutorial for getting started, setting up a simple chain that generates a company name based on what the company makes. +Covers installation, environment setup, calling LLMs, and using prompts. +Start here if you haven't used LangChain before. + + .. toctree:: - :maxdepth: 2 - :caption: User API + :maxdepth: 1 + :caption: How-To Examples + :name: examples + examples/demos.rst + examples/integrations.rst + examples/prompts.rst + examples/model_laboratory.ipynb + +More elaborate examples and walk-throughs of particular +integrations and use cases. This is the place to look if you have questions +about how to integrate certain pieces, or if you want to find examples of +common tasks or cool demos. + + +.. toctree:: + :maxdepth: 1 + :caption: Reference + :name: reference + + installation.md + integrations.md modules/prompt modules/llms modules/embeddings + modules/text_splitter + modules/vectorstore modules/chains + +Full API documentation. This is the place to look if you want to +see detailed information about the various classes, methods, and APIs. + + .. toctree:: :maxdepth: 1 :caption: Resources + :name: resources + core_concepts.md glossary.md Discord + +Higher level, conceptual explanations of the LangChain components. +This is the place to go if you want to increase your high level understanding +of the problems LangChain is solving, and how we decided to go about doing so. + diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 00000000..e1ed80f8 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,24 @@ +# Installation Options + +LangChain is available on PyPI, so it is easily installable with: + +``` +pip install langchain +``` + +That will install the bare minimum requirements of LangChain. +A lot of the value of LangChain comes when integrating it with various model providers, datastores, etc. +By default, the dependencies needed to do that are NOT installed. +However, there are two other ways to install LangChain that do bring in those dependencies. + +To install modules needed for the common LLM providers, run: + +``` +pip install langchain[llms] +``` + +To install all modules needed for all integrations, run: + +``` +pip install langchain[all] +``` \ No newline at end of file diff --git a/docs/integrations.md b/docs/integrations.md new file mode 100644 index 00000000..388a1325 --- /dev/null +++ b/docs/integrations.md @@ -0,0 +1,33 @@ +# Integration Reference + +Besides the installation of this python package, you will also need to install packages and set environment variables depending on which chains you want to use. + +Note: the reason these packages are not included in the dependencies by default is that as we imagine scaling this package, we do not want to force dependencies that are not needed. + +The following use cases require specific installs and api keys: + +- _OpenAI_: + - Install requirements with `pip install openai` + - Get an OpenAI api key and either set it as an environment variable (`OPENAI_API_KEY`) or pass it to the LLM constructor as `openai_api_key`. +- _Cohere_: + - Install requirements with `pip install cohere` + - Get a Cohere api key and either set it as an environment variable (`COHERE_API_KEY`) or pass it to the LLM constructor as `cohere_api_key`. +- _HuggingFace Hub_: + - Install requirements with `pip install huggingface_hub` + - Get a HuggingFace Hub api token and either set it as an environment variable (`HUGGINGFACEHUB_API_TOKEN`) or pass it to the LLM constructor as `huggingfacehub_api_token`. +- _SerpAPI_: + - Install requirements with `pip install google-search-results` + - Get a SerpAPI api key and either set it as an environment variable (`SERPAPI_API_KEY`) or pass it to the LLM constructor as `serpapi_api_key`. 
diff --git a/docs/modules/text_splitter.rst b/docs/modules/text_splitter.rst
new file mode 100644
index 00000000..1df38dd1
--- /dev/null
+++ b/docs/modules/text_splitter.rst
@@ -0,0 +1,6 @@
+:mod:`langchain.text_splitter`
+==============================
+
+.. automodule:: langchain.text_splitter
+   :members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/modules/vectorstore.rst b/docs/modules/vectorstore.rst
new file mode 100644
index 00000000..04ae7606
--- /dev/null
+++ b/docs/modules/vectorstore.rst
@@ -0,0 +1,6 @@
+:mod:`langchain.vectorstores`
+=============================
+
+.. automodule:: langchain.vectorstores
+   :members:
+   :undoc-members:
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 81cf4106..206b5066 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,6 +1,8 @@
+autodoc_pydantic==1.8.0
+myst_parser
+nbsphinx==0.8.9
 sphinx==4.5.0
 sphinx-autobuild==2021.3.14
 sphinx_rtd_theme==1.0.0
 sphinx-typlog-theme==0.8.0
-autodoc_pydantic==1.8.0
-myst_parser
+sphinx-panels
diff --git a/examples/huggingface_tokenizer_text_splitter.ipynb b/examples/huggingface_tokenizer_text_splitter.ipynb
deleted file mode 100644
index afd6db90..00000000
--- a/examples/huggingface_tokenizer_text_splitter.ipynb
+++ /dev/null
@@ -1,104 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "e82c4685",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.text_splitter import HuggingFaceTokenizerSplitter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "a8ce51d5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from transformers import GPT2TokenizerFast\n",
-    "\n",
-    "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "ca5e72c0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open('state_of_the_union.txt') as f:\n",
-    "    state_of_the_union = f.read()\n",
-    "text_splitter = HuggingFaceTokenizerSplitter(tokenizer, chunk_size=1000, chunk_overlap=0)\n",
-    "texts = text_splitter.split_text(state_of_the_union)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "37cdfbeb",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-    "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. 
\n", - "\n", - "Last year COVID-19 kept us apart. This year we are finally together again. \n", - "\n", - "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n", - "\n", - "With a duty to one another to the American people to the Constitution. \n", - "\n", - "And with an unwavering resolve that freedom will always triumph over tyranny. \n", - "\n", - "Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n", - "\n", - "He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n", - "\n", - "He met the Ukrainian people. \n", - "\n", - "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n", - "\n", - "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n" - ] - } - ], - "source": [ - "print(texts[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d214aec2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/react.ipynb b/examples/react.ipynb deleted file mode 100644 index 5b0a50ae..00000000 --- a/examples/react.ipynb +++ /dev/null @@ -1,83 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "id": "4e272b47", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain import OpenAI, ReActChain, Wikipedia\n", - "\n", - "llm = OpenAI(temperature=0)\n", - "react = ReActChain(llm=llm, docstore=Wikipedia(), verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8078c8f1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[102m I need to search David Chanoff and find the U.S. Navy admiral he\n", - "collaborated with.\n", - "Action 1: Search[David Chanoff]\u001b[0m\u001b[49m\n", - "Observation 1: \u001b[0m\u001b[103mDavid Chanoff is a noted author of non-fiction work. His work has typically involved collaborations with the principal protagonist of the work concerned. His collaborators have included; Augustus A. White, Joycelyn Elders, Đoàn Văn Toại, William J. Crowe, Ariel Sharon, Kenneth Good and Felix Zandman. He has also written about a wide range of subjects including literary history, education and foreign for The Washington Post, The New Republic and The New York Times Magazine. He has published more than twelve books.\u001b[0m\u001b[49m\n", - "Thought 2:\u001b[0m\u001b[102m The U.S. Navy admiral David Chanoff collaborated with is William J. Crowe.\n", - "Action 2: Search[William J. Crowe]\u001b[0m\u001b[49m\n", - "Observation 2: \u001b[0m\u001b[103mWilliam James Crowe Jr. (January 2, 1925 – October 18, 2007) was a United States Navy admiral and diplomat who served as the 11th chairman of the Joint Chiefs of Staff under Presidents Ronald Reagan and George H. W. 
Bush, and as the ambassador to the United Kingdom and Chair of the Intelligence Oversight Board under President Bill Clinton.\u001b[0m\u001b[49m\n", - "Thought 3:\u001b[0m\u001b[102m William J. Crowe served as the ambassador to the United Kingdom under President Bill Clinton. So the answer is Bill Clinton.\n", - "Action 3: Finish[Bill Clinton]\u001b[0m" - ] - }, - { - "data": { - "text/plain": [ - "'Bill Clinton'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "question = \"Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\"\n", - "react.run(question)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a6bd3b4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}