From 0b9f086d3632992e7e15b2e8b62177338bd7c7b3 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Fri, 3 Feb 2023 15:09:13 -0800 Subject: [PATCH] Harrison/docs splitter (#879) --- .../combine_docs_examples/textsplitter.ipynb | 56 +++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/docs/modules/utils/combine_docs_examples/textsplitter.ipynb b/docs/modules/utils/combine_docs_examples/textsplitter.ipynb index 6f1110bd0f..a5ccb7816b 100644 --- a/docs/modules/utils/combine_docs_examples/textsplitter.ipynb +++ b/docs/modules/utils/combine_docs_examples/textsplitter.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "b118c9dc", "metadata": {}, @@ -476,18 +475,67 @@ "print(texts[0])" ] }, + { + "cell_type": "markdown", + "id": "53049ff5", + "metadata": {}, + "source": [ + "## Token Text Splitter" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a1a118b1", "metadata": {}, "outputs": [], + "source": [ + "from langchain.text_splitter import TokenTextSplitter" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ef37c5d3", + "metadata": {}, + "outputs": [], + "source": [ + "text_splitter = TokenTextSplitter(chunk_size=10, chunk_overlap=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5750228a", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Madam Speaker, Madam Vice President, our\n" + ] + } + ], + "source": [ + "texts = text_splitter.split_text(state_of_the_union)\n", + "print(texts[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0905c1de", + "metadata": {}, + "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -501,7 +549,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12 (main, Mar 26 2022, 15:51:15) \n[Clang 13.1.6 (clang-1316.0.21.2)]" + "version": "3.10.9" }, "vscode": { "interpreter": {