Harrison/docs splitter (#879)

This commit is contained in:
Harrison Chase 2023-02-03 15:09:13 -08:00 committed by GitHub
parent bcfbc7a818
commit 0b9f086d36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "b118c9dc",
"metadata": {},
@ -476,10 +475,59 @@
"print(texts[0])"
]
},
{
"cell_type": "markdown",
"id": "53049ff5",
"metadata": {},
"source": [
"## Token Text Splitter"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a1a118b1",
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import TokenTextSplitter"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ef37c5d3",
"metadata": {},
"outputs": [],
"source": [
"text_splitter = TokenTextSplitter(chunk_size=10, chunk_overlap=0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5750228a",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Madam Speaker, Madam Vice President, our\n"
]
}
],
"source": [
"texts = text_splitter.split_text(state_of_the_union)\n",
"print(texts[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1a118b1",
"id": "0905c1de",
"metadata": {},
"outputs": [],
"source": []
@ -487,7 +535,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -501,7 +549,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12 (main, Mar 26 2022, 15:51:15) \n[Clang 13.1.6 (clang-1316.0.21.2)]"
"version": "3.10.9"
},
"vscode": {
"interpreter": {