@ -22,7 +22,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 1 ,
"execution_count": 4 ,
"id": "a9e37aa1",
"id": "a9e37aa1",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -35,7 +35,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 2 ,
"execution_count": 5 ,
"id": "e21a2434",
"id": "e21a2434",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -61,10 +61,14 @@
" 'html',\n",
" 'html',\n",
" 'sol',\n",
" 'sol',\n",
" 'csharp',\n",
" 'csharp',\n",
" 'cobol']"
" 'cobol',\n",
" 'c',\n",
" 'lua',\n",
" 'perl',\n",
" 'haskell']"
]
]
},
},
"execution_count": 2,
"execution_count": 5 ,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
@ -564,13 +568,50 @@
"c_docs"
"c_docs"
]
]
},
},
{
"cell_type": "markdown",
"id": "af9de667-230e-4c2a-8c5f-122a28515d97",
"metadata": {},
"source": [
"## Haskell\n",
"Here's an example using the Haskell text splitter:"
]
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": null,
"execution_count": 3 ,
"id": "688185b5",
"id": "688185b5",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [
"source": []
{
"data": {
"text/plain": [
"[Document(page_content='main :: IO ()'),\n",
" Document(page_content='main = do\\n putStrLn \"Hello, World!\"\\n-- Some'),\n",
" Document(page_content='sample functions\\nadd :: Int -> Int -> Int\\nadd x y'),\n",
" Document(page_content='= x + y')]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HASKELL_CODE = \"\"\"\n",
"main :: IO ()\n",
"main = do\n",
" putStrLn \"Hello, World!\"\n",
"-- Some sample functions\n",
"add :: Int -> Int -> Int\n",
"add x y = x + y\n",
"\"\"\"\n",
"haskell_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.HASKELL, chunk_size=50, chunk_overlap=0\n",
")\n",
"haskell_docs = haskell_splitter.create_documents([HASKELL_CODE])\n",
"haskell_docs"
]
}
}
],
],
"metadata": {
"metadata": {