From 887bb12287f887599f1e6c24b7cb175eb2290848 Mon Sep 17 00:00:00 2001 From: Jeroen Van Goey Date: Thu, 6 Jul 2023 15:24:25 +0200 Subject: [PATCH] Use correct Language for html_splitter (#7274) `html_splitter` was using `Language.MARKDOWN`. --- .../text_splitters/code_splitter.mdx | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/snippets/modules/data_connection/document_transformers/text_splitters/code_splitter.mdx b/docs/snippets/modules/data_connection/document_transformers/text_splitters/code_splitter.mdx index 5e8032d8a1..e8e40a2734 100644 --- a/docs/snippets/modules/data_connection/document_transformers/text_splitters/code_splitter.mdx +++ b/docs/snippets/modules/data_connection/document_transformers/text_splitters/code_splitter.mdx @@ -253,7 +253,7 @@ html_text = """ ```python html_splitter = RecursiveCharacterTextSplitter.from_language( - language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0 + language=Language.HTML, chunk_size=60, chunk_overlap=0 ) html_docs = html_splitter.create_documents([html_text]) html_docs @@ -262,19 +262,18 @@ html_docs ``` - [Document(page_content='\n\n ', metadata={}), - Document(page_content='🦜️🔗 LangChain\n \n \n \n
', metadata={}), - Document(page_content='

🦜️🔗 LangChain

', metadata={}), - Document(page_content='

⚡ Building applications with LLMs through', metadata={}), - Document(page_content='composability ⚡

', metadata={}), - Document(page_content='
\n
', metadata={}), - Document(page_content='As an open source project in a rapidly', metadata={}), - Document(page_content='developing field, we are extremely open to contributions.', metadata={}), + [Document(page_content='\n', metadata={}), + Document(page_content='\n 🦜️🔗 LangChain', metadata={}), + Document(page_content='\n \ No newline at end of file +