mirror of
https://github.com/hwchase17/langchain
synced 2024-11-02 09:40:22 +00:00
72 lines
2.0 KiB
Python
72 lines
2.0 KiB
Python
"""**Text Splitters** are classes for splitting text.
|
|
|
|
|
|
**Class hierarchy:**
|
|
|
|
.. code-block::
|
|
|
|
BaseDocumentTransformer --> TextSplitter --> <name>TextSplitter # Example: CharacterTextSplitter
|
|
RecursiveCharacterTextSplitter --> <name>TextSplitter
|
|
|
|
Note: **MarkdownHeaderTextSplitter** and **HTMLHeaderTextSplitter** do not derive from TextSplitter.
|
|
|
|
|
|
**Main helpers:**
|
|
|
|
.. code-block::
|
|
|
|
Document, Tokenizer, Language, LineType, HeaderType
|
|
|
|
""" # noqa: E501
|
|
|
|
from langchain_text_splitters.base import (
|
|
Language,
|
|
TextSplitter,
|
|
Tokenizer,
|
|
TokenTextSplitter,
|
|
split_text_on_tokens,
|
|
)
|
|
from langchain_text_splitters.character import (
|
|
CharacterTextSplitter,
|
|
RecursiveCharacterTextSplitter,
|
|
)
|
|
from langchain_text_splitters.html import ElementType, HTMLHeaderTextSplitter
|
|
from langchain_text_splitters.json import RecursiveJsonSplitter
|
|
from langchain_text_splitters.konlpy import KonlpyTextSplitter
|
|
from langchain_text_splitters.latex import LatexTextSplitter
|
|
from langchain_text_splitters.markdown import (
|
|
HeaderType,
|
|
LineType,
|
|
MarkdownHeaderTextSplitter,
|
|
MarkdownTextSplitter,
|
|
)
|
|
from langchain_text_splitters.nltk import NLTKTextSplitter
|
|
from langchain_text_splitters.python import PythonCodeTextSplitter
|
|
from langchain_text_splitters.sentence_transformers import (
|
|
SentenceTransformersTokenTextSplitter,
|
|
)
|
|
from langchain_text_splitters.spacy import SpacyTextSplitter
|
|
|
|
# Public API of this package: these are the names exported by a wildcard
# import (`from <package> import *`). Every entry corresponds to a name
# imported at the top of this module, so keep the two lists in sync when
# adding or removing a splitter.
__all__ = [
|
|
"TokenTextSplitter",
|
|
"TextSplitter",
|
|
"Tokenizer",
|
|
"Language",
|
|
"RecursiveCharacterTextSplitter",
|
|
"RecursiveJsonSplitter",
|
|
"LatexTextSplitter",
|
|
"PythonCodeTextSplitter",
|
|
"KonlpyTextSplitter",
|
|
"SpacyTextSplitter",
|
|
"NLTKTextSplitter",
|
|
"split_text_on_tokens",
|
|
"SentenceTransformersTokenTextSplitter",
|
|
"ElementType",
|
|
"HeaderType",
|
|
"LineType",
|
|
"HTMLHeaderTextSplitter",
|
|
"MarkdownHeaderTextSplitter",
|
|
"MarkdownTextSplitter",
|
|
"CharacterTextSplitter",
|
|
]
|