forked from Archives/langchain
Implements NLTK and Spacy-based TextSplitters (#103)
This PR is for Issue #88 - [x] `make format` - [x] `make lint` - [x] `make tests`
harrison/prompt_examples
parent
28282ad099
commit
3ee6e332dd
@ -0,0 +1,38 @@
|
|||||||
|
"""
|
||||||
|
Test text splitting functionality using NLTK and Spacy based sentence splitters.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.text_splitter import NLTKTextSplitter, SpacyTextSplitter
|
||||||
|
|
||||||
|
|
||||||
|
def test_nltk_text_splitting_args() -> None:
    """Expect ValueError when chunk_overlap (4) is larger than chunk_size (2)."""
    chunk_size = 2
    chunk_overlap = 4
    # Construction itself must fail; no text is ever split in this test.
    with pytest.raises(ValueError):
        NLTKTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
||||||
|
|
||||||
|
|
||||||
|
def test_spacy_text_splitting_args() -> None:
    """Expect ValueError when chunk_overlap (4) is larger than chunk_size (2)."""
    chunk_size = 2
    chunk_overlap = 4
    # Construction itself must fail; no text is ever split in this test.
    with pytest.raises(ValueError):
        SpacyTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
||||||
|
|
||||||
|
|
||||||
|
def test_nltk_text_splitter() -> None:
    """Split a two-sentence string with NLTK using a custom separator."""
    sep = "|||"
    chunks = NLTKTextSplitter(separator=sep).split_text(
        "This is sentence one. And this is sentence two."
    )
    # Expected result: one chunk holding both sentences joined by the separator.
    assert chunks == [f"This is sentence one.{sep}And this is sentence two."]
|
||||||
|
|
||||||
|
|
||||||
|
def test_spacy_text_splitter() -> None:
    """Split a two-sentence string with Spacy using a custom separator."""
    sep = "|||"
    chunks = SpacyTextSplitter(separator=sep).split_text(
        "This is sentence one. And this is sentence two."
    )
    # Expected result: one chunk holding both sentences joined by the separator.
    assert chunks == [f"This is sentence one.{sep}And this is sentence two."]
|
Loading…
Reference in New Issue