From 0ea72245358171478726aaa95a1fcde69707e115 Mon Sep 17 00:00:00 2001 From: Jona Sassenhagen Date: Tue, 11 Jul 2023 22:43:46 +0200 Subject: [PATCH] [Minor] Remove tagger from spacy sentencizer (#7534) @svlandeg gave me a tip for how to improve a bit on https://github.com/hwchase17/langchain/pull/7442 for some extra speed and memory gains. The tagger isn't needed for sentencization, so it can be excluded too. --- langchain/text_splitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py index 37de47f0fc..932d252a48 100644 --- a/langchain/text_splitter.py +++ b/langchain/text_splitter.py @@ -47,7 +47,7 @@ def _make_spacy_pipeline_for_splitting(pipeline: str) -> Any: # avoid importing sentencizer = English() sentencizer.add_pipe("sentencizer") else: - sentencizer = spacy.load(pipeline, disable=["ner"]) + sentencizer = spacy.load(pipeline, exclude=["ner", "tagger"]) return sentencizer