From 4d8cda1c3bf548e01279eb6fbba21c5925b9c1dc Mon Sep 17 00:00:00 2001 From: ugfly1210 <30715202+ugfly1210@users.noreply.github.com> Date: Thu, 8 Jun 2023 07:01:07 +0800 Subject: [PATCH] FIX: backslash escaped (#5815) The LaTeX separators of LatexTextSplitter are used as regular-expression patterns, so their backslashes must be escaped once more, e.g. "\n\\\chapter" instead of "\n\\chapter"; otherwise splitting fails with an error: (re.error: bad escape \c at position 1 (line 2, column 1)) Fixes # (issue) #### Before submitting #### Who can review? @hwchase17 @dev2049 Co-authored-by: Pang --- langchain/text_splitter.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py index 1e697e66..fddeb023 100644 --- a/langchain/text_splitter.py +++ b/langchain/text_splitter.py @@ -716,21 +716,21 @@ class RecursiveCharacterTextSplitter(TextSplitter): elif language == Language.LATEX: return [ # First, try to split along Latex sections - "\n\\chapter{", - "\n\\section{", - "\n\\subsection{", - "\n\\subsubsection{", + "\n\\\chapter{", + "\n\\\section{", + "\n\\\subsection{", + "\n\\\subsubsection{", # Now split by environments - "\n\\begin{enumerate}", - "\n\\begin{itemize}", - "\n\\begin{description}", - "\n\\begin{list}", - "\n\\begin{quote}", - "\n\\begin{quotation}", - "\n\\begin{verse}", - "\n\\begin{verbatim}", + "\n\\\begin{enumerate}", + "\n\\\begin{itemize}", + "\n\\\begin{description}", + "\n\\\begin{list}", + "\n\\\begin{quote}", + "\n\\\begin{quotation}", + "\n\\\begin{verse}", + "\n\\\begin{verbatim}", ## Now split by math environments - "\n\\begin{align}", + "\n\\\begin{align}", "$$", "$", # Now split by the normal type of lines