mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Fix invalid escape sequence warnings (#8771)
<!-- Thank you for contributing to LangChain! Replace this comment with: - Description: a description of the change, - Issue: the issue # it fixes (if applicable), - Dependencies: any dependencies required for this change, - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: we announce bigger features on Twitter. If your PR gets announced and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. Maintainer responsibilities: - General / Misc / if you don't know who to tag: @baskaryan - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev - Models / Prompts: @hwchase17, @baskaryan - Memory: @hwchase17 - Agents / Tools / Toolkits: @hinthornw - Tracing / Callbacks: @agola11 - Async: @agola11 If no one reviews your PR within a few days, feel free to @-mention the same people again. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md --> Description: The lines I have changed look like they are incorrectly escaped for regex. In Python 3.11, I receive a DeprecationWarning for each of these lines. You don't see any warnings unless you explicitly run Python with the `-W always::DeprecationWarning` flag. So, this is my attempt to fix it.
Here are the warnings from log files: ``` /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:919: DeprecationWarning: invalid escape sequence '\s' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:918: DeprecationWarning: invalid escape sequence '\s' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:917: DeprecationWarning: invalid escape sequence '\s' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:916: DeprecationWarning: invalid escape sequence '\c' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:903: DeprecationWarning: invalid escape sequence '\*' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:804: DeprecationWarning: invalid escape sequence '\*' /usr/local/lib/python3.11/site-packages/langchain/text_splitter.py:804: DeprecationWarning: invalid escape sequence '\*' ``` cc @baskaryan --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
0adc282d70
commit
454998c1fb
@ -832,7 +832,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
# Split along section titles
|
# Split along section titles
|
||||||
"\n=+\n",
|
"\n=+\n",
|
||||||
"\n-+\n",
|
"\n-+\n",
|
||||||
"\n\*+\n",
|
"\n\\*+\n",
|
||||||
# Split along directive markers
|
# Split along directive markers
|
||||||
"\n\n.. *\n\n",
|
"\n\n.. *\n\n",
|
||||||
# Split by the normal type of lines
|
# Split by the normal type of lines
|
||||||
@ -931,7 +931,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
# End of code block
|
# End of code block
|
||||||
"```\n",
|
"```\n",
|
||||||
# Horizontal lines
|
# Horizontal lines
|
||||||
"\n\*\*\*+\n",
|
"\n\\*\\*\\*+\n",
|
||||||
"\n---+\n",
|
"\n---+\n",
|
||||||
"\n___+\n",
|
"\n___+\n",
|
||||||
# Note that this splitter doesn't handle horizontal lines defined
|
# Note that this splitter doesn't handle horizontal lines defined
|
||||||
@ -944,19 +944,19 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
elif language == Language.LATEX:
|
elif language == Language.LATEX:
|
||||||
return [
|
return [
|
||||||
# First, try to split along Latex sections
|
# First, try to split along Latex sections
|
||||||
"\n\\\chapter{",
|
"\n\\\\chapter{",
|
||||||
"\n\\\section{",
|
"\n\\\\section{",
|
||||||
"\n\\\subsection{",
|
"\n\\\\subsection{",
|
||||||
"\n\\\subsubsection{",
|
"\n\\\\subsubsection{",
|
||||||
# Now split by environments
|
# Now split by environments
|
||||||
"\n\\\begin{enumerate}",
|
"\n\\\\begin{enumerate}",
|
||||||
"\n\\\begin{itemize}",
|
"\n\\\\begin{itemize}",
|
||||||
"\n\\\begin{description}",
|
"\n\\\\begin{description}",
|
||||||
"\n\\\begin{list}",
|
"\n\\\\begin{list}",
|
||||||
"\n\\\begin{quote}",
|
"\n\\\\begin{quote}",
|
||||||
"\n\\\begin{quotation}",
|
"\n\\\\begin{quotation}",
|
||||||
"\n\\\begin{verse}",
|
"\n\\\\begin{verse}",
|
||||||
"\n\\\begin{verbatim}",
|
"\n\\\\begin{verbatim}",
|
||||||
# Now split by math environments
|
# Now split by math environments
|
||||||
"\n\\\begin{align}",
|
"\n\\\begin{align}",
|
||||||
"$$",
|
"$$",
|
||||||
|
@ -406,6 +406,10 @@ Not a comment
|
|||||||
".. This is a",
|
".. This is a",
|
||||||
"comment",
|
"comment",
|
||||||
]
|
]
|
||||||
|
# Special test for special characters
|
||||||
|
code = "harry\n***\nbabylon is"
|
||||||
|
chunks = splitter.split_text(code)
|
||||||
|
assert chunks == ["harry", "***\nbabylon is"]
|
||||||
|
|
||||||
|
|
||||||
def test_proto_file_splitter() -> None:
|
def test_proto_file_splitter() -> None:
|
||||||
@ -680,6 +684,22 @@ This is a code block
|
|||||||
"block",
|
"block",
|
||||||
"```",
|
"```",
|
||||||
]
|
]
|
||||||
|
# Special test for special characters
|
||||||
|
code = "harry\n***\nbabylon is"
|
||||||
|
chunks = splitter.split_text(code)
|
||||||
|
assert chunks == ["harry", "***\nbabylon is"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_latex_code_splitter() -> None:
|
||||||
|
splitter = RecursiveCharacterTextSplitter.from_language(
|
||||||
|
Language.LATEX, chunk_size=CHUNK_SIZE, chunk_overlap=0
|
||||||
|
)
|
||||||
|
code = """
|
||||||
|
Hi Harrison!
|
||||||
|
\\chapter{1}
|
||||||
|
"""
|
||||||
|
chunks = splitter.split_text(code)
|
||||||
|
assert chunks == ["Hi Harrison!", "\\chapter{1}"]
|
||||||
|
|
||||||
|
|
||||||
def test_html_code_splitter() -> None:
|
def test_html_code_splitter() -> None:
|
||||||
|
Loading…
Reference in New Issue
Block a user