text-splitters[minor]: Add Lua code splitting (#20421)

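- **Description:** Add Lua support to the langchain.text_splitter module:
a `Language.LUA` separator list for `RecursiveCharacterTextSplitter` and a
unit test (`test_lua_code_splitter`) that covers it.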
- **Dependencies:** None
- **Twitter handle:** @saberuster

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
saberuster 2024-04-14 06:42:51 +08:00 committed by GitHub
parent 4b6b0a87b6
commit 160bcaeb93
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 64 additions and 0 deletions

View File

@ -571,6 +571,23 @@ class RecursiveCharacterTextSplitter(TextSplitter):
" ",
"",
]
elif language == Language.LUA:
return [
# Split along variable and table definitions
"\nlocal ",
# Split along function definitions
"\nfunction ",
# Split along control flow statements
"\nif ",
"\nfor ",
"\nwhile ",
"\nrepeat ",
# Split by the normal type of lines
"\n\n",
"\n",
" ",
"",
]
elif language == Language.HASKELL:
return [
# Split along function definitions

View File

@ -1248,6 +1248,53 @@ def test_solidity_code_splitter() -> None:
]
# Checks that a splitter built with
# RecursiveCharacterTextSplitter.from_language(Language.LUA, ...) breaks a
# small Lua snippet into exactly the chunk list asserted at the end.
# NOTE(review): this copy appears to have lost its Python indentation (and
# possibly whitespace inside the Lua sample string) during rendering; confirm
# against the real test file before relying on the literal below.
def test_lua_code_splitter() -> None:
# Zero overlap is requested; CHUNK_SIZE comes from outside this function.
splitter = RecursiveCharacterTextSplitter.from_language(
Language.LUA, chunk_size=CHUNK_SIZE, chunk_overlap=0
)
# Lua sample: a local declaration, a function definition, and nested
# if / for / while / repeat-until blocks.
code = """
local variable = 10
function add(a, b)
return a + b
end
if variable > 5 then
for i=1, variable do
while i < variable do
repeat
print(i)
i = i + 1
until i >= variable
end
end
end
"""
chunks = splitter.split_text(code)
# Exact comparison: both the order and the text of every chunk must match.
assert chunks == [
"local variable",
"= 10",
"function add(a,",
"b)",
"return a +",
"b",
"end",
"if variable > 5",
"then",
"for i=1,",
"variable do",
"while i",
"< variable do",
"repeat",
"print(i)",
"i = i + 1",
"until i >=",
"variable",
"end",
# The final expected chunk holds two `end` lines joined by a newline.
"end\nend",
]
def test_haskell_code_splitter() -> None:
splitter = RecursiveCharacterTextSplitter.from_language(
Language.HASKELL, chunk_size=CHUNK_SIZE, chunk_overlap=0