# Patch summary: add Lua to the languages handled by the recursive text splitter.
#
# 1. libs/text-splitters/langchain_text_splitters/character.py
#    Adds an `elif language == Language.LUA:` branch (hunk context:
#    `class RecursiveCharacterTextSplitter(TextSplitter)`) that returns the
#    separator list for Lua in this order: "\nlocal ", "\nfunction ", the
#    control-flow keywords "\nif ", "\nfor ", "\nwhile ", "\nrepeat ", and then
#    the generic fallbacks "\n\n", "\n", " ", "".
#    Every keyword separator starts with "\n" and ends with a space, so as
#    written it only matches a keyword at column 0 that is followed by a space.
#
# 2. libs/text-splitters/tests/unit_tests/test_text_splitters.py
#    Adds `test_lua_code_splitter`, which builds a splitter with
#    `RecursiveCharacterTextSplitter.from_language(Language.LUA,
#    chunk_size=CHUNK_SIZE, chunk_overlap=0)`, splits a small Lua snippet and
#    asserts the exact list of resulting chunks.
#
# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed. Line breaks follow the hunk line counts
# (+571,23 and +1248,53); indentation depth is reconstructed by convention.
# Confirm the context lines against the target files before applying.
diff --git a/libs/text-splitters/langchain_text_splitters/character.py b/libs/text-splitters/langchain_text_splitters/character.py
index d01f2662e4..0f2ce97bcb 100644
--- a/libs/text-splitters/langchain_text_splitters/character.py
+++ b/libs/text-splitters/langchain_text_splitters/character.py
@@ -571,6 +571,23 @@ class RecursiveCharacterTextSplitter(TextSplitter):
                 " ",
                 "",
             ]
+        elif language == Language.LUA:
+            return [
+                # Split along variable and table definitions
+                "\nlocal ",
+                # Split along function definitions
+                "\nfunction ",
+                # Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\nrepeat ",
+                # Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ]
         elif language == Language.HASKELL:
             return [
                 # Split along function definitions
diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
index d59f06678b..1202d13f2e 100644
--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -1248,6 +1248,53 @@ def test_solidity_code_splitter() -> None:
     ]
 
 
+def test_lua_code_splitter() -> None:
+    splitter = RecursiveCharacterTextSplitter.from_language(
+        Language.LUA, chunk_size=CHUNK_SIZE, chunk_overlap=0
+    )
+    code = """
+local variable = 10
+
+function add(a, b)
+    return a + b
+end
+
+if variable > 5 then
+    for i=1, variable do
+        while i < variable do
+            repeat
+                print(i)
+                i = i + 1
+            until i >= variable
+        end
+    end
+end
+    """
+    chunks = splitter.split_text(code)
+    assert chunks == [
+        "local variable",
+        "= 10",
+        "function add(a,",
+        "b)",
+        "return a +",
+        "b",
+        "end",
+        "if variable > 5",
+        "then",
+        "for i=1,",
+        "variable do",
+        "while i",
+        "< variable do",
+        "repeat",
+        "print(i)",
+        "i = i + 1",
+        "until i >=",
+        "variable",
+        "end",
+        "end\nend",
+    ]
+
+
 def test_haskell_code_splitter() -> None:
     splitter = RecursiveCharacterTextSplitter.from_language(
         Language.HASKELL, chunk_size=CHUNK_SIZE, chunk_overlap=0