diff --git a/libs/langchain/langchain/text_splitter.py b/libs/langchain/langchain/text_splitter.py index d0bf6fca1b..be189548c1 100644 --- a/libs/langchain/langchain/text_splitter.py +++ b/libs/langchain/langchain/text_splitter.py @@ -627,6 +627,7 @@ class Language(str, Enum): LATEX = "latex" HTML = "html" SOL = "sol" + CSHARP = "csharp" class RecursiveCharacterTextSplitter(TextSplitter): @@ -1002,6 +1003,43 @@ class RecursiveCharacterTextSplitter(TextSplitter): " None: + splitter = RecursiveCharacterTextSplitter.from_language( + Language.CSHARP, chunk_size=CHUNK_SIZE, chunk_overlap=0 + ) + code = """ +using System; +class Program +{ + static void Main() + { + int age = 30; // Change the age value as needed + + // Categorize the age without any console output + if (age < 18) + { + // Age is under 18 + } + else if (age >= 18 && age < 65) + { + // Age is an adult + } + else + { + // Age is a senior citizen + } + } +} + """ + + chunks = splitter.split_text(code) + assert chunks == [ + "using System;", + "class Program\n{", + "static void", + "Main()", + "{", + "int age", + "= 30; // Change", + "the age value", + "as needed", + "//", + "Categorize the", + "age without any", + "console output", + "if (age", + "< 18)", + "{", + "//", + "Age is under 18", + "}", + "else if", + "(age >= 18 &&", + "age < 65)", + "{", + "//", + "Age is an adult", + "}", + "else", + "{", + "//", + "Age is a senior", + "citizen", + "}\n }", + "}", + ] + + def test_cpp_code_splitter() -> None: splitter = RecursiveCharacterTextSplitter.from_language( Language.CPP, chunk_size=CHUNK_SIZE, chunk_overlap=0