mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
e1a24d09c5
**Description:** Added a PHP language parser to document_loaders **Issue:** N/A **Dependencies:** N/A **Twitter handle:** N/A --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
36 lines
850 B
Python
36 lines
850 B
Python
from typing import TYPE_CHECKING
|
|
|
|
from langchain_community.document_loaders.parsers.language.tree_sitter_segmenter import ( # noqa: E501
|
|
TreeSitterSegmenter,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from tree_sitter import Language
|
|
|
|
|
|
# Tree-sitter query text returned by ``PHPSegmenter.get_chunk_query``.
# The bracketed list is an alternation: a node matches if it is any one of
# the listed PHP node types, and it is captured under the name after ``@``.
# Only query syntax and whitespace may appear inside the literal; any other
# character becomes part of the query text.
CHUNK_QUERY = """
    [
        (function_definition) @function
        (class_declaration) @class
        (interface_declaration) @interface
        (trait_declaration) @trait
        (enum_declaration) @enum
        (namespace_definition) @namespace
    ]
""".strip()
|
|
|
|
|
|
class PHPSegmenter(TreeSitterSegmenter):
    """Code segmenter for PHP."""

    def get_language(self) -> "Language":
        """Return the tree-sitter ``Language`` object for PHP.

        ``tree_sitter_languages`` is imported inside the method rather than
        at module level, so importing this module does not itself require
        that package to be installed.
        """
        from tree_sitter_languages import get_language

        return get_language("php")

    def get_chunk_query(self) -> str:
        """Return the query text stored in the module-level ``CHUNK_QUERY``."""
        return CHUNK_QUERY

    def make_line_comment(self, text: str) -> str:
        """Return ``text`` prefixed with ``// `` to form a line comment."""
        return f"// {text}"
|