langchain/libs/community/langchain_community/document_loaders/parsers/language/php.py
david02871 e1a24d09c5
community: Add PHP language parser to document_loaders (#19850)
**Description:**
Added a PHP language parser to document_loaders
**Issue:** N/A
**Dependencies:** N/A
**Twitter handle:** N/A

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-04-08 11:30:28 -04:00

36 lines
850 B
Python

from typing import TYPE_CHECKING
from langchain_community.document_loaders.parsers.language.tree_sitter_segmenter import ( # noqa: E501
TreeSitterSegmenter,
)
if TYPE_CHECKING:
from tree_sitter import Language
# Tree-sitter query string handed out by ``PHPSegmenter.get_chunk_query``.
# It is a single bracketed alternation: each entry names one PHP syntax-node
# type (function, class, interface, trait, enum, namespace) and tags it with
# an ``@capture`` name.
CHUNK_QUERY = (
    "[\n"
    "(function_definition) @function\n"
    "(class_declaration) @class\n"
    "(interface_declaration) @interface\n"
    "(trait_declaration) @trait\n"
    "(enum_declaration) @enum\n"
    "(namespace_definition) @namespace\n"
    "]"
)
class PHPSegmenter(TreeSitterSegmenter):
    """Tree-sitter based code segmenter specialised for PHP source."""

    def get_language(self) -> "Language":
        """Return the tree-sitter ``Language`` object for PHP.

        ``tree_sitter_languages`` is imported inside the method rather than
        at module level, so the package is only required once this method
        is actually called.
        """
        import tree_sitter_languages

        return tree_sitter_languages.get_language("php")

    def get_chunk_query(self) -> str:
        """Return the module-level ``CHUNK_QUERY`` tree-sitter query string."""
        return CHUNK_QUERY

    def make_line_comment(self, text: str) -> str:
        """Return ``text`` as a PHP single-line comment (``// `` prefix)."""
        return f"// {text}"