langchain/docs/scripts/check_templates.py

73 lines
2.1 KiB
Python

import re
import sys
from pathlib import Path
from typing import Union
CURR_DIR = Path(__file__).parent.absolute()
CHAT_MODEL_HEADERS = (
"## Overview",
"### Integration details",
"### Model features",
"## Setup",
"## Instantiation",
"## Invocation",
"## Chaining",
"## API reference",
)
CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS)
DOCUMENT_LOADER_HEADERS = (
"## Overview",
"### Integration details",
"### Loader features",
"## Setup",
"## Instantiation",
"## Load",
"## Lazy Load",
"## API reference",
)
DOCUMENT_LOADER_REGEX = r".*".join(DOCUMENT_LOADER_HEADERS)
def check_chat_model(path: Path) -> None:
with open(path, "r") as f:
doc = f.read()
if not re.search(CHAT_MODEL_REGEX, doc, re.DOTALL):
raise ValueError(
f"Document {path} does not match the ChatModel Integration page template. "
f"Please see https://github.com/langchain-ai/langchain/issues/22296 for "
f"instructions on how to correctly format a ChatModel Integration page."
)
def check_document_loader(path: Path) -> None:
with open(path, "r") as f:
doc = f.read()
if not re.search(DOCUMENT_LOADER_REGEX, doc, re.DOTALL):
raise ValueError(
f"Document {path} does not match the DocumentLoader Integration page template. "
f"Please see https://github.com/langchain-ai/langchain/issues/22866 for "
f"instructions on how to correctly format a DocumentLoader Integration page."
)
def main(*new_doc_paths: Union[str, Path]) -> None:
for path in new_doc_paths:
path = Path(path).resolve().absolute()
if CURR_DIR.parent / "docs" / "integrations" / "chat" in path.parents:
print(f"Checking chat model page {path}")
check_chat_model(path)
elif (
CURR_DIR.parent / "docs" / "integrations" / "document_loaders"
in path.parents
):
print(f"Checking document loader page {path}")
check_document_loader(path)
else:
continue
if __name__ == "__main__":
main(*sys.argv[1:])