infra: lint new docs to match doc loader template (#22867)

pull/22833/head^2
Isaac Francisco 3 weeks ago committed by GitHub
parent 8bd368d07e
commit f9a6d5c845
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -17,6 +17,18 @@ CHAT_MODEL_HEADERS = (
)
CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS)
# Markdown headings that a document loader integration page must contain,
# listed in the order they are expected to appear in the page.
DOCUMENT_LOADER_HEADERS = (
    # Overview section and its sub-sections.
    "## Overview",
    "### Integration details",
    "### Loader features",
    # Usage walkthrough sections.
    "## Setup",
    "## Instantiation",
    "## Load",
    "## Lazy Load",
    # Closing reference section.
    "## API reference",
)

# Single pattern with the headings chained by ".*", so each heading must be
# followed (after any amount of text) by the next one.
DOCUMENT_LOADER_REGEX = r".*".join(DOCUMENT_LOADER_HEADERS)
def check_chat_model(path: Path) -> None:
with open(path, "r") as f:
@ -29,12 +41,29 @@ def check_chat_model(path: Path) -> None:
)
def check_document_loader(path: Path) -> None:
    """Check a document loader page against the integration page template.

    The file content is searched with ``DOCUMENT_LOADER_REGEX``; the page is
    accepted when that pattern is found anywhere in the file.

    Args:
        path: Location of the documentation page to check.

    Raises:
        ValueError: If the page content does not match the template pattern.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which can fail or mis-decode non-ASCII text on some systems.
    with open(path, "r", encoding="utf-8") as f:
        doc = f.read()
    # re.DOTALL lets "." in the pattern match newlines as well, so the pattern
    # can span multiple lines of the page.
    if not re.search(DOCUMENT_LOADER_REGEX, doc, re.DOTALL):
        raise ValueError(
            f"Document {path} does not match the DocumentLoader Integration page template. "
            f"Please see https://github.com/langchain-ai/langchain/issues/22866 for "
            f"instructions on how to correctly format a DocumentLoader Integration page."
        )
def main(*new_doc_paths: Union[str, Path]) -> None:
    """Run the matching template check for each newly added docs page.

    Pages located under ``docs/integrations/chat`` are checked with
    ``check_chat_model`` and pages under ``docs/integrations/document_loaders``
    with ``check_document_loader`` (both directories are taken relative to
    ``CURR_DIR.parent``). Paths outside those directories are ignored.

    Args:
        new_doc_paths: Paths of the documentation pages to inspect.
    """
    integrations_dir = CURR_DIR.parent / "docs" / "integrations"
    chat_dir = integrations_dir / "chat"
    loader_dir = integrations_dir / "document_loaders"

    for raw_path in new_doc_paths:
        # Normalise first so the ancestor comparison uses absolute paths.
        doc_path = Path(raw_path).resolve().absolute()
        ancestors = doc_path.parents
        if chat_dir in ancestors:
            print(f"Checking chat model page {doc_path}")
            check_chat_model(doc_path)
        elif loader_dir in ancestors:
            print(f"Checking document loader page {doc_path}")
            check_document_loader(doc_path)
        # Any other location has no template to enforce; move on.

Loading…
Cancel
Save