2024-06-11 20:26:35 +00:00
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import Union
|
|
|
|
|
|
|
|
CURR_DIR = Path(__file__).parent.absolute()
|
|
|
|
|
|
|
|
CHAT_MODEL_HEADERS = (
|
|
|
|
"## Overview",
|
|
|
|
"### Integration details",
|
|
|
|
"### Model features",
|
|
|
|
"## Setup",
|
|
|
|
"## Instantiation",
|
|
|
|
"## Invocation",
|
|
|
|
"## Chaining",
|
|
|
|
"## API reference",
|
|
|
|
)
|
|
|
|
CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS)
|
|
|
|
|
2024-06-14 02:34:50 +00:00
|
|
|
DOCUMENT_LOADER_HEADERS = (
|
|
|
|
"## Overview",
|
|
|
|
"### Integration details",
|
|
|
|
"### Loader features",
|
|
|
|
"## Setup",
|
|
|
|
"## Instantiation",
|
|
|
|
"## Load",
|
|
|
|
"## Lazy Load",
|
|
|
|
"## API reference",
|
|
|
|
)
|
|
|
|
DOCUMENT_LOADER_REGEX = r".*".join(DOCUMENT_LOADER_HEADERS)
|
|
|
|
|
2024-06-11 20:26:35 +00:00
|
|
|
|
|
|
|
def check_chat_model(path: Path) -> None:
|
|
|
|
with open(path, "r") as f:
|
|
|
|
doc = f.read()
|
|
|
|
if not re.search(CHAT_MODEL_REGEX, doc, re.DOTALL):
|
|
|
|
raise ValueError(
|
|
|
|
f"Document {path} does not match the ChatModel Integration page template. "
|
|
|
|
f"Please see https://github.com/langchain-ai/langchain/issues/22296 for "
|
|
|
|
f"instructions on how to correctly format a ChatModel Integration page."
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2024-06-14 02:34:50 +00:00
|
|
|
def check_document_loader(path: Path) -> None:
|
|
|
|
with open(path, "r") as f:
|
|
|
|
doc = f.read()
|
|
|
|
if not re.search(DOCUMENT_LOADER_REGEX, doc, re.DOTALL):
|
|
|
|
raise ValueError(
|
|
|
|
f"Document {path} does not match the DocumentLoader Integration page template. "
|
|
|
|
f"Please see https://github.com/langchain-ai/langchain/issues/22866 for "
|
|
|
|
f"instructions on how to correctly format a DocumentLoader Integration page."
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2024-06-11 20:26:35 +00:00
|
|
|
def main(*new_doc_paths: Union[str, Path]) -> None:
|
|
|
|
for path in new_doc_paths:
|
|
|
|
path = Path(path).resolve().absolute()
|
|
|
|
if CURR_DIR.parent / "docs" / "integrations" / "chat" in path.parents:
|
|
|
|
print(f"Checking chat model page {path}")
|
|
|
|
check_chat_model(path)
|
2024-06-14 02:34:50 +00:00
|
|
|
elif (
|
|
|
|
CURR_DIR.parent / "docs" / "integrations" / "document_loaders"
|
|
|
|
in path.parents
|
|
|
|
):
|
|
|
|
print(f"Checking document loader page {path}")
|
|
|
|
check_document_loader(path)
|
2024-06-11 20:26:35 +00:00
|
|
|
else:
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
main(*sys.argv[1:])
|