diff --git a/.github/workflows/check_new_docs.yml b/.github/workflows/check_new_docs.yml
new file mode 100644
index 0000000000..09fdd4d18e
--- /dev/null
+++ b/.github/workflows/check_new_docs.yml
@@ -0,0 +1,37 @@
+---
+name: Integration docs lint
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+
+# If another push to the same PR or branch happens while this workflow is still running,
+# cancel the earlier run in favor of the next run.
+#
+# There's no point in testing an outdated version of the code. GitHub only allows
+# a limited number of job runners to be active at the same time, so it's better to cancel
+# pointless jobs early so that more useful jobs can run sooner.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+      - id: files
+        uses: Ana06/get-changed-files@v2.2.0
+      - name: Check new docs
+        # Pass the file list through an environment variable rather than expanding
+        # the expression inside the script, so file names are never parsed as shell
+        # syntax. $ADDED_FILES is left unquoted on purpose: the action's output is
+        # space-delimited by default and each path must become its own argument.
+        env:
+          ADDED_FILES: ${{ steps.files.outputs.added }}
+        run: |
+          python docs/scripts/check_templates.py $ADDED_FILES
diff --git a/docs/scripts/check_templates.py b/docs/scripts/check_templates.py
new file mode 100644
index 0000000000..ca551dc95c
--- /dev/null
+++ b/docs/scripts/check_templates.py
@@ -0,0 +1,68 @@
+"""Check that newly added integration docs follow the expected page template.
+
+Usage: python check_templates.py [PATH ...]
+"""
+
+import re
+import sys
+from pathlib import Path
+from typing import Union
+
+CURR_DIR = Path(__file__).parent.absolute()
+
+# Section headers a ChatModel integration page must contain, in this order.
+CHAT_MODEL_HEADERS = (
+    "## Overview",
+    "### Integration details",
+    "### Model features",
+    "## Setup",
+    "## Instantiation",
+    "## Invocation",
+    "## Chaining",
+    "## API reference",
+)
+# Searched with re.DOTALL so that ".*" also spans the newlines between headers.
+CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS)
+
+
+def check_chat_model(path: Path) -> None:
+    """Validate that a document follows the ChatModel integration page template.
+
+    Args:
+        path: Location of the document to check.
+
+    Raises:
+        ValueError: If the headers in CHAT_MODEL_HEADERS do not all appear in
+            the document in order.
+    """
+    # Read as UTF-8 explicitly rather than depending on the locale's default.
+    with open(path, "r", encoding="utf-8") as f:
+        doc = f.read()
+    if not re.search(CHAT_MODEL_REGEX, doc, re.DOTALL):
+        raise ValueError(
+            f"Document {path} does not match the ChatModel Integration page template. \n"
+            f"Please see https://github.com/langchain-ai/langchain/issues/22296 for "
+            f"instructions on how to correctly format a ChatModel Integration page."
+        )
+
+
+def main(*new_doc_paths: Union[str, Path]) -> None:
+    """Check every given path that lives under the chat integrations docs directory.
+
+    Args:
+        new_doc_paths: Paths of newly added files. Only files located under the
+            chat integrations docs directory are checked; others are skipped.
+
+    Raises:
+        ValueError: If a checked document does not match its template.
+    """
+    chat_docs_dir = CURR_DIR.parent / "docs" / "integrations" / "chat"
+    for path in new_doc_paths:
+        path = Path(path).resolve().absolute()
+        if chat_docs_dir in path.parents:
+            print(f"Checking chat model page {path}")
+            check_chat_model(path)
+
+
+if __name__ == "__main__":
+    main(*sys.argv[1:])