mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
05fd6a16a9
Thank you for contributing to LangChain! - [x] **PR title**: "community: chat models wrapper for Cloudflare Workers AI" - [x] **PR message**: - **Description:** Add chat models wrapper for Cloudflare Workers AI. Enables Langgraph intergration via ChatModel for tool usage, agentic usage. - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Erick Friis <erick@langchain.dev> Co-authored-by: Chester Curme <chester.curme@gmail.com>
99 lines
2.9 KiB
Python
99 lines
2.9 KiB
Python
import json
|
|
import re
|
|
import sys
|
|
from functools import cache
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Union
|
|
|
|
CURR_DIR = Path(__file__).parent.absolute()
|
|
CLI_TEMPLATE_DIR = (
|
|
CURR_DIR.parent.parent / "libs/cli/langchain_cli/integration_template/docs"
|
|
)
|
|
|
|
INFO_BY_DIR: Dict[str, Dict[str, Union[int, str]]] = {
|
|
"chat": {
|
|
"issue_number": 22296,
|
|
},
|
|
"document_loaders": {
|
|
"issue_number": 22866,
|
|
},
|
|
"stores": {"issue_number": 24888},
|
|
"llms": {
|
|
"issue_number": 24803,
|
|
},
|
|
"text_embedding": {"issue_number": 14856},
|
|
"toolkits": {"issue_number": 24820},
|
|
"tools": {"issue_number": "TODO"},
|
|
"vectorstores": {"issue_number": 24800},
|
|
"retrievers": {"issue_number": 24908},
|
|
}
|
|
|
|
|
|
@cache
|
|
def _get_headers(doc_dir: str) -> Iterable[str]:
|
|
"""Gets all markdown headers ## and below from the integration template.
|
|
|
|
Ignores headers that contain "TODO"."""
|
|
ipynb_name = f"{doc_dir}.ipynb"
|
|
if not (CLI_TEMPLATE_DIR / ipynb_name).exists():
|
|
raise FileNotFoundError(f"Could not find {ipynb_name} in {CLI_TEMPLATE_DIR}")
|
|
with open(CLI_TEMPLATE_DIR / ipynb_name, "r") as f:
|
|
nb = json.load(f)
|
|
|
|
headers: List[str] = []
|
|
for cell in nb["cells"]:
|
|
if cell["cell_type"] == "markdown":
|
|
for line in cell["source"]:
|
|
if not line.startswith("## ") or "TODO" in line:
|
|
continue
|
|
header = line.strip()
|
|
headers.append(header)
|
|
return headers
|
|
|
|
|
|
def check_header_order(path: Path) -> None:
|
|
if path.name.startswith("index."):
|
|
# skip index pages
|
|
return
|
|
doc_dir = path.parent.name
|
|
if doc_dir not in INFO_BY_DIR:
|
|
# Skip if not a directory we care about
|
|
return
|
|
headers = _get_headers(doc_dir)
|
|
issue_number = INFO_BY_DIR[doc_dir].get("issue_number", "nonexistent")
|
|
|
|
print(f"Checking {doc_dir} page {path}")
|
|
|
|
with open(path, "r") as f:
|
|
doc = f.read()
|
|
notfound = []
|
|
for header in headers:
|
|
index = doc.find(header)
|
|
if index == -1:
|
|
notfound.append(header)
|
|
doc = doc[index + len(header) :]
|
|
if notfound:
|
|
notfound_headers = "\n- ".join(notfound)
|
|
raise ValueError(
|
|
f"Document {path} is missing headers:"
|
|
"\n- "
|
|
f"{notfound_headers}"
|
|
"\n\n"
|
|
"Please see https://github.com/langchain-ai/langchain/issues/"
|
|
f"{issue_number} for instructions on how to correctly format a "
|
|
f"{doc_dir} integration page."
|
|
)
|
|
|
|
|
|
def main(*new_doc_paths: Union[str, Path]) -> None:
|
|
for path in new_doc_paths:
|
|
path = Path(path).resolve().absolute()
|
|
if CURR_DIR.parent / "docs" / "integrations" in path.parents:
|
|
check_header_order(path)
|
|
else:
|
|
continue
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main(*sys.argv[1:])
|