docs: automatically add api ref links (#20755)

![Screenshot 2024-04-22 at 1 51 13 PM](https://github.com/langchain-ai/langchain/assets/22008038/b8b09fec-3800-4b97-bd26-5571b8308f4a)
2024-10-31 15:20:26 +00:00 · 2024-04-22 14:05:29 -07:00 · 2024-04-22 14:05:29 -07:00 · 2a11a30572
commit 2a11a30572
parent 936c6cc74a
4 changed files with 23 additions and 6 deletions
--- a/docs/.local_build.sh
+++ b/docs/.local_build.sh
@@ -19,6 +19,9 @@ poetry run python scripts/copy_templates.py
 wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
 wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md
 yarn
-poetry run quarto preview docs
+poetry run quarto render docs
 poetry run python scripts/generate_api_reference_links.py  --docs_dir docs
 yarn
 yarn start
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@@ -16,7 +16,7 @@ _BASE_URL = "https://api.python.langchain.com/en/latest/"
 code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
 # Regular expression to match langchain import lines
 _IMPORT_RE = re.compile(
-    r"from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+"
+    r"from\s+(langchain(?:_\w+)?\.\w+(?:\.\w+)*?)\s+import\s+"
    r"((?:\w+(?:,\s*)?)*"  # Match zero or more words separated by a comma+optional ws
    r"(?:\s*\(.*?\))?)",  # Match optional parentheses block
    re.DOTALL,  # Match newlines as well
@@ -70,7 +70,9 @@ def main():
        if file_imports:
            # Use relative file path as key
            relative_path = (
-                os.path.relpath(file, _DOCS_DIR).replace(".mdx", "").replace(".md", "")
+                os.path.relpath(file, args.docs_dir)
                .replace(".mdx", "/")
                .replace(".md", "/")
            )
            doc_url = f"https://python.langchain.com/docs/{relative_path}"
@@ -122,8 +124,10 @@ def replace_imports(file):
        imports = []
        for import_match in _IMPORT_RE.finditer(code):
            module = import_match.group(1)
            if "pydantic_v1" in module:
                continue
            imports_str = (
-                import_match.group(3).replace("(\n", "").replace("\n)", "")
+                import_match.group(2).replace("(\n", "").replace("\n)", "")
            )  # Handle newlines within parentheses
            # remove any newline and spaces, then split by comma
            imported_classes = [
@@ -140,7 +144,8 @@ def replace_imports(file):
                except ImportError as e:
                    logger.warning(f"Failed to load for class {class_name}, {e}")
                    continue
-
+                if len(module_path.split(".")) < 2:
                    continue
                url = (
                    _BASE_URL
                    + module_path.split(".")[1]
--- a/docs/vercel_build.sh
+++ b/docs/vercel_build.sh
@@ -33,3 +33,4 @@ python3 scripts/resolve_local_links.py docs/langgraph.md https://github.com/lang
 # render
 quarto render docs/
 python3 scripts/generate_api_reference_links.py --docs_dir docs
--- a/docs/vercel_requirements.txt
+++ b/docs/vercel_requirements.txt
@@ -1,4 +1,12 @@
 -e ../libs/langchain
 -e ../libs/community
 -e ../libs/core
 -e ../libs/experimental
 -e ../libs/text-splitters
 langchain-cohere
 langchain-astradb
 langchain-nvidia-ai-endpoints
 langchain-nvidia-trt
 langchain-elasticsearch
 langchain-postgres
 urllib3==1.26.18