"""Script for auto-generating api_reference.rst.""" import importlib import inspect import typing from enum import Enum from pathlib import Path from typing import Dict, List, Literal, Optional, Sequence, TypedDict, Union from pydantic import BaseModel ROOT_DIR = Path(__file__).parents[2].absolute() HERE = Path(__file__).parent PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain" EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental" WRITE_FILE = HERE / "api_reference.rst" EXP_WRITE_FILE = HERE / "experimental_api_reference.rst" ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"] class ClassInfo(TypedDict): """Information about a class.""" name: str """The name of the class.""" qualified_name: str """The fully qualified name of the class.""" kind: ClassKind """The kind of the class.""" is_public: bool """Whether the class is public or not.""" class FunctionInfo(TypedDict): """Information about a function.""" name: str """The name of the function.""" qualified_name: str """The fully qualified name of the function.""" is_public: bool """Whether the function is public or not.""" class ModuleMembers(TypedDict): """A dictionary of module members.""" classes_: Sequence[ClassInfo] functions: Sequence[FunctionInfo] def _load_module_members(module_path: str, namespace: str) -> ModuleMembers: """Load all members of a module. Args: module_path: Path to the module. namespace: the namespace of the module. Returns: list: A list of loaded module objects. """ classes_: List[ClassInfo] = [] functions: List[FunctionInfo] = [] module = importlib.import_module(module_path) for name, type_ in inspect.getmembers(module): if not hasattr(type_, "__module__"): continue if type_.__module__ != module_path: continue if inspect.isclass(type_): if type(type_) == typing._TypedDictMeta: # type: ignore kind: ClassKind = "TypedDict" elif issubclass(type_, Enum): kind = "enum" elif issubclass(type_, BaseModel): kind = "Pydantic" else: kind = "Regular" classes_.append( ClassInfo( name=name, qualified_name=f"{namespace}.{name}", kind=kind, is_public=not name.startswith("_"), ) ) elif inspect.isfunction(type_): functions.append( FunctionInfo( name=name, qualified_name=f"{namespace}.{name}", is_public=not name.startswith("_"), ) ) else: continue return ModuleMembers( classes_=classes_, functions=functions, ) def _merge_module_members( module_members: Sequence[ModuleMembers], ) -> ModuleMembers: """Merge module members.""" classes_: List[ClassInfo] = [] functions: List[FunctionInfo] = [] for module in module_members: classes_.extend(module["classes_"]) functions.extend(module["functions"]) return ModuleMembers( classes_=classes_, functions=functions, ) def _load_package_modules( package_directory: Union[str, Path], submodule: Optional[str] = None ) -> Dict[str, ModuleMembers]: """Recursively load modules of a package based on the file system. Traversal based on the file system makes it easy to determine which of the modules/packages are part of the package vs. 3rd party or built-in. Parameters: package_directory: Path to the package directory. submodule: Optional name of submodule to load. Returns: list: A list of loaded module objects. """ package_path = ( Path(package_directory) if isinstance(package_directory, str) else package_directory ) modules_by_namespace = {} # Get the high level package name package_name = package_path.name # If we are loading a submodule, add it in if submodule is not None: package_path = package_path / submodule for file_path in package_path.rglob("*.py"): if file_path.name.startswith("_"): continue relative_module_name = file_path.relative_to(package_path) # Skip if any module part starts with an underscore if any(part.startswith("_") for part in relative_module_name.parts): continue # Get the full namespace of the module namespace = str(relative_module_name).replace(".py", "").replace("/", ".") # Keep only the top level namespace top_namespace = namespace.split(".")[0] try: # If submodule is present, we need to construct the paths in a slightly # different way if submodule is not None: module_members = _load_module_members( f"{package_name}.{submodule}.{namespace}", f"{submodule}.{namespace}", ) else: module_members = _load_module_members( f"{package_name}.{namespace}", namespace ) # Merge module members if the namespace already exists if top_namespace in modules_by_namespace: existing_module_members = modules_by_namespace[top_namespace] _module_members = _merge_module_members( [existing_module_members, module_members] ) else: _module_members = module_members modules_by_namespace[top_namespace] = _module_members except ImportError as e: print(f"Error: Unable to import module '{namespace}' with error: {e}") return modules_by_namespace def _construct_doc(pkg: str, members_by_namespace: Dict[str, ModuleMembers]) -> str: """Construct the contents of the reference.rst file for the given package. Args: pkg: The package name members_by_namespace: The members of the package, dict organized by top level module contains a list of classes and functions inside of the top level namespace. Returns: The contents of the reference.rst file. """ full_doc = f"""\ ======================= ``{pkg}`` API Reference ======================= """ namespaces = sorted(members_by_namespace) for module in namespaces: _members = members_by_namespace[module] classes = _members["classes_"] functions = _members["functions"] if not (classes or functions): continue section = f":mod:`{pkg}.{module}`" underline = "=" * (len(section) + 1) full_doc += f"""\ {section} {underline} .. automodule:: {pkg}.{module} :no-members: :no-inherited-members: """ if classes: full_doc += f"""\ Classes -------------- .. currentmodule:: {pkg} .. autosummary:: :toctree: {module} """ for class_ in sorted(classes, key=lambda c: c["qualified_name"]): if not class_["is_public"]: continue if class_["kind"] == "TypedDict": template = "typeddict.rst" elif class_["kind"] == "enum": template = "enum.rst" elif class_["kind"] == "Pydantic": template = "pydantic.rst" else: template = "class.rst" full_doc += f"""\ :template: {template} {class_["qualified_name"]} """ if functions: _functions = [f["qualified_name"] for f in functions if f["is_public"]] fstring = "\n ".join(sorted(_functions)) full_doc += f"""\ Functions -------------- .. currentmodule:: {pkg} .. autosummary:: :toctree: {module} :template: function.rst {fstring} """ return full_doc def _document_langchain_experimental() -> None: """Document the langchain_experimental package.""" # Generate experimental_api_reference.rst exp_members = _load_package_modules(EXP_DIR) exp_doc = ".. _experimental_api_reference:\n\n" + _construct_doc( "langchain_experimental", exp_members ) with open(EXP_WRITE_FILE, "w") as f: f.write(exp_doc) def _document_langchain_core() -> None: """Document the main langchain package.""" # load top level module members lc_members = _load_package_modules(PKG_DIR) # Add additional packages tools = _load_package_modules(PKG_DIR, "tools") agents = _load_package_modules(PKG_DIR, "agents") schema = _load_package_modules(PKG_DIR, "schema") lc_members.update( { "agents.output_parsers": agents["output_parsers"], "agents.format_scratchpad": agents["format_scratchpad"], "tools.render": tools["render"], "schema.runnable": schema["runnable"], } ) lc_doc = ".. _api_reference:\n\n" + _construct_doc("langchain", lc_members) with open(WRITE_FILE, "w") as f: f.write(lc_doc) def main() -> None: """Generate the reference.rst file for each package.""" _document_langchain_core() _document_langchain_experimental() if __name__ == "__main__": main()