# Mirror of https://github.com/hwchase17/langchain
# Synced 2024-11-08 07:10:35 +00:00
# 327 lines, 9.3 KiB, Python
"""Script for auto-generating api_reference.rst."""
|
|
import importlib
|
|
import inspect
|
|
import typing
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Dict, List, Literal, Optional, Sequence, TypedDict, Union
|
|
|
|
from pydantic import BaseModel
|
|
|
|
# Directory two levels above the directory that contains this script.
ROOT_DIR = Path(__file__).parents[2].absolute()
# Directory that contains this script; the generated .rst files are written here.
HERE = Path(__file__).parent

# Source directories of the packages that get documented.
PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"

# Output files, one per documented package.
WRITE_FILE = HERE / "api_reference.rst"
EXP_WRITE_FILE = HERE / "experimental_api_reference.rst"

# Category assigned to every discovered class; it selects the autosummary
# template used when the class is rendered.
ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]
|
|
|
|
|
|
class ClassInfo(TypedDict):
    """Information about a class."""

    name: str
    """The name of the class."""
    qualified_name: str
    """The fully qualified name of the class."""
    kind: ClassKind
    """The kind of the class."""
    is_public: bool
    """Whether the class is public or not."""
|
|
|
|
|
|
class FunctionInfo(TypedDict):
    """Information about a function."""

    name: str
    """The name of the function."""
    qualified_name: str
    """The fully qualified name of the function."""
    is_public: bool
    """Whether the function is public or not."""
|
|
|
|
|
|
class ModuleMembers(TypedDict):
    """A dictionary of module members."""

    classes_: Sequence[ClassInfo]
    """The classes found in the module(s)."""
    functions: Sequence[FunctionInfo]
    """The functions found in the module(s)."""
|
|
|
|
|
|
def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
    """Load the classes and functions defined directly in a module.

    The module is imported, and every member whose ``__module__`` equals
    ``module_path`` is recorded. Members that were merely imported into the
    module, and members that are neither classes nor functions, are skipped.

    Args:
        module_path: Dotted import path handed to ``importlib.import_module``.
        namespace: Dotted prefix used to build each member's
            ``qualified_name`` (``f"{namespace}.{name}"``).

    Returns:
        The classes and functions found in the module.
    """
    classes_: List[ClassInfo] = []
    functions: List[FunctionInfo] = []
    module = importlib.import_module(module_path)

    for name, type_ in inspect.getmembers(module):
        # Only keep members defined in this very module; anything without a
        # ``__module__`` or with a different one is a re-export or a constant.
        if getattr(type_, "__module__", None) != module_path:
            continue

        qualified_name = f"{namespace}.{name}"
        is_public = not name.startswith("_")

        if inspect.isclass(type_):
            # TypedDict classes are recognised through their metaclass.
            if isinstance(type_, typing._TypedDictMeta):  # type: ignore
                kind: ClassKind = "TypedDict"
            elif issubclass(type_, Enum):
                kind = "enum"
            elif issubclass(type_, BaseModel):
                kind = "Pydantic"
            else:
                kind = "Regular"

            classes_.append(
                ClassInfo(
                    name=name,
                    qualified_name=qualified_name,
                    kind=kind,
                    is_public=is_public,
                )
            )
        elif inspect.isfunction(type_):
            functions.append(
                FunctionInfo(
                    name=name,
                    qualified_name=qualified_name,
                    is_public=is_public,
                )
            )

    return ModuleMembers(
        classes_=classes_,
        functions=functions,
    )
|
|
|
|
|
|
def _merge_module_members(
    module_members: Sequence[ModuleMembers],
) -> ModuleMembers:
    """Merge several module member collections into one.

    Args:
        module_members: The collections to combine, in order.

    Returns:
        A new ``ModuleMembers`` whose ``classes_`` and ``functions`` are the
        concatenation of the inputs' lists. The inputs are not modified.
    """
    classes_: List[ClassInfo] = []
    functions: List[FunctionInfo] = []
    for members in module_members:
        classes_.extend(members["classes_"])
        functions.extend(members["functions"])

    return ModuleMembers(
        classes_=classes_,
        functions=functions,
    )
|
|
|
|
|
|
def _load_package_modules(
    package_directory: Union[str, Path], submodule: Optional[str] = None
) -> Dict[str, ModuleMembers]:
    """Recursively load modules of a package based on the file system.

    Traversal based on the file system makes it easy to determine which
    of the modules/packages are part of the package vs. 3rd party or built-in.

    Files and directories whose name starts with an underscore are skipped.
    A module that fails to import is reported on stdout and skipped.

    Args:
        package_directory: Path to the package directory.
        submodule: Optional name of submodule to load. When given, only files
            below ``package_directory / submodule`` are visited.

    Returns:
        A mapping from the first component of each module's path (relative to
        the directory being walked) to the merged members of every module
        found under that component.
    """
    package_path = Path(package_directory)
    modules_by_namespace: Dict[str, ModuleMembers] = {}

    # Get the high level package name
    package_name = package_path.name

    # If we are loading a submodule, add it in
    if submodule is not None:
        package_path = package_path / submodule

    # Sorted so that import order (and any error output) is reproducible.
    for file_path in sorted(package_path.rglob("*.py")):
        relative_module_path = file_path.relative_to(package_path)

        # Skip if any path part (directory or file name) starts with an
        # underscore.
        if any(part.startswith("_") for part in relative_module_path.parts):
            continue

        # Get the full namespace of the module. Built from path parts so it
        # does not depend on the OS path separator and only the file's own
        # ".py" suffix is dropped.
        namespace = ".".join(relative_module_path.with_suffix("").parts)
        # Keep only the top level namespace
        top_namespace = namespace.split(".")[0]

        # If submodule is present, we need to construct the paths in a
        # slightly different way
        if submodule is not None:
            module_path = f"{package_name}.{submodule}.{namespace}"
            member_namespace = f"{submodule}.{namespace}"
        else:
            module_path = f"{package_name}.{namespace}"
            member_namespace = namespace

        try:
            module_members = _load_module_members(module_path, member_namespace)
        except ImportError as e:
            print(f"Error: Unable to import module '{namespace}' with error: {e}")
            continue

        # Merge module members if the namespace already exists
        if top_namespace in modules_by_namespace:
            module_members = _merge_module_members(
                [modules_by_namespace[top_namespace], module_members]
            )
        modules_by_namespace[top_namespace] = module_members

    return modules_by_namespace
|
|
|
|
|
|
def _construct_doc(pkg: str, members_by_namespace: Dict[str, ModuleMembers]) -> str:
|
|
"""Construct the contents of the reference.rst file for the given package.
|
|
|
|
Args:
|
|
pkg: The package name
|
|
members_by_namespace: The members of the package, dict organized by top level
|
|
module contains a list of classes and functions
|
|
inside of the top level namespace.
|
|
|
|
Returns:
|
|
The contents of the reference.rst file.
|
|
"""
|
|
full_doc = f"""\
|
|
=======================
|
|
``{pkg}`` API Reference
|
|
=======================
|
|
|
|
"""
|
|
namespaces = sorted(members_by_namespace)
|
|
|
|
for module in namespaces:
|
|
_members = members_by_namespace[module]
|
|
classes = _members["classes_"]
|
|
functions = _members["functions"]
|
|
if not (classes or functions):
|
|
continue
|
|
section = f":mod:`{pkg}.{module}`"
|
|
underline = "=" * (len(section) + 1)
|
|
full_doc += f"""\
|
|
{section}
|
|
{underline}
|
|
|
|
.. automodule:: {pkg}.{module}
|
|
:no-members:
|
|
:no-inherited-members:
|
|
|
|
"""
|
|
|
|
if classes:
|
|
full_doc += f"""\
|
|
Classes
|
|
--------------
|
|
.. currentmodule:: {pkg}
|
|
|
|
.. autosummary::
|
|
:toctree: {module}
|
|
"""
|
|
|
|
for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
|
|
if not class_["is_public"]:
|
|
continue
|
|
|
|
if class_["kind"] == "TypedDict":
|
|
template = "typeddict.rst"
|
|
elif class_["kind"] == "enum":
|
|
template = "enum.rst"
|
|
elif class_["kind"] == "Pydantic":
|
|
template = "pydantic.rst"
|
|
else:
|
|
template = "class.rst"
|
|
|
|
full_doc += f"""\
|
|
:template: {template}
|
|
|
|
{class_["qualified_name"]}
|
|
|
|
"""
|
|
|
|
if functions:
|
|
_functions = [f["qualified_name"] for f in functions if f["is_public"]]
|
|
fstring = "\n ".join(sorted(_functions))
|
|
full_doc += f"""\
|
|
Functions
|
|
--------------
|
|
.. currentmodule:: {pkg}
|
|
|
|
.. autosummary::
|
|
:toctree: {module}
|
|
:template: function.rst
|
|
|
|
{fstring}
|
|
|
|
"""
|
|
return full_doc
|
|
|
|
|
|
def _document_langchain_experimental() -> None:
    """Document the langchain_experimental package.

    Loads the members found under ``EXP_DIR`` and writes the generated
    document, preceded by an ``experimental_api_reference`` label, to
    ``EXP_WRITE_FILE``.
    """
    # Generate experimental_api_reference.rst
    exp_members = _load_package_modules(EXP_DIR)
    exp_doc = ".. _experimental_api_reference:\n\n" + _construct_doc(
        "langchain_experimental", exp_members
    )
    # Explicit encoding so the output does not depend on the platform default.
    EXP_WRITE_FILE.write_text(exp_doc, encoding="utf-8")
|
|
|
|
|
|
def _document_langchain_core() -> None:
    """Document the main langchain package.

    Besides the top-level namespaces found under ``PKG_DIR``, four nested
    namespaces are given their own entries: ``agents.output_parsers``,
    ``agents.format_scratchpad``, ``tools.render`` and ``schema.runnable``.
    The generated document, preceded by an ``api_reference`` label, is written
    to ``WRITE_FILE``.
    """
    # load top level module members
    lc_members = _load_package_modules(PKG_DIR)

    # Add additional packages
    tools = _load_package_modules(PKG_DIR, "tools")
    agents = _load_package_modules(PKG_DIR, "agents")
    schema = _load_package_modules(PKG_DIR, "schema")

    # A missing key here raises KeyError: the nested namespace was not found
    # (or none of its modules could be imported).
    lc_members.update(
        {
            "agents.output_parsers": agents["output_parsers"],
            "agents.format_scratchpad": agents["format_scratchpad"],
            "tools.render": tools["render"],
            "schema.runnable": schema["runnable"],
        }
    )

    lc_doc = ".. _api_reference:\n\n" + _construct_doc("langchain", lc_members)

    # Explicit encoding so the output does not depend on the platform default.
    WRITE_FILE.write_text(lc_doc, encoding="utf-8")
|
|
|
|
|
|
def main() -> None:
    """Generate the reference.rst file for each package."""
    _document_langchain_core()
    _document_langchain_experimental()
|
|
|
|
|
|
# Only generate the files when run as a script, not when imported.
if __name__ == "__main__":
    main()
|