diff --git a/libs/core/langchain_core/indexing/api.py b/libs/core/langchain_core/indexing/api.py
index 01bc9bffb2..86741ae3a2 100644
--- a/libs/core/langchain_core/indexing/api.py
+++ b/libs/core/langchain_core/indexing/api.py
@@ -214,10 +214,18 @@ def index(
      are not able to specify the uid of the document.
 
     IMPORTANT:
-       if auto_cleanup is set to True, the loader should be returning
-       the entire dataset, and not just a subset of the dataset.
-       Otherwise, the auto_cleanup will remove documents that it is not
-       supposed to.
+       * If auto_cleanup is set to True, the loader should be returning
+         the entire dataset, and not just a subset of the dataset.
+         Otherwise, the auto_cleanup will remove documents that it is not
+         supposed to.
+       * In incremental mode, if documents associated with a particular
+         source id appear across different batches, the indexing API
+         will do some redundant work. This will still result in the
+         correct end state of the index, but will unfortunately not be
+         100% efficient. For example, if a given document is split into 15
+         chunks, and we index them using a batch size of 5, we'll have 3 batches
+         all with the same source id. In general, to avoid doing too much
+         redundant work, select as big a batch size as possible.
 
     Args:
         docs_source: Data loader or iterable of documents to index.
diff --git a/libs/core/langchain_core/indexing/base.py b/libs/core/langchain_core/indexing/base.py
index 776f1f1089..ac73191bbe 100644
--- a/libs/core/langchain_core/indexing/base.py
+++ b/libs/core/langchain_core/indexing/base.py
@@ -5,7 +5,39 @@ from typing import List, Optional, Sequence
 
 
 class RecordManager(ABC):
-    """Abstract base class representing the interface for a record manager."""
+    """Abstract base class representing the interface for a record manager.
+
+    The record manager abstraction is used by the langchain indexing API.
+
+    The record manager keeps track of which documents have been
+    written into a vectorstore and when they were written.
+
+    The indexing API computes hashes for each document and stores the hash
+    together with the write time and the source id in the record manager.
+
+    On subsequent indexing runs, the indexing API can check the record manager
+    to determine which documents have already been indexed and which have not.
+
+    This allows the indexing API to avoid re-indexing documents that have
+    already been indexed, and to only index new documents.
+
+    The main benefit of this abstraction is that it works across many vectorstores.
+    To be supported, a vectorstore only needs to support the ability to add and
+    delete documents by ID. Using the record manager, the indexing API will
+    be able to delete outdated documents and avoid redundant indexing of documents
+    that have already been indexed.
+
+    The main constraints of this abstraction are:
+
+    1. It relies on timestamps to determine which documents have been
+       indexed and which have not. This means that the timestamps must be
+       monotonically increasing. The timestamp should be the time
+       as measured by the server to minimize issues.
+    2. The record manager is currently implemented separately from the
+       vectorstore, which means that the overall system becomes distributed
+       and may create issues with consistency. For example, a write to the
+       record manager may succeed while the corresponding vectorstore write fails.
+    """
 
     def __init__(
         self,