docs: Fix AstraDBVectorStore docstring (#17706)
This commit is contained in: parent 66e1005898, commit 0e26b16930
@@ -67,82 +67,6 @@ def _unique_list(lst: List[T], key: Callable[[T], U]) -> List[T]:
 
 
 class AstraDBVectorStore(VectorStore):
-    """Wrapper around DataStax Astra DB for vector-store workloads.
-
-    For quickstart and details, visit:
-        docs.datastax.com/en/astra/home/astra.html
-
-    Example:
-        .. code-block:: python
-
-            from langchain_astradb.vectorstores import AstraDBVectorStore
-            from langchain_openai.embeddings import OpenAIEmbeddings
-
-            embeddings = OpenAIEmbeddings()
-            vectorstore = AstraDBVectorStore(
-                embedding=embeddings,
-                collection_name="my_store",
-                token="AstraCS:...",
-                api_endpoint="https://<DB-ID>-<REGION>.apps.astra.datastax.com"
-            )
-
-            vectorstore.add_texts(["Giraffes", "All good here"])
-            results = vectorstore.similarity_search("Everything's ok", k=1)
-
-    Constructor Args (only keyword-arguments accepted):
-        embedding (Embeddings): embedding function to use.
-        collection_name (str): name of the Astra DB collection to create/use.
-        token (Optional[str]): API token for Astra DB usage.
-        api_endpoint (Optional[str]): full URL to the API endpoint,
-            such as "https://<DB-ID>-us-east1.apps.astra.datastax.com".
-        astra_db_client (Optional[astrapy.db.AstraDB]):
-            *alternative to token+api_endpoint*,
-            you can pass an already-created 'astrapy.db.AstraDB' instance.
-        async_astra_db_client (Optional[astrapy.db.AsyncAstraDB]):
-            same as `astra_db_client`, but the basis for the async API
-            of the vector store.
-        namespace (Optional[str]): namespace (aka keyspace) where the
-            collection is created. Defaults to the database's "default namespace".
-        metric (Optional[str]): similarity function to use out of those
-            available in Astra DB. If left out, it will use Astra DB API's
-            defaults (i.e. "cosine" - but, for performance reasons,
-            "dot_product" is suggested if embeddings are normalized to one).
-
-    Advanced arguments (coming with sensible defaults):
-        batch_size (Optional[int]): Size of batches for bulk insertions.
-        bulk_insert_batch_concurrency (Optional[int]): Number of threads
-            to insert batches concurrently.
-        bulk_insert_overwrite_concurrency (Optional[int]): Number of
-            threads in a batch to insert pre-existing entries.
-        bulk_delete_concurrency (Optional[int]): Number of threads
-            (for deleting multiple rows concurrently).
-        pre_delete_collection (Optional[bool]): whether to delete the collection
-            before creating it. If False and the collection already exists,
-            the collection will be used as is.
-
-    A note on concurrency: as a rule of thumb, on a typical client machine
-    it is suggested to keep the quantity
-        bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
-    much below 1000 to avoid exhausting the client multithreading/networking
-    resources. The hardcoded defaults are somewhat conservative to meet
-    most machines' specs, but a sensible choice to test may be:
-        bulk_insert_batch_concurrency = 80
-        bulk_insert_overwrite_concurrency = 10
-    A bit of experimentation is required to nail the best results here,
-    depending on both the machine/network specs and the expected workload
-    (specifically, how often a write is an update of an existing id).
-    Remember you can pass concurrency settings to individual calls to
-    add_texts and add_documents as well.
-
-    A note on passing astra_db_client and/or async_astra_db_client instead
-    of the credentials (token, api_endpoint):
-    - if you pass only the async client when creating the store,
-      the sync methods will error when called.
-    - conversely, if you pass only the sync client, the async methods will
-      still be available, but will be wrapping its sync counterpart
-      in a `run_in_executor` construct instead of using the native async.
-    """
-
     @staticmethod
     def _filter_to_metadata(filter_dict: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         if filter_dict is None:
@@ -180,10 +104,71 @@ class AstraDBVectorStore(VectorStore):
         bulk_delete_concurrency: Optional[int] = None,
         pre_delete_collection: bool = False,
     ) -> None:
-        """
-        Create an AstraDBVectorStore vector store object. See class docstring for help.
-        """
+        """Wrapper around DataStax Astra DB for vector-store workloads.
+
+        For quickstart and details, visit
+        https://docs.datastax.com/en/astra/astra-db-vector/
+
+        Example:
+            .. code-block:: python
+
+                from langchain_astradb.vectorstores import AstraDBVectorStore
+                from langchain_openai.embeddings import OpenAIEmbeddings
+                embeddings = OpenAIEmbeddings()
+                vectorstore = AstraDBVectorStore(
+                    embedding=embeddings,
+                    collection_name="my_store",
+                    token="AstraCS:...",
+                    api_endpoint="https://<DB-ID>-<REGION>.apps.astra.datastax.com"
+                )
+
+                vectorstore.add_texts(["Giraffes", "All good here"])
+                results = vectorstore.similarity_search("Everything's ok", k=1)
+
+        Args:
+            embedding: embedding function to use.
+            collection_name: name of the Astra DB collection to create/use.
+            token: API token for Astra DB usage.
+            api_endpoint: full URL to the API endpoint, such as
+                `https://<DB-ID>-us-east1.apps.astra.datastax.com`.
+            astra_db_client: *alternative to token+api_endpoint*,
+                you can pass an already-created 'astrapy.db.AstraDB' instance.
+            async_astra_db_client: *alternative to token+api_endpoint*,
+                you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
+            namespace: namespace (aka keyspace) where the collection is created.
+                Defaults to the database's "default namespace".
+            metric: similarity function to use out of those available in Astra DB.
+                If left out, it will use Astra DB API's defaults (i.e. "cosine" - but,
+                for performance reasons, "dot_product" is suggested if embeddings are
+                normalized to one).
+            batch_size: Size of batches for bulk insertions.
+            bulk_insert_batch_concurrency: Number of threads or coroutines to insert
+                batches concurrently.
+            bulk_insert_overwrite_concurrency: Number of threads or coroutines in a
+                batch to insert pre-existing entries.
+            bulk_delete_concurrency: Number of threads (for deleting multiple rows
+                concurrently).
+            pre_delete_collection: whether to delete the collection before creating it.
+                If False and the collection already exists, the collection will be used
+                as is.
+
+        Note:
+            For concurrency in synchronous :meth:`~add_texts`:, as a rule of thumb, on a
+            typical client machine it is suggested to keep the quantity
+            bulk_insert_batch_concurrency * bulk_insert_overwrite_concurrency
+            much below 1000 to avoid exhausting the client multithreading/networking
+            resources. The hardcoded defaults are somewhat conservative to meet
+            most machines' specs, but a sensible choice to test may be:
+
+            - bulk_insert_batch_concurrency = 80
+            - bulk_insert_overwrite_concurrency = 10
+
+            A bit of experimentation is required to nail the best results here,
+            depending on both the machine/network specs and the expected workload
+            (specifically, how often a write is an update of an existing id).
+            Remember you can pass concurrency settings to individual calls to
+            :meth:`~add_texts` and :meth:`~add_documents` as well.
+        """
         # Conflicting-arg checks:
         if astra_db_client is not None or async_astra_db_client is not None:
             if token is not None or api_endpoint is not None:
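
A minimal usage sketch of the constructor documented in the hunk above, drawn only from the Args and the Note in the new docstring; the token, endpoint and collection name are placeholders, and the two concurrency values are merely the test settings the Note suggests.

.. code-block:: python

    from langchain_astradb.vectorstores import AstraDBVectorStore
    from langchain_openai.embeddings import OpenAIEmbeddings

    # Placeholders: supply your own Astra DB token, API endpoint and collection name.
    vectorstore = AstraDBVectorStore(
        embedding=OpenAIEmbeddings(),
        collection_name="my_store",
        token="AstraCS:...",
        api_endpoint="https://<DB-ID>-<REGION>.apps.astra.datastax.com",
        metric="dot_product",  # suggested when embeddings are normalized to one
        bulk_insert_batch_concurrency=80,   # values suggested in the Note above
        bulk_insert_overwrite_concurrency=10,
    )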
@@ -349,8 +334,13 @@ class AstraDBVectorStore(VectorStore):
 
     def delete_by_document_id(self, document_id: str) -> bool:
         """
-        Remove a single document from the store, given its document_id (str).
-        Return True if a document has indeed been deleted, False if ID not found.
+        Remove a single document from the store, given its document ID.
+
+        Args:
+            document_id: The document ID
+
+        Returns
+            True if a document has indeed been deleted, False if ID not found.
         """
         self._ensure_astra_db_client()
         # self.collection is not None (by _ensure_astra_db_client)
@@ -361,8 +351,13 @@ class AstraDBVectorStore(VectorStore):
 
     async def adelete_by_document_id(self, document_id: str) -> bool:
         """
-        Remove a single document from the store, given its document_id (str).
-        Return True if a document has indeed been deleted, False if ID not found.
+        Remove a single document from the store, given its document ID.
+
+        Args:
+            document_id: The document ID
+
+        Returns
+            True if a document has indeed been deleted, False if ID not found.
         """
         await self._ensure_db_setup()
         if not self.async_collection:
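
A short usage sketch for the two deletion-by-id methods documented above, assuming a `vectorstore` built as in the class example; the document id is a placeholder.

.. code-block:: python

    # Sync: returns True if the document existed and was deleted, False otherwise.
    was_deleted = vectorstore.delete_by_document_id("doc-id-123")

    # Async counterpart, to be awaited inside an async function:
    # was_deleted = await vectorstore.adelete_by_document_id("doc-id-123")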
@@ -381,13 +376,12 @@ class AstraDBVectorStore(VectorStore):
         """Delete by vector ids.
 
         Args:
-            ids (Optional[List[str]]): List of ids to delete.
-            concurrency (Optional[int]): max number of threads issuing
-                single-doc delete requests. Defaults to instance-level setting.
+            ids: List of ids to delete.
+            concurrency: max number of threads issuing single-doc delete requests.
+                Defaults to instance-level setting.
 
         Returns:
-            Optional[bool]: True if deletion is successful,
-            False otherwise, None if not implemented.
+            True if deletion is successful, False otherwise.
         """
 
         if kwargs:
@@ -416,17 +410,15 @@ class AstraDBVectorStore(VectorStore):
         concurrency: Optional[int] = None,
         **kwargs: Any,
     ) -> Optional[bool]:
-        """Delete by vector ID or other criteria.
+        """Delete by vector ids.
 
         Args:
             ids: List of ids to delete.
-            concurrency (Optional[int]): max number of concurrent delete queries.
+            concurrency: max concurrency of single-doc delete requests.
                 Defaults to instance-level setting.
             **kwargs: Other keyword arguments that subclasses might use.
 
         Returns:
-            Optional[bool]: True if deletion is successful,
-            False otherwise, None if not implemented.
+            True if deletion is successful, False otherwise.
         """
         if kwargs:
             warnings.warn(
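
A sketch of bulk deletion by ids per the Args documented above; the ids are placeholders, and the per-call `concurrency` override simply illustrates the "Defaults to instance-level setting" behaviour.

.. code-block:: python

    # Delete several documents by id, overriding the instance-level
    # bulk_delete_concurrency for this call only.
    ok = vectorstore.delete(ids=["id-1", "id-2", "id-3"], concurrency=5)

    # Async counterpart:
    # ok = await vectorstore.adelete(ids=["id-1", "id-2"], concurrency=5)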
@@ -447,7 +439,7 @@ class AstraDBVectorStore(VectorStore):
     def delete_collection(self) -> None:
         """
         Completely delete the collection from the database (as opposed
-        to 'clear()', which empties it only).
+        to :meth:`~clear`, which empties it only).
         Stored data is lost and unrecoverable, resources are freed.
         Use with caution.
         """
@@ -460,7 +452,7 @@ class AstraDBVectorStore(VectorStore):
     async def adelete_collection(self) -> None:
         """
         Completely delete the collection from the database (as opposed
-        to 'clear()', which empties it only).
+        to :meth:`~aclear`, which empties it only).
         Stored data is lost and unrecoverable, resources are freed.
         Use with caution.
         """
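
The distinction these docstrings draw between emptying and dropping the collection, as a sketch (assuming the same `vectorstore` instance as before):

.. code-block:: python

    # Empties the collection but keeps it in place for further use.
    vectorstore.clear()

    # Drops the collection entirely; stored data is lost and unrecoverable.
    vectorstore.delete_collection()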
@@ -553,28 +545,29 @@ class AstraDBVectorStore(VectorStore):
             will be replaced.
 
         Args:
-            texts (Iterable[str]): Texts to add to the vectorstore.
-            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
-            ids (Optional[List[str]], optional): Optional list of ids.
-            batch_size (Optional[int]): Number of documents in each API call.
+            texts: Texts to add to the vectorstore.
+            metadatas: Optional list of metadatas.
+            ids: Optional list of ids.
+            batch_size: Number of documents in each API call.
                 Check the underlying Astra DB HTTP API specs for the max value
                 (20 at the time of writing this). If not provided, defaults
                 to the instance-level setting.
-            batch_concurrency (Optional[int]): number of threads to process
+            batch_concurrency: number of threads to process
                 insertion batches concurrently. Defaults to instance-level
                 setting if not provided.
-            overwrite_concurrency (Optional[int]): number of threads to process
+            overwrite_concurrency: number of threads to process
                 pre-existing documents in each batch (which require individual
                 API calls). Defaults to instance-level setting if not provided.
 
-        A note on metadata: there are constraints on the allowed field names
-        in this dictionary, coming from the underlying Astra DB API.
+        Note:
+            There are constraints on the allowed field names
+            in the metadata dictionaries, coming from the underlying Astra DB API.
             For instance, the `$` (dollar sign) cannot be used in the dict keys.
             See this document for details:
-            docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+            https://docs.datastax.com/en/astra/astra-db-vector/api-reference/data-api.html
 
         Returns:
-            List[str]: List of ids of the added texts.
+            The list of ids of the added texts.
         """
 
         if kwargs:
@@ -649,27 +642,29 @@ class AstraDBVectorStore(VectorStore):
             will be replaced.
 
         Args:
-            texts (Iterable[str]): Texts to add to the vectorstore.
-            metadatas (Optional[List[dict]], optional): Optional list of metadatas.
-            ids (Optional[List[str]], optional): Optional list of ids.
-            batch_size (Optional[int]): Number of documents in each API call.
+            texts: Texts to add to the vectorstore.
+            metadatas: Optional list of metadatas.
+            ids: Optional list of ids.
+            batch_size: Number of documents in each API call.
                 Check the underlying Astra DB HTTP API specs for the max value
                 (20 at the time of writing this). If not provided, defaults
                 to the instance-level setting.
-            batch_concurrency (Optional[int]): number of concurrent batch insertions.
-                Defaults to instance-level setting if not provided.
-            overwrite_concurrency (Optional[int]): number of concurrent API calls to
-                process pre-existing documents in each batch.
-                Defaults to instance-level setting if not provided.
+            batch_concurrency: number of threads to process
+                insertion batches concurrently. Defaults to instance-level
+                setting if not provided.
+            overwrite_concurrency: number of threads to process
+                pre-existing documents in each batch (which require individual
+                API calls). Defaults to instance-level setting if not provided.
 
-        A note on metadata: there are constraints on the allowed field names
-        in this dictionary, coming from the underlying Astra DB API.
+        Note:
+            There are constraints on the allowed field names
+            in the metadata dictionaries, coming from the underlying Astra DB API.
             For instance, the `$` (dollar sign) cannot be used in the dict keys.
             See this document for details:
-            docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html
+            https://docs.datastax.com/en/astra/astra-db-vector/api-reference/data-api.html
 
         Returns:
-            List[str]: List of ids of the added texts.
+            The list of ids of the added texts.
         """
         await self._ensure_db_setup()
         if not self.async_collection:
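
A sketch of `add_texts` per the Args and the metadata Note documented above, again assuming the `vectorstore` from the class example; texts, metadata and ids are placeholders, and the concurrency values are arbitrary illustrative overrides.

.. code-block:: python

    # Metadata keys must be acceptable to the Astra DB API
    # (for instance, "$" cannot appear in the keys).
    inserted_ids = vectorstore.add_texts(
        texts=["Giraffes", "All good here"],
        metadatas=[{"topic": "animals"}, {"topic": "smalltalk"}],
        ids=["doc-1", "doc-2"],   # re-using an id replaces that document
        batch_size=20,            # per-call override of the instance-level setting
        batch_concurrency=16,
        overwrite_concurrency=10,
    )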
@@ -744,13 +739,15 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float, str]]:
-        """Return docs most similar to embedding vector.
+        """Return docs most similar to embedding vector with score and id.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of (Document, score, id), the most similar to the query vector.
+            The list of (Document, score, id), the most similar to the query vector.
         """
         self._ensure_astra_db_client()
         metadata_parameter = self._filter_to_metadata(filter)
@@ -787,13 +784,15 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float, str]]:
-        """Return docs most similar to embedding vector.
+        """Return docs most similar to embedding vector with score and id.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of (Document, score, id), the most similar to the query vector.
+            The list of (Document, score, id), the most similar to the query vector.
         """
         await self._ensure_db_setup()
         if not self.async_collection:
@@ -833,6 +832,16 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to the query with score and id.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of (Document, score, id), the most similar to the query.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_id_by_vector(
             embedding=embedding_vector,
@@ -846,6 +855,16 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float, str]]:
+        """Return docs most similar to the query with score and id.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of (Document, score, id), the most similar to the query.
+        """
        embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_with_score_id_by_vector(
             embedding=embedding_vector,
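
A sketch of consuming the (Document, score, id) triples documented above. The query text mirrors the class example; the flat `{"key": value}` filter shape is an assumption for illustration, not something stated in this diff.

.. code-block:: python

    # Each hit comes back as a (Document, score, id) triple.
    for doc, score, doc_id in vectorstore.similarity_search_with_score_id(
        "Everything's ok", k=3, filter={"topic": "smalltalk"}
    ):
        print(doc_id, score, doc.page_content)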
@@ -859,13 +878,15 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float]]:
-        """Return docs most similar to embedding vector.
+        """Return docs most similar to embedding vector with score.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of (Document, score), the most similar to the query vector.
+            The list of (Document, score), the most similar to the query vector.
         """
         return [
             (doc, score)
@@ -882,13 +903,15 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float]]:
-        """Return docs most similar to embedding vector.
+        """Return docs most similar to embedding vector with score.
 
         Args:
-            embedding (str): Embedding to look up documents similar to.
-            k (int): Number of Documents to return. Defaults to 4.
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of (Document, score), the most similar to the query vector.
+            The list of (Document, score), the most similar to the query vector.
         """
         return [
             (doc, score)
@@ -910,6 +933,16 @@ class AstraDBVectorStore(VectorStore):
         filter: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of Documents most similar to the query.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_by_vector(
             embedding_vector,
@@ -924,6 +957,16 @@ class AstraDBVectorStore(VectorStore):
         filter: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to query.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of Documents most similar to the query.
+        """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_by_vector(
             embedding_vector,
@@ -938,6 +981,16 @@ class AstraDBVectorStore(VectorStore):
         filter: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of Documents most similar to the query vector.
+        """
         return [
             doc
             for doc, _ in self.similarity_search_with_score_by_vector(
@@ -954,6 +1007,16 @@ class AstraDBVectorStore(VectorStore):
         filter: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Args:
+            embedding: Embedding to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of Documents most similar to the query vector.
+        """
         return [
             doc
             for doc, _ in await self.asimilarity_search_with_score_by_vector(
@@ -969,6 +1032,16 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query with score.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of (Document, score), the most similar to the query vector.
+        """
         embedding_vector = self.embedding.embed_query(query)
         return self.similarity_search_with_score_by_vector(
             embedding_vector,
@@ -982,6 +1055,16 @@ class AstraDBVectorStore(VectorStore):
         k: int = 4,
         filter: Optional[Dict[str, Any]] = None,
     ) -> List[Tuple[Document, float]]:
+        """Return docs most similar to query with score.
+
+        Args:
+            query: Query to look up documents similar to.
+            k: Number of Documents to return. Defaults to 4.
+            filter: Filter on the metadata to apply.
+
+        Returns:
+            The list of (Document, score), the most similar to the query vector.
+        """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.asimilarity_search_with_score_by_vector(
             embedding_vector,
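
A sketch of the similarity-search family documented in the hunks above, assuming the same `vectorstore`. The query text is a placeholder; the `vectorstore.embedding.embed_query(...)` call is inferred from the method bodies shown in this diff and is used here only to illustrate the `*_by_vector` variants.

.. code-block:: python

    # Query-based search returning Documents only ...
    docs = vectorstore.similarity_search("Everything's ok", k=4)

    # ... or (Document, score) pairs when the score is needed.
    docs_and_scores = vectorstore.similarity_search_with_score("Everything's ok", k=4)

    # The *_by_vector variants accept a pre-computed embedding instead of a query.
    query_vector = vectorstore.embedding.embed_query("Everything's ok")
    docs_by_vector = vectorstore.similarity_search_by_vector(query_vector, k=4)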
@@ -1022,8 +1105,10 @@ class AstraDBVectorStore(VectorStore):
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
+
         Maximal marginal relevance optimizes for similarity to query AND diversity
         among selected documents.
+
         Args:
             embedding: Embedding to look up documents similar to.
             k: Number of Documents to return.
@@ -1031,8 +1116,10 @@ class AstraDBVectorStore(VectorStore):
             lambda_mult: Number between 0 and 1 that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and 1 to minimum diversity.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of Documents selected by maximal marginal relevance.
+            The list of Documents selected by maximal marginal relevance.
         """
         self._ensure_astra_db_client()
         metadata_parameter = self._filter_to_metadata(filter)
@@ -1064,8 +1151,10 @@ class AstraDBVectorStore(VectorStore):
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
+
         Maximal marginal relevance optimizes for similarity to query AND diversity
         among selected documents.
+
         Args:
             embedding: Embedding to look up documents similar to.
             k: Number of Documents to return.
@@ -1073,8 +1162,10 @@ class AstraDBVectorStore(VectorStore):
             lambda_mult: Number between 0 and 1 that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and 1 to minimum diversity.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of Documents selected by maximal marginal relevance.
+            The list of Documents selected by maximal marginal relevance.
         """
         await self._ensure_db_setup()
         if not self.async_collection:
@@ -1117,18 +1208,21 @@ class AstraDBVectorStore(VectorStore):
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
+
         Maximal marginal relevance optimizes for similarity to query AND diversity
         among selected documents.
+
         Args:
-            query (str): Text to look up documents similar to.
-            k (int = 4): Number of Documents to return.
-            fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm.
-            lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree
+            query: Query to look up documents similar to.
+            k: Number of Documents to return.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            lambda_mult: Number between 0 and 1 that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and 1 to minimum diversity.
-                Optional.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of Documents selected by maximal marginal relevance.
+            The list of Documents selected by maximal marginal relevance.
         """
         embedding_vector = self.embedding.embed_query(query)
         return self.max_marginal_relevance_search_by_vector(
@@ -1149,18 +1243,21 @@ class AstraDBVectorStore(VectorStore):
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
+
         Maximal marginal relevance optimizes for similarity to query AND diversity
         among selected documents.
+
         Args:
-            query (str): Text to look up documents similar to.
-            k (int = 4): Number of Documents to return.
-            fetch_k (int = 20): Number of Documents to fetch to pass to MMR algorithm.
-            lambda_mult (float = 0.5): Number between 0 and 1 that determines the degree
+            query: Query to look up documents similar to.
+            k: Number of Documents to return.
+            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
+            lambda_mult: Number between 0 and 1 that determines the degree
                 of diversity among the results with 0 corresponding
                 to maximum diversity and 1 to minimum diversity.
-                Optional.
             filter: Filter on the metadata to apply.
 
         Returns:
-            List of Documents selected by maximal marginal relevance.
+            The list of Documents selected by maximal marginal relevance.
         """
         embedding_vector = await self.embedding.aembed_query(query)
         return await self.amax_marginal_relevance_search_by_vector(
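
A sketch of the MMR search documented above, assuming the same `vectorstore`; the query is a placeholder and the parameter values simply restate the documented defaults.

.. code-block:: python

    # Fetch 20 candidates, then keep the 4 that best balance relevance
    # and diversity (lambda_mult=0.5 weighs the two equally).
    diverse_docs = vectorstore.max_marginal_relevance_search(
        "Everything's ok",
        k=4,
        fetch_k=20,
        lambda_mult=0.5,
    )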
@@ -1239,12 +1336,12 @@ class AstraDBVectorStore(VectorStore):
         """Create an Astra DB vectorstore from raw texts.
 
         Args:
-            texts (List[str]): the texts to insert.
-            embedding (Embeddings): the embedding function to use in the store.
-            metadatas (Optional[List[dict]]): metadata dicts for the texts.
-            ids (Optional[List[str]]): ids to associate to the texts.
-            *Additional arguments*: you can pass any argument that you would
-            to 'add_texts' and/or to the 'AstraDBVectorStore' constructor
+            texts: the texts to insert.
+            embedding: the embedding function to use in the store.
+            metadatas: metadata dicts for the texts.
+            ids: ids to associate to the texts.
+            **kwargs: you can pass any argument that you would
+                to :meth:`~add_texts` and/or to the 'AstraDBVectorStore' constructor
                 (see these methods for details). These arguments will be
                 routed to the respective methods as they are.
 
@@ -1274,12 +1371,12 @@ class AstraDBVectorStore(VectorStore):
         """Create an Astra DB vectorstore from raw texts.
 
         Args:
-            texts (List[str]): the texts to insert.
-            embedding (Embeddings): the embedding function to use in the store.
-            metadatas (Optional[List[dict]]): metadata dicts for the texts.
-            ids (Optional[List[str]]): ids to associate to the texts.
-            *Additional arguments*: you can pass any argument that you would
-            to 'add_texts' and/or to the 'AstraDBVectorStore' constructor
+            texts: the texts to insert.
+            embedding: the embedding function to use in the store.
+            metadatas: metadata dicts for the texts.
+            ids: ids to associate to the texts.
+            **kwargs: you can pass any argument that you would
+                to :meth:`~add_texts` and/or to the 'AstraDBVectorStore' constructor
                 (see these methods for details). These arguments will be
                 routed to the respective methods as they are.
 
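
A sketch of `from_texts` per the Args documented above: extra keyword arguments are routed to the constructor and/or `add_texts`, so the connection parameters can be passed through directly. The texts, metadata, token, endpoint and collection name are placeholders.

.. code-block:: python

    from langchain_astradb.vectorstores import AstraDBVectorStore
    from langchain_openai.embeddings import OpenAIEmbeddings

    vectorstore = AstraDBVectorStore.from_texts(
        texts=["Giraffes", "All good here"],
        embedding=OpenAIEmbeddings(),
        metadatas=[{"topic": "animals"}, {"topic": "smalltalk"}],
        # Routed to the AstraDBVectorStore constructor:
        collection_name="my_store",
        token="AstraCS:...",
        api_endpoint="https://<DB-ID>-<REGION>.apps.astra.datastax.com",
    )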