diff --git a/docs/modules/indexes/document_loaders/examples/trello.ipynb b/docs/modules/indexes/document_loaders/examples/trello.ipynb
new file mode 100644
index 00000000..8367f2fa
--- /dev/null
+++ b/docs/modules/indexes/document_loaders/examples/trello.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Trello\n",
+ "\n",
+ ">[Trello](https://www.atlassian.com/software/trello) is a web-based project management and collaboration tool that allows individuals and teams to organize and track their tasks and projects. It provides a visual interface known as a \"board\" where users can create lists and cards to represent their tasks and activities.\n",
+ "\n",
+ "The TrelloLoader allows you to load cards from a Trello board and is implemented on top of [py-trello](https://pypi.org/project/py-trello/)\n",
+ "\n",
+ "This currently supports `api_key/token` only.\n",
+ "\n",
+ "1. Credentials generation: https://trello.com/power-ups/admin/\n",
+ "\n",
+    "2. Click on the manual token generation link to get the token.\n",
+ "\n",
+ "To specify the API key and token you can either set the environment variables ``TRELLO_API_KEY`` and ``TRELLO_TOKEN`` or you can pass ``api_key`` and ``token`` directly into the `from_credentials` convenience constructor method.\n",
+ "\n",
+ "This loader allows you to provide the board name to pull in the corresponding cards into Document objects.\n",
+ "\n",
+    "Notice that the board \"name\" is also called \"title\" in the official documentation:\n",
+ "\n",
+ "https://support.atlassian.com/trello/docs/changing-a-boards-title-and-description/\n",
+ "\n",
+ "You can also specify several load parameters to include / remove different fields both from the document page_content properties and metadata.\n",
+ "\n",
+ "## Features\n",
+ "- Load cards from a Trello board.\n",
+ "- Filter cards based on their status (open or closed).\n",
+ "- Include card names, comments, and checklists in the loaded documents.\n",
+ "- Customize the additional metadata fields to include in the document.\n",
+ "\n",
+    "By default, all card fields are included in the full-text page_content and in the metadata accordingly.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#!pip install py-trello beautifulsoup4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "········\n",
+ "········\n"
+ ]
+ }
+ ],
+ "source": [
+ "# If you have already set the API key and token using environment variables,\n",
+ "# you can skip this cell and comment out the `api_key` and `token` named arguments\n",
+ "# in the initialization steps below.\n",
+ "from getpass import getpass\n",
+ "\n",
+ "API_KEY = getpass()\n",
+ "TOKEN = getpass()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Review Tech partner pages\n",
+ "Comments:\n",
+ "{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'labels': ['Demand Marketing'], 'list': 'Done', 'closed': False, 'due_date': ''}\n"
+ ]
+ }
+ ],
+ "source": [
+ "from langchain.document_loaders import TrelloLoader\n",
+ "\n",
+ "# Get the open cards from \"Awesome Board\"\n",
+ "loader = TrelloLoader.from_credentials(\n",
+ " \"Awesome Board\",\n",
+ " api_key=API_KEY,\n",
+ " token=TOKEN,\n",
+ " card_filter=\"open\",\n",
+ " )\n",
+ "documents = loader.load()\n",
+ "\n",
+ "print(documents[0].page_content)\n",
+ "print(documents[0].metadata)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Review Tech partner pages\n",
+ "Comments:\n",
+ "{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'list': 'Done'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Get all the cards from \"Awesome Board\" but only include the\n",
+ "# card list(column) as extra metadata.\n",
+ "loader = TrelloLoader.from_credentials(\n",
+ " \"Awesome Board\",\n",
+ " api_key=API_KEY,\n",
+ " token=TOKEN,\n",
+    "    extra_metadata=(\"list\",),\n",
+ ")\n",
+ "documents = loader.load()\n",
+ "\n",
+ "print(documents[0].page_content)\n",
+ "print(documents[0].metadata)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Get the cards from the \"test\" board and exclude the card name,\n",
+ "# checklist and comments from the Document page_content text.\n",
+ "loader = TrelloLoader.from_credentials(\n",
+ " \"test\",\n",
+ " api_key=API_KEY,\n",
+ " token=TOKEN,\n",
+ " include_card_name= False,\n",
+ " include_checklist= False,\n",
+ " include_comments= False,\n",
+ ")\n",
+ "documents = loader.load()\n",
+ "\n",
+ "print(\"Document: \" + documents[0].page_content)\n",
+ "print(documents[0].metadata)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py
index 3b8a4308..3155fe24 100644
--- a/langchain/document_loaders/__init__.py
+++ b/langchain/document_loaders/__init__.py
@@ -92,6 +92,7 @@ from langchain.document_loaders.telegram import (
from langchain.document_loaders.text import TextLoader
from langchain.document_loaders.tomarkdown import ToMarkdownLoader
from langchain.document_loaders.toml import TomlLoader
+from langchain.document_loaders.trello import TrelloLoader
from langchain.document_loaders.twitter import TwitterTweetLoader
from langchain.document_loaders.unstructured import (
UnstructuredAPIFileIOLoader,
@@ -201,6 +202,7 @@ __all__ = [
"StripeLoader",
"TextLoader",
"TomlLoader",
+ "TrelloLoader",
"TwitterTweetLoader",
"UnstructuredAPIFileIOLoader",
"UnstructuredAPIFileLoader",
diff --git a/langchain/document_loaders/trello.py b/langchain/document_loaders/trello.py
new file mode 100644
index 00000000..5c243586
--- /dev/null
+++ b/langchain/document_loaders/trello.py
@@ -0,0 +1,168 @@
+"""Loader that loads cards from Trello"""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from langchain.utils import get_from_env
+
+if TYPE_CHECKING:
+ from trello import Board, Card, TrelloClient
+
+
+class TrelloLoader(BaseLoader):
+    """Trello loader. Reads all cards from a Trello board."""
+
+    def __init__(
+        self,
+        client: TrelloClient,
+        board_name: str,
+        *,
+        include_card_name: bool = True,
+        include_comments: bool = True,
+        include_checklist: bool = True,
+        card_filter: Literal["closed", "open", "all"] = "all",
+        extra_metadata: Tuple[str, ...] = ("due_date", "labels", "list", "closed"),
+    ):
+        """Initialize Trello loader.
+
+        Args:
+            client: Trello API client.
+            board_name: The name of the Trello board.
+            include_card_name: Whether to include the name of the card in the document.
+            include_comments: Whether to include the comments on the card in the
+                document.
+            include_checklist: Whether to include the checklist on the card in the
+                document.
+            card_filter: Filter on card status. Valid values are "closed", "open",
+                "all".
+            extra_metadata: Tuple of additional metadata fields to include as document
+                metadata. Valid values are "due_date", "labels", "list", "closed".
+        """
+        self.client = client
+        self.board_name = board_name
+        self.include_card_name = include_card_name
+        self.include_comments = include_comments
+        self.include_checklist = include_checklist
+        self.extra_metadata = extra_metadata
+        self.card_filter = card_filter
+
+    @classmethod
+    def from_credentials(
+        cls,
+        board_name: str,
+        *,
+        api_key: Optional[str] = None,
+        token: Optional[str] = None,
+        **kwargs: Any,
+    ) -> TrelloLoader:
+        """Convenience constructor that builds TrelloClient init param for you.
+
+        Args:
+            board_name: The name of the Trello board.
+            api_key: Trello API key. Can also be specified as environment variable
+                TRELLO_API_KEY.
+            token: Trello token. Can also be specified as environment variable
+                TRELLO_TOKEN.
+            **kwargs: Passed unchanged to ``__init__``: include_card_name,
+                include_comments, include_checklist, card_filter, extra_metadata.
+
+        Returns:
+            A TrelloLoader using a TrelloClient built from the credentials.
+
+        Raises:
+            ImportError: If the `py-trello` package is not installed.
+        """
+        try:
+            from trello import TrelloClient  # type: ignore
+        except ImportError as ex:
+            raise ImportError(
+                "Could not import trello python package. "
+                "Please install it with `pip install py-trello`."
+            ) from ex
+        api_key = api_key or get_from_env("api_key", "TRELLO_API_KEY")
+        token = token or get_from_env("token", "TRELLO_TOKEN")
+        client = TrelloClient(api_key=api_key, token=token)
+        return cls(client, board_name, **kwargs)
+
+    def load(self) -> List[Document]:
+        """Loads all cards from the specified Trello board.
+
+        You can filter the cards, metadata and text included by using the optional
+        parameters.
+
+        Returns:
+            A list of documents, one for each card in the board.
+
+        Raises:
+            ImportError: If `beautifulsoup4` or `lxml` is not installed.
+            ValueError: If no board named `board_name` is found.
+        """
+        try:
+            # Card text is parsed with BeautifulSoup's "lxml" parser, so check both
+            # packages up front instead of failing later on the first card.
+            import lxml  # noqa: F401
+            from bs4 import BeautifulSoup  # noqa: F401
+        except ImportError as ex:
+            raise ImportError(
+                "`beautifulsoup4` and `lxml` packages are required, please run"
+                " `pip install beautifulsoup4 lxml`"
+            ) from ex
+
+        board = self._get_board()
+        # Create a dictionary with the list IDs as keys and the list names as values
+        list_dict = {list_item.id: list_item.name for list_item in board.list_lists()}
+        # Get Cards on the board
+        cards = board.get_cards(card_filter=self.card_filter)
+        return [self._card_to_doc(card, list_dict) for card in cards]
+
+    def _get_board(self) -> Board:
+        """Return the first board named `board_name`; raise ValueError if none."""
+        board = next(
+            (b for b in self.client.list_boards() if b.name == self.board_name), None
+        )
+        if board is None:
+            raise ValueError(f"Board `{self.board_name}` not found.")
+        return board
+
+    def _card_to_doc(self, card: Card, list_dict: dict) -> Document:
+        """Build one Document (page text plus metadata) from a Trello card.
+
+        `card` is the Trello card to convert; `list_dict` maps list ids to list
+        names and supplies the "list" metadata field.
+        """
+        from bs4 import BeautifulSoup  # type: ignore
+
+        # Gather the text sections, then join them with newlines so that every
+        # section (including "Comments:") starts on a line of its own.
+        parts: List[str] = []
+        if self.include_card_name:
+            parts.append(card.name)
+        if card.description.strip():
+            parts.append(BeautifulSoup(card.description, "lxml").get_text())
+        if self.include_checklist:
+            for checklist in card.checklists:
+                if checklist.items:
+                    parts.append(checklist.name)
+                    parts.extend(f"{i['name']}:{i['state']}" for i in checklist.items)
+        if self.include_comments:
+            parts.append("Comments:")
+            parts.extend(
+                BeautifulSoup(comment["data"]["text"], "lxml").get_text()
+                for comment in card.comments
+            )
+
+        # Default metadata fields
+        metadata = {"title": card.name, "id": card.id, "url": card.url}
+        # Extra metadata fields. Card object is not subscriptable.
+        if "labels" in self.extra_metadata:
+            metadata["labels"] = [label.name for label in card.labels]
+        if "list" in self.extra_metadata and card.list_id in list_dict:
+            metadata["list"] = list_dict[card.list_id]
+        if "closed" in self.extra_metadata:
+            metadata["closed"] = card.closed
+        if "due_date" in self.extra_metadata:
+            metadata["due_date"] = card.due_date
+
+        return Document(page_content="\n".join(parts), metadata=metadata)
diff --git a/poetry.lock b/poetry.lock
index df7d3dc1..70668ca5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -6626,6 +6626,23 @@ files = [
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
]
+[[package]]
+name = "py-trello"
+version = "0.19.0"
+description = "Python wrapper around the Trello API"
+category = "main"
+optional = true
+python-versions = "*"
+files = [
+ {file = "py-trello-0.19.0.tar.gz", hash = "sha256:f4a8c05db61fad0ef5fa35d62c29806c75d9d2b797358d9cf77275e2cbf23020"},
+]
+
+[package.dependencies]
+python-dateutil = "*"
+pytz = "*"
+requests = "*"
+requests-oauthlib = ">=0.4.1"
+
[[package]]
name = "pyaes"
version = "1.6.1"
@@ -10903,7 +10920,7 @@ azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
-extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "bibtexparser", "chardet", "gql", "html2text", "jq", "lxml", "pandas", "pdfminer-six", "psychicapi", "pymupdf", "pypdf", "pypdfium2", "requests-toolbelt", "scikit-learn", "telethon", "tqdm", "zep-python"]
+extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "bibtexparser", "chardet", "gql", "html2text", "jq", "lxml", "pandas", "pdfminer-six", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "requests-toolbelt", "scikit-learn", "telethon", "tqdm", "zep-python"]
llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
openai = ["openai", "tiktoken"]
qdrant = ["qdrant-client"]
@@ -10912,4 +10929,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
-content-hash = "5e83a1f4ca8c0d3107363e393485174fd72ce9db93db5dc7c21b2dd37b184e66"
+content-hash = "1033e47cdab7d3a15fb9322bad64609f77fd3befc47c1a01dc91b22cbbc708a3"
diff --git a/pyproject.toml b/pyproject.toml
index c61b4524..7b3cb809 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,6 +97,7 @@ scikit-learn = {version = "^1.2.2", optional = true}
azure-ai-formrecognizer = {version = "^3.2.1", optional = true}
azure-ai-vision = {version = "^0.11.1b1", optional = true}
azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}
+py-trello = {version = "^0.19.0", optional = true}
momento = {version = "^1.5.0", optional = true}
bibtexparser = {version = "^1.4.0", optional = true}
@@ -298,6 +299,7 @@ extended_testing = [
"gql",
"requests_toolbelt",
"html2text",
+ "py-trello",
"scikit-learn",
]
diff --git a/tests/unit_tests/document_loaders/test_trello.py b/tests/unit_tests/document_loaders/test_trello.py
new file mode 100644
index 00000000..8ef0a118
--- /dev/null
+++ b/tests/unit_tests/document_loaders/test_trello.py
@@ -0,0 +1,341 @@
+import unittest
+from collections import namedtuple
+from typing import Any, Optional
+from unittest.mock import patch
+
+import pytest
+
+from langchain.document_loaders.trello import TrelloLoader
+
+
+def list_to_objects(dict_list: list) -> list:
+    """Turn each dict into an `Object` namedtuple; non-dict entries are dropped."""
+    dicts_only = filter(lambda entry: isinstance(entry, dict), dict_list)
+    # A fresh namedtuple type per dict, with that dict's keys as field names.
+    return [namedtuple("Object", entry.keys())(**entry) for entry in dicts_only]
+
+
+def card_list_to_objects(cards: list) -> list:
+    """Convert card dicts to trello-like objects; input dicts are left unmodified."""
+    keys = ("checklists", "labels")
+    # Work on copies: a mutated fixture would be emptied by the next conversion.
+    nested = [{k: list_to_objects(card.get(k) or []) for k in keys} for card in cards]
+    return list_to_objects([{**card, **sub} for card, sub in zip(cards, nested)])
+
+
+class MockBoard:
+    """Minimal stand-in for a Trello board, used by the patched client method."""
+
+    def __init__(self, id: str, name: str, cards: list, lists: list):
+        self.id, self.name = id, name
+        self.cards, self.lists = cards, lists
+
+    def get_cards(self, card_filter: Optional[str] = "") -> list:
+        """Return every stored card.
+
+        `card_filter` is ignored: filtering happens on the Trello client side.
+        """
+        return self.cards
+
+    def list_lists(self) -> list:
+        """Return the stored lists unchanged."""
+        return self.lists
+
+
+TRELLO_LISTS = [  # Mock lists; the mock cards reference these ids via "list_id".
+    {
+        "id": "5555cacbc4daa90564b34cf2",
+        "name": "Publishing Considerations",
+    },
+    {
+        "id": "5555059b74c03b3a9e362cd0",
+        "name": "Backlog",
+    },
+    {
+        "id": "555505a3427fd688c1ca5ebd",
+        "name": "Selected for Milestone",
+    },
+    {
+        "id": "555505ba95ff925f9fb1b370",
+        "name": "Blocked",
+    },
+    {
+        "id": "555505a695ff925f9fb1b13d",
+        "name": "In Progress",
+    },
+    {
+        "id": "555505bdfe380c7edc8ca1a3",
+        "name": "Done",
+    },
+]
+# Create a mock list of cards.
+TRELLO_CARDS_QA = [
+ {
+ "id": "12350aca6952888df7975903",
+ "name": "Closed Card Title",
+ "description": "This is the description of Closed Card.",
+ "closed": True,
+ "labels": [],
+ "due_date": "",
+ "url": "https://trello.com/card/12350aca6952888df7975903",
+ "list_id": "555505bdfe380c7edc8ca1a3",
+ "checklists": [
+ {
+ "name": "Checklist 1",
+ "items": [
+ {
+ "name": "Item 1",
+ "state": "pending",
+ },
+ {
+ "name": "Item 2",
+ "state": "completed",
+ },
+ ],
+ },
+ ],
+ "comments": [
+ {
+ "data": {
+ "text": "This is a comment on a Closed Card.",
+ },
+ },
+ ],
+ },
+ {
+ "id": "45650aca6952888df7975903",
+ "name": "Card 2",
+ "description": "This is the description of Card 2.",
+ "closed": False,
+ "labels": [{"name": "Medium"}, {"name": "Task"}],
+ "due_date": "",
+ "url": "https://trello.com/card/45650aca6952888df7975903",
+ "list_id": "555505a695ff925f9fb1b13d",
+ "checklists": [],
+ "comments": [],
+ },
+ {
+ "id": "55550aca6952888df7975903",
+ "name": "Camera",
+ "description": "