mirror of https://github.com/hwchase17/langchain
New Trello document loader (#4767)
# Added a new Trello loader class and documentation. A simple loader on top of the py-trello wrapper. With a board name you can pull cards and apply some field parameter tweaks on the load operation. I included documentation and examples. Included unit test cases using patch and a fixture for the py-trello client class. --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> pull/5405/head^2
parent
72f99ff953
commit
0b3e0dd1d2
@ -0,0 +1,184 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Trello\n",
|
||||
"\n",
|
||||
">[Trello](https://www.atlassian.com/software/trello) is a web-based project management and collaboration tool that allows individuals and teams to organize and track their tasks and projects. It provides a visual interface known as a \"board\" where users can create lists and cards to represent their tasks and activities.\n",
|
||||
"\n",
|
||||
"The TrelloLoader allows you to load cards from a Trello board and is implemented on top of [py-trello](https://pypi.org/project/py-trello/)\n",
|
||||
"\n",
|
||||
"This currently supports `api_key/token` only.\n",
|
||||
"\n",
|
||||
"1. Credentials generation: https://trello.com/power-ups/admin/\n",
|
||||
"\n",
|
||||
"2. Click on the manual token generation link to get the token.\n",
|
||||
"\n",
|
||||
"To specify the API key and token you can either set the environment variables ``TRELLO_API_KEY`` and ``TRELLO_TOKEN`` or you can pass ``api_key`` and ``token`` directly into the `from_credentials` convenience constructor method.\n",
|
||||
"\n",
|
||||
"This loader allows you to provide the board name to pull in the corresponding cards into Document objects.\n",
|
||||
"\n",
|
||||
"Notice that the board \"name\" is also called \"title\" in the official documentation:\n",
|
||||
"\n",
|
||||
"https://support.atlassian.com/trello/docs/changing-a-boards-title-and-description/\n",
|
||||
"\n",
|
||||
"You can also specify several load parameters to include / remove different fields both from the document page_content properties and metadata.\n",
|
||||
"\n",
|
||||
"## Features\n",
|
||||
"- Load cards from a Trello board.\n",
|
||||
"- Filter cards based on their status (open or closed).\n",
|
||||
"- Include card names, comments, and checklists in the loaded documents.\n",
|
||||
"- Customize the additional metadata fields to include in the document.\n",
|
||||
"\n",
|
||||
"By default, all card fields are included in the full text page_content and in the metadata accordingly.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install py-trello beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"········\n",
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If you have already set the API key and token using environment variables,\n",
|
||||
"# you can skip this cell and comment out the `api_key` and `token` named arguments\n",
|
||||
"# in the initialization steps below.\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"API_KEY = getpass()\n",
|
||||
"TOKEN = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Review Tech partner pages\n",
|
||||
"Comments:\n",
|
||||
"{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'labels': ['Demand Marketing'], 'list': 'Done', 'closed': False, 'due_date': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TrelloLoader\n",
|
||||
"\n",
|
||||
"# Get the open cards from \"Awesome Board\"\n",
|
||||
"loader = TrelloLoader.from_credentials(\n",
|
||||
" \"Awesome Board\",\n",
|
||||
" api_key=API_KEY,\n",
|
||||
" token=TOKEN,\n",
|
||||
" card_filter=\"open\",\n",
|
||||
" )\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"print(documents[0].page_content)\n",
|
||||
"print(documents[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Review Tech partner pages\n",
|
||||
"Comments:\n",
|
||||
"{'title': 'Review Tech partner pages', 'id': '6475357890dc8d17f73f2dcc', 'url': 'https://trello.com/c/b0OTZwkZ/1-review-tech-partner-pages', 'list': 'Done'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get all the cards from \"Awesome Board\" but only include the\n",
|
||||
"# card list(column) as extra metadata.\n",
|
||||
"loader = TrelloLoader.from_credentials(\n",
|
||||
" \"Awesome Board\",\n",
|
||||
" api_key=API_KEY,\n",
|
||||
" token=TOKEN,\n",
|
||||
" extra_metadata=(\"list\"),\n",
|
||||
")\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"print(documents[0].page_content)\n",
|
||||
"print(documents[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the cards from the \"test\" board and exclude the card name,\n",
|
||||
"# checklist and comments from the Document page_content text.\n",
|
||||
"loader = TrelloLoader.from_credentials(\n",
|
||||
" \"test\",\n",
|
||||
" api_key=API_KEY,\n",
|
||||
" token=TOKEN,\n",
|
||||
" include_card_name= False,\n",
|
||||
" include_checklist= False,\n",
|
||||
" include_comments= False,\n",
|
||||
")\n",
|
||||
"documents = loader.load()\n",
|
||||
"\n",
|
||||
"print(\"Document: \" + documents[0].page_content)\n",
|
||||
"print(documents[0].metadata)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "cc99336516f23363341912c6723b01ace86f02e26b4290be1efc0677e2e2ec24"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -0,0 +1,168 @@
|
||||
"""Loader that loads cards from Trello"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.utils import get_from_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from trello import Board, Card, TrelloClient
|
||||
|
||||
|
||||
class TrelloLoader(BaseLoader):
    """Trello loader. Reads all cards from a Trello board.

    Each card becomes one ``Document``. The page content is assembled from the
    card name, description, checklists and comments (each part optional), and
    the metadata always holds ``title``, ``id`` and ``url`` plus whichever
    extra fields were requested.
    """

    def __init__(
        self,
        client: TrelloClient,
        board_name: str,
        *,
        include_card_name: bool = True,
        include_comments: bool = True,
        include_checklist: bool = True,
        card_filter: Literal["closed", "open", "all"] = "all",
        extra_metadata: Tuple[str, ...] = ("due_date", "labels", "list", "closed"),
    ):
        """Initialize Trello loader.

        Args:
            client: Trello API client.
            board_name: The name of the Trello board.
            include_card_name: Whether to include the name of the card in the
                document.
            include_comments: Whether to include the comments on the card in the
                document.
            include_checklist: Whether to include the checklist on the card in the
                document.
            card_filter: Filter on card status. Valid values are "closed", "open",
                "all".
            extra_metadata: Additional metadata fields to include as document
                metadata. Valid values are "due_date", "labels", "list", "closed".
                A single field given as a bare string is also accepted.
        """
        self.client = client
        self.board_name = board_name
        self.include_card_name = include_card_name
        self.include_comments = include_comments
        self.include_checklist = include_checklist
        # `("list")` is just the string "list", not a tuple. Left as a string,
        # the `in` checks in `_card_to_doc` would do substring matching, so wrap
        # a bare string into a one-element tuple.
        if isinstance(extra_metadata, str):
            extra_metadata = (extra_metadata,)
        self.extra_metadata = extra_metadata
        self.card_filter = card_filter

    @classmethod
    def from_credentials(
        cls,
        board_name: str,
        *,
        api_key: Optional[str] = None,
        token: Optional[str] = None,
        **kwargs: Any,
    ) -> TrelloLoader:
        """Convenience constructor that builds TrelloClient init param for you.

        Args:
            board_name: The name of the Trello board.
            api_key: Trello API key. Can also be specified as environment variable
                TRELLO_API_KEY.
            token: Trello token. Can also be specified as environment variable
                TRELLO_TOKEN.
            include_card_name: Whether to include the name of the card in the
                document.
            include_comments: Whether to include the comments on the card in the
                document.
            include_checklist: Whether to include the checklist on the card in the
                document.
            card_filter: Filter on card status. Valid values are "closed", "open",
                "all".
            extra_metadata: Additional metadata fields to include as document
                metadata. Valid values are "due_date", "labels", "list", "closed".

        Returns:
            A loader bound to a freshly created ``TrelloClient``.

        Raises:
            ImportError: If the ``trello`` package (py-trello) is not installed.
        """
        try:
            from trello import TrelloClient  # type: ignore
        except ImportError as ex:
            raise ImportError(
                "Could not import trello python package. "
                "Please install it with `pip install py-trello`."
            ) from ex
        # Explicit arguments win; otherwise fall back to the environment.
        api_key = api_key or get_from_env("api_key", "TRELLO_API_KEY")
        token = token or get_from_env("token", "TRELLO_TOKEN")
        client = TrelloClient(api_key=api_key, token=token)
        return cls(client, board_name, **kwargs)

    def load(self) -> List[Document]:
        """Loads all cards from the specified Trello board.

        You can filter the cards, metadata and text included by using the optional
        parameters.

        Returns:
            A list of documents, one for each card in the board.

        Raises:
            ImportError: If ``beautifulsoup4`` or ``lxml`` is not installed.
            ValueError: If no board with the configured name exists.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as ex:
            raise ImportError(
                "`beautifulsoup4` package not found, please run"
                " `pip install beautifulsoup4`"
            ) from ex
        # `_card_to_doc` asks BeautifulSoup for the "lxml" parser, so check for
        # it up front and fail with an actionable message.
        try:
            import lxml  # type: ignore # noqa: F401
        except ImportError as ex:
            raise ImportError(
                "`lxml` package not found, please run `pip install lxml`"
            ) from ex

        board = self._get_board()
        # Create a dictionary with the list IDs as keys and the list names as values
        list_dict = {list_item.id: list_item.name for list_item in board.list_lists()}
        # Get Cards on the board
        cards = board.get_cards(card_filter=self.card_filter)
        return [self._card_to_doc(card, list_dict) for card in cards]

    def _get_board(self) -> Board:
        """Return the first board whose name equals ``self.board_name``.

        Raises:
            ValueError: If the client lists no board with that name.
        """
        board = next(
            (b for b in self.client.list_boards() if b.name == self.board_name), None
        )
        if board is None:
            raise ValueError(f"Board `{self.board_name}` not found.")
        return board

    def _card_to_doc(self, card: Card, list_dict: dict) -> Document:
        """Convert one Trello card into a ``Document``.

        Args:
            card: The card to convert.
            list_dict: Mapping of Trello list id to list name, used to resolve
                the card's ``list_id`` for the "list" metadata field.

        Returns:
            A document whose page content is the newline-separated card
            sections and whose metadata holds the default and requested fields.
        """
        from bs4 import BeautifulSoup  # type: ignore

        # Collect the sections separately and join them with newlines so that
        # every section (including "Comments:") starts on its own line.
        sections: List[str] = []
        if self.include_card_name:
            sections.append(card.name)
        description = card.description or ""
        if description.strip():
            # Strip any markup from the description.
            sections.append(BeautifulSoup(description, "lxml").get_text())
        if self.include_checklist:
            # Get all the checklist items on the card
            for checklist in card.checklists:
                if checklist.items:
                    items = [
                        f"{item['name']}:{item['state']}" for item in checklist.items
                    ]
                    sections.append(f"{checklist.name}\n" + "\n".join(items))

        if self.include_comments:
            # Get all the comments on the card
            comments = [
                BeautifulSoup(comment["data"]["text"], "lxml").get_text()
                for comment in card.comments
            ]
            sections.append("Comments:" + "\n".join(comments))

        text_content = "\n".join(sections)

        # Default metadata fields
        metadata = {
            "title": card.name,
            "id": card.id,
            "url": card.url,
        }

        # Extra metadata fields. Card object is not subscriptable.
        if "labels" in self.extra_metadata:
            metadata["labels"] = [label.name for label in card.labels]
        if "list" in self.extra_metadata:
            # Cards whose list id is unknown simply get no "list" entry.
            if card.list_id in list_dict:
                metadata["list"] = list_dict[card.list_id]
        if "closed" in self.extra_metadata:
            metadata["closed"] = card.closed
        if "due_date" in self.extra_metadata:
            metadata["due_date"] = card.due_date

        return Document(page_content=text_content, metadata=metadata)
|
@ -0,0 +1,341 @@
|
||||
import unittest
|
||||
from collections import namedtuple
|
||||
from typing import Any, Optional
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.document_loaders.trello import TrelloLoader
|
||||
|
||||
|
||||
def list_to_objects(dict_list: list) -> list:
    """Convert a list of dicts into attribute-accessible objects.

    Each dict becomes a namedtuple whose fields are the dict's keys. Entries
    that are not dicts are dropped, and ``None`` is treated as an empty list.
    """
    return [
        namedtuple("Object", d.keys())(**d)
        for d in (dict_list or [])
        if isinstance(d, dict)
    ]


def card_list_to_objects(cards: list) -> list:
    """Convert dict cards into trello's mix of objects and dictionaries.

    The card itself, its checklists and its labels become objects, while
    nested values such as checklist items and comments stay plain dicts.

    The input dicts are left untouched. Converting in place would replace the
    nested dicts with namedtuples, and a second conversion of the same data
    would then silently drop them (they are no longer ``dict`` instances).
    """
    converted = []
    for card in cards:
        # Shallow copy: only the two keys rewritten below differ from the input.
        card_copy = dict(card)
        card_copy["checklists"] = list_to_objects(card.get("checklists"))
        card_copy["labels"] = list_to_objects(card.get("labels"))
        converted.append(card_copy)
    return list_to_objects(converted)
|
||||
|
||||
|
||||
class MockBoard:
    """Stand-in for a Trello board object, used by the patched client method."""

    def __init__(self, id: str, name: str, cards: list, lists: list):
        self.id, self.name = id, name
        self.cards, self.lists = cards, lists

    def get_cards(self, card_filter: Optional[str] = "") -> list:
        """Return every stored card.

        ``card_filter`` is accepted but ignored: filtering happens on the
        Trello client side, so it is not exercised here.
        """
        return self.cards

    def list_lists(self) -> list:
        """Return the stored lists (the board's columns)."""
        return self.lists
|
||||
|
||||
|
||||
# Mock Trello lists (the board columns). Each entry carries the `id` that
# cards reference through `list_id`, and the `name` reported as "list" metadata.
TRELLO_LISTS = [
    {
        "id": "5555cacbc4daa90564b34cf2",
        "name": "Publishing Considerations",
    },
    {
        "id": "5555059b74c03b3a9e362cd0",
        "name": "Backlog",
    },
    {
        "id": "555505a3427fd688c1ca5ebd",
        "name": "Selected for Milestone",
    },
    {
        "id": "555505ba95ff925f9fb1b370",
        "name": "Blocked",
    },
    {
        "id": "555505a695ff925f9fb1b13d",
        "name": "In Progress",
    },
    {
        "id": "555505bdfe380c7edc8ca1a3",
        "name": "Done",
    },
]
# Create a mock list of cards.
# Descriptions and comments deliberately contain HTML markup so the tests can
# check that it is stripped from the document content.
TRELLO_CARDS_QA = [
    # Closed card in the "Done" list with one checklist and one comment.
    {
        "id": "12350aca6952888df7975903",
        "name": "Closed Card Title",
        "description": "This is the <em>description</em> of Closed Card.",
        "closed": True,
        "labels": [],
        "due_date": "",
        "url": "https://trello.com/card/12350aca6952888df7975903",
        "list_id": "555505bdfe380c7edc8ca1a3",
        "checklists": [
            {
                "name": "Checklist 1",
                "items": [
                    {
                        "name": "Item 1",
                        "state": "pending",
                    },
                    {
                        "name": "Item 2",
                        "state": "completed",
                    },
                ],
            },
        ],
        "comments": [
            {
                "data": {
                    "text": "This is a comment on a <s>Closed</s> Card.",
                },
            },
        ],
    },
    # Open card in the "In Progress" list with labels but no checklists/comments.
    {
        "id": "45650aca6952888df7975903",
        "name": "Card 2",
        "description": "This is the description of <strong>Card 2</strong>.",
        "closed": False,
        "labels": [{"name": "Medium"}, {"name": "Task"}],
        "due_date": "",
        "url": "https://trello.com/card/45650aca6952888df7975903",
        "list_id": "555505a695ff925f9fb1b13d",
        "checklists": [],
        "comments": [],
    },
    # Open card in "Selected for Milestone" whose description is markup only.
    {
        "id": "55550aca6952888df7975903",
        "name": "Camera",
        "description": "<div></div>",
        "closed": False,
        "labels": [{"name": "Task"}],
        "due_date": "",
        "url": "https://trello.com/card/55550aca6952888df7975903",
        "list_id": "555505a3427fd688c1ca5ebd",
        "checklists": [
            {
                "name": "Tasks",
                "items": [
                    {"name": "Zoom", "state": "complete"},
                    {"name": "Follow players", "state": "complete"},
                    {
                        "name": "camera limit to stage size",
                        "state": "complete",
                    },
                    {"name": "Post Processing effects", "state": "complete"},
                    {
                        "name": "Shitch to universal render pipeline",
                        "state": "complete",
                    },
                ],
            },
        ],
        "comments": [
            {
                "data": {
                    "text": (
                        "to follow group of players use Group Camera feature of "
                        "cinemachine."
                    )
                }
            },
            {
                "data": {
                    "text": "Use 'Impulse' <s>Cinemachine</s> feature for camera shake."
                }
            },
            {"data": {"text": "depth of field with custom shader."}},
        ],
    },
]
|
||||
|
||||
|
||||
@pytest.fixture
def mock_trello_client() -> Any:
    """Fixture that creates a mock for trello.TrelloClient.

    While the fixture is active, ``trello.TrelloClient`` is patched so that
    every client instance reports two boards from ``list_boards()``: an empty
    "Research" board and a "QA" board holding the mock cards.

    Yields:
        The mock client instance that ``TrelloClient(...)`` returns.
    """
    import copy

    # Create a mock `trello.TrelloClient` object.
    with patch("trello.TrelloClient") as mock_trello_client:
        # The trello client returns a hierarchy mix of objects and dictionaries.
        list_objs = list_to_objects(TRELLO_LISTS)
        # Convert a deep copy so the shared module-level card data stays made
        # of plain dicts and every test starts from the same pristine input.
        cards_qa_objs = card_list_to_objects(copy.deepcopy(TRELLO_CARDS_QA))
        boards = [
            MockBoard("5555eaafea917522902a2a2c", "Research", [], list_objs),
            MockBoard("55559f6002dd973ad8cdbfb7", "QA", cards_qa_objs, list_objs),
        ]

        # Make `list_boards()` of the mock `TrelloClient` instance return the
        # mock list of boards.
        mock_trello_client.return_value.list_boards.return_value = boards
        yield mock_trello_client.return_value
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("mock_trello_client")
@pytest.mark.requires("trello", "bs4", "lxml")
class TestTrelloLoader(unittest.TestCase):
    """Tests for TrelloLoader, run against the patched Trello client."""

    def test_empty_board(self) -> None:
        """
        Test loading a board with no cards.
        """
        trello_loader = TrelloLoader.from_credentials(
            "Research",
            api_key="API_KEY",
            token="API_TOKEN",
        )
        documents = trello_loader.load()
        self.assertEqual(len(documents), 0, "Empty board returns an empty list.")

    def test_complete_text_and_metadata(self) -> None:
        """
        Test loading a board cards with all metadata.
        """
        from bs4 import BeautifulSoup

        trello_loader = TrelloLoader.from_credentials(
            "QA",
            api_key="API_KEY",
            token="API_TOKEN",
        )
        documents = trello_loader.load()
        self.assertEqual(len(documents), len(TRELLO_CARDS_QA), "Card count matches.")

        soup = BeautifulSoup(documents[0].page_content, "html.parser")
        self.assertEqual(
            len(soup.find_all()),
            0,
            "There is not markup in Closed Card document content.",
        )

        # Check samples of every field type is present in page content.
        texts = [
            "Closed Card Title",
            "This is the description of Closed Card.",
            "Checklist 1",
            "Item 1:pending",
            "This is a comment on a Closed Card.",
        ]
        for text in texts:
            self.assertIn(text, documents[0].page_content)

        # Check all metadata is present in first Card
        self.assertEqual(
            documents[0].metadata,
            {
                "title": "Closed Card Title",
                "id": "12350aca6952888df7975903",
                "url": "https://trello.com/card/12350aca6952888df7975903",
                "labels": [],
                "list": "Done",
                "closed": True,
                "due_date": "",
            },
            "Metadata of Closed Card Matches.",
        )

        soup = BeautifulSoup(documents[1].page_content, "html.parser")
        self.assertEqual(
            len(soup.find_all()),
            0,
            "There is not markup in Card 2 document content.",
        )

        # Check samples of every field type is present in page content.
        texts = [
            "Card 2",
            "This is the description of Card 2.",
        ]
        for text in texts:
            self.assertIn(text, documents[1].page_content)

        # Check all metadata is present in second Card
        self.assertEqual(
            documents[1].metadata,
            {
                "title": "Card 2",
                "id": "45650aca6952888df7975903",
                "url": "https://trello.com/card/45650aca6952888df7975903",
                "labels": ["Medium", "Task"],
                "list": "In Progress",
                "closed": False,
                "due_date": "",
            },
            "Metadata of Card 2 Matches.",
        )

        soup = BeautifulSoup(documents[2].page_content, "html.parser")
        self.assertEqual(
            len(soup.find_all()),
            0,
            "There is not markup in Camera Card document content.",
        )

        # Check samples of every field type is present in page content.
        texts = [
            "Camera",
            "camera limit to stage size:complete",
            "Use 'Impulse' Cinemachine feature for camera shake.",
        ]

        for text in texts:
            self.assertIn(text, documents[2].page_content, text + " is present.")

        # Check all metadata is present in third Card
        self.assertEqual(
            documents[2].metadata,
            {
                "title": "Camera",
                "id": "55550aca6952888df7975903",
                "url": "https://trello.com/card/55550aca6952888df7975903",
                "labels": ["Task"],
                "list": "Selected for Milestone",
                "closed": False,
                "due_date": "",
            },
            "Metadata of Camera Card matches.",
        )

    def test_partial_text_and_metadata(self) -> None:
        """
        Test loading a board cards removing some text and metadata.
        """
        trello_loader = TrelloLoader.from_credentials(
            "QA",
            api_key="API_KEY",
            token="API_TOKEN",
            # Trailing comma matters: ("list") would be the plain string "list".
            extra_metadata=("list",),
            include_card_name=False,
            include_checklist=False,
            include_comments=False,
        )
        documents = trello_loader.load()

        # Check the excluded field types are absent from the page content.
        texts = [
            "Closed Card Title",
            "Checklist 1",
            "Item 1:pending",
            "This is a comment on a Closed Card.",
        ]
        for text in texts:
            self.assertNotIn(text, documents[0].page_content)

        # Check only the default and requested metadata is present in first Card
        self.assertEqual(
            documents[0].metadata,
            {
                "title": "Closed Card Title",
                "id": "12350aca6952888df7975903",
                "url": "https://trello.com/card/12350aca6952888df7975903",
                "list": "Done",
            },
            "Metadata of Closed Card Matches.",
        )
|
Loading…
Reference in New Issue