Create Airtable loader (#5958)

Create document loader for Airtable
This commit is contained in:
Lance Martin 2023-06-10 15:43:18 -07:00 committed by GitHub
parent 9218684759
commit 21bd16bb59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 181 additions and 0 deletions

View File

@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
:maxdepth: 1 :maxdepth: 1
:glob: :glob:
./document_loaders/examples/airtable.ipynb
./document_loaders/examples/audio.ipynb ./document_loaders/examples/audio.ipynb
./document_loaders/examples/conll-u.ipynb ./document_loaders/examples/conll-u.ipynb
./document_loaders/examples/copypaste.ipynb ./document_loaders/examples/copypaste.ipynb

View File

@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7ae421e6",
"metadata": {},
"source": [
"# Airtable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "98aea00d",
"metadata": {},
"outputs": [],
"source": [
"! pip install pyairtable"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "592483eb",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import AirtableLoader"
]
},
{
"cell_type": "markdown",
"id": "637e1205",
"metadata": {},
"source": [
"* Get your API key [here](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).\n",
"* Get the ID of your base [here](https://airtable.com/developers/web/api/introduction).\n",
"* Get your table ID from the table url as shown [here](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c12a7aff",
"metadata": {},
"outputs": [],
"source": [
"api_key=\"xxx\"\n",
"base_id=\"xxx\"\n",
"table_id=\"xxx\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ccddd5a6",
"metadata": {},
"outputs": [],
"source": [
"loader = AirtableLoader(api_key,table_id,base_id)\n",
"docs = loader.load()"
]
},
{
"cell_type": "markdown",
"id": "ae76c25c",
"metadata": {},
"source": [
"Returns each table row as a `Document` whose `page_content` is the string form of the row's `dict`."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7abec7ce",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "403c95da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': 'recF3GbGZCuh9sXIQ',\n",
" 'createdTime': '2023-06-09T04:47:21.000Z',\n",
" 'fields': {'Priority': 'High',\n",
" 'Status': 'In progress',\n",
" 'Name': 'Document Splitters'}}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eval(docs[0].page_content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,6 +1,7 @@
"""All different types of document loaders.""" """All different types of document loaders."""
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
from langchain.document_loaders.airtable import AirtableLoader
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
from langchain.document_loaders.arxiv import ArxivLoader from langchain.document_loaders.arxiv import ArxivLoader
from langchain.document_loaders.azlyrics import AZLyricsLoader from langchain.document_loaders.azlyrics import AZLyricsLoader
@ -135,6 +136,7 @@ TelegramChatLoader = TelegramChatFileLoader
__all__ = [ __all__ = [
"AZLyricsLoader", "AZLyricsLoader",
"AirbyteJSONLoader", "AirbyteJSONLoader",
"AirtableLoader",
"ApifyDatasetLoader", "ApifyDatasetLoader",
"ArxivLoader", "ArxivLoader",
"AzureBlobStorageContainerLoader", "AzureBlobStorageContainerLoader",

View File

@ -0,0 +1,36 @@
from typing import Iterator, List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class AirtableLoader(BaseLoader):
    """Loader that loads the records of an Airtable table.

    Every record returned for the table becomes one ``Document`` whose
    ``page_content`` is the ``str()`` form of the record and whose metadata
    identifies the base and table it came from.
    """

    def __init__(self, api_token: str, table_id: str, base_id: str):
        """Initialize with API token and the IDs for table and base.

        Args:
            api_token: Airtable API token passed to ``pyairtable``.
            table_id: ID of the table to load.
            base_id: ID of the base that contains the table.
        """
        self.api_token = api_token
        self.table_id = table_id
        self.base_id = base_id

    def lazy_load(self) -> Iterator[Document]:
        """Yield one ``Document`` per record of the table.

        Yields:
            Documents with the stringified record as ``page_content`` and
            ``source``, ``base_id`` and ``table_id`` in ``metadata``.
        """
        # Imported lazily so that pyairtable is only needed when records are
        # actually loaded, not when this module is imported.
        from pyairtable import Table

        table = Table(self.api_token, self.base_id, self.table_id)
        # NOTE(review): ``table.all()`` is expected to return the complete
        # record list up front, so only Document construction is lazy here.
        records = table.all()
        source = f"{self.base_id}_{self.table_id}"
        for record in records:
            # Need to convert record from dict to str
            yield Document(
                page_content=str(record),
                # A fresh dict per Document so callers can mutate the metadata
                # of one Document without affecting the others.
                metadata={
                    "source": source,
                    "base_id": self.base_id,
                    "table_id": self.table_id,
                },
            )

    def load(self) -> List[Document]:
        """Load all records of the table and return them as a list."""
        return list(self.lazy_load())