forked from Archives/langchain
parent
9218684759
commit
21bd16bb59
@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
./document_loaders/examples/airtable.ipynb
|
||||
./document_loaders/examples/audio.ipynb
|
||||
./document_loaders/examples/conll-u.ipynb
|
||||
./document_loaders/examples/copypaste.ipynb
|
||||
|
142
docs/modules/indexes/document_loaders/examples/airtable.ipynb
Normal file
142
docs/modules/indexes/document_loaders/examples/airtable.ipynb
Normal file
@ -0,0 +1,142 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ae421e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airtable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98aea00d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install pyairtable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "592483eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import AirtableLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "637e1205",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Get your API key [here](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).\n",
|
||||
"* Get ID of your base [here](https://airtable.com/developers/web/api/introduction).\n",
|
||||
"* Get your table ID from the table url as shown [here](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c12a7aff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"api_key=\"xxx\"\n",
|
||||
"base_id=\"xxx\"\n",
|
||||
"table_id=\"xxx\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ccddd5a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = AirtableLoader(api_key,table_id,base_id)\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae76c25c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Returns each table row as `dict`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "7abec7ce",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "403c95da",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'id': 'recF3GbGZCuh9sXIQ',\n",
|
||||
" 'createdTime': '2023-06-09T04:47:21.000Z',\n",
|
||||
" 'fields': {'Priority': 'High',\n",
|
||||
" 'Status': 'In progress',\n",
|
||||
" 'Name': 'Document Splitters'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
"""All different types of document loaders."""
|
||||
|
||||
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
|
||||
from langchain.document_loaders.airtable import AirtableLoader
|
||||
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
|
||||
from langchain.document_loaders.arxiv import ArxivLoader
|
||||
from langchain.document_loaders.azlyrics import AZLyricsLoader
|
||||
@ -135,6 +136,7 @@ TelegramChatLoader = TelegramChatFileLoader
|
||||
__all__ = [
|
||||
"AZLyricsLoader",
|
||||
"AirbyteJSONLoader",
|
||||
"AirtableLoader",
|
||||
"ApifyDatasetLoader",
|
||||
"ArxivLoader",
|
||||
"AzureBlobStorageContainerLoader",
|
||||
|
36
langchain/document_loaders/airtable.py
Normal file
36
langchain/document_loaders/airtable.py
Normal file
@ -0,0 +1,36 @@
|
||||
from typing import Iterator, List
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class AirtableLoader(BaseLoader):
|
||||
"""Loader that loads local airbyte json files."""
|
||||
|
||||
def __init__(self, api_token: str, table_id: str, base_id: str):
|
||||
"""Initialize with API token and the IDs for table and base"""
|
||||
self.api_token = api_token
|
||||
self.table_id = table_id
|
||||
self.base_id = base_id
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Load Table."""
|
||||
|
||||
from pyairtable import Table
|
||||
|
||||
table = Table(self.api_token, self.base_id, self.table_id)
|
||||
records = table.all()
|
||||
for record in records:
|
||||
# Need to convert record from dict to str
|
||||
yield Document(
|
||||
page_content=str(record),
|
||||
metadata={
|
||||
"source": self.base_id + "_" + self.table_id,
|
||||
"base_id": self.base_id,
|
||||
"table_id": self.table_id,
|
||||
},
|
||||
)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load Table."""
|
||||
return list(self.lazy_load())
|
Loading…
Reference in New Issue
Block a user