forked from Archives/langchain
parent
9218684759
commit
21bd16bb59
@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
|
|||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
:glob:
|
:glob:
|
||||||
|
|
||||||
|
./document_loaders/examples/airtable.ipynb
|
||||||
./document_loaders/examples/audio.ipynb
|
./document_loaders/examples/audio.ipynb
|
||||||
./document_loaders/examples/conll-u.ipynb
|
./document_loaders/examples/conll-u.ipynb
|
||||||
./document_loaders/examples/copypaste.ipynb
|
./document_loaders/examples/copypaste.ipynb
|
||||||
|
142
docs/modules/indexes/document_loaders/examples/airtable.ipynb
Normal file
142
docs/modules/indexes/document_loaders/examples/airtable.ipynb
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7ae421e6",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Airtable"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "98aea00d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"! pip install pyairtable"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "592483eb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.document_loaders import AirtableLoader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "637e1205",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"* Get your API key [here](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).\n",
|
||||||
|
"* Get the ID of your base [here](https://airtable.com/developers/web/api/introduction).\n",
|
||||||
|
"* Get your table ID from the table url as shown [here](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c12a7aff",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"api_key=\"xxx\"\n",
|
||||||
|
"base_id=\"xxx\"\n",
|
||||||
|
"table_id=\"xxx\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "ccddd5a6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader = AirtableLoader(api_key,table_id,base_id)\n",
|
||||||
|
"docs = loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ae76c25c",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Each table row is returned as a `Document` whose `page_content` is the string form of the row's `dict`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "7abec7ce",
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"3"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"len(docs)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"id": "403c95da",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'id': 'recF3GbGZCuh9sXIQ',\n",
|
||||||
|
" 'createdTime': '2023-06-09T04:47:21.000Z',\n",
|
||||||
|
" 'fields': {'Priority': 'High',\n",
|
||||||
|
" 'Status': 'In progress',\n",
|
||||||
|
" 'Name': 'Document Splitters'}}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"eval(docs[0].page_content)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -1,6 +1,7 @@
|
|||||||
"""All different types of document loaders."""
|
"""All different types of document loaders."""
|
||||||
|
|
||||||
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
|
from langchain.document_loaders.airbyte_json import AirbyteJSONLoader
|
||||||
|
from langchain.document_loaders.airtable import AirtableLoader
|
||||||
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
|
from langchain.document_loaders.apify_dataset import ApifyDatasetLoader
|
||||||
from langchain.document_loaders.arxiv import ArxivLoader
|
from langchain.document_loaders.arxiv import ArxivLoader
|
||||||
from langchain.document_loaders.azlyrics import AZLyricsLoader
|
from langchain.document_loaders.azlyrics import AZLyricsLoader
|
||||||
@ -135,6 +136,7 @@ TelegramChatLoader = TelegramChatFileLoader
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"AZLyricsLoader",
|
"AZLyricsLoader",
|
||||||
"AirbyteJSONLoader",
|
"AirbyteJSONLoader",
|
||||||
|
"AirtableLoader",
|
||||||
"ApifyDatasetLoader",
|
"ApifyDatasetLoader",
|
||||||
"ArxivLoader",
|
"ArxivLoader",
|
||||||
"AzureBlobStorageContainerLoader",
|
"AzureBlobStorageContainerLoader",
|
||||||
|
36
langchain/document_loaders/airtable.py
Normal file
36
langchain/document_loaders/airtable.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from typing import Iterator, List
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
|
||||||
|
class AirtableLoader(BaseLoader):
    """Loader that loads the records of an Airtable table.

    Every record of the table becomes one ``Document`` whose
    ``page_content`` is ``str(record)`` and whose metadata carries the
    base ID, the table ID and a ``source`` string built from both.
    """

    def __init__(self, api_token: str, table_id: str, base_id: str):
        """Initialize with API token and the IDs for table and base.

        Args:
            api_token: Airtable API token handed to ``pyairtable``.
            table_id: ID of the table to read.
            base_id: ID of the base that contains the table.
        """
        self.api_token = api_token
        self.table_id = table_id
        self.base_id = base_id

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield one ``Document`` per record of the table.

        Records are requested page by page, so the first documents are
        available before the whole table has been downloaded.

        Yields:
            A ``Document`` for each record, in the order the records are
            returned by ``pyairtable``.
        """
        # Imported lazily so this module can be imported without
        # pyairtable being installed.
        from pyairtable import Table

        # NOTE: Table takes (token, base, table) while __init__ takes
        # (token, table, base) -- the order is swapped here on purpose.
        table = Table(self.api_token, self.base_id, self.table_id)
        source = f"{self.base_id}_{self.table_id}"

        # iterate() returns the records one page (a list) at a time instead
        # of materialising the full table up front the way all() does.
        for page in table.iterate():
            for record in page:
                # The record is a dict; Document needs a string payload.
                yield Document(
                    page_content=str(record),
                    # A fresh dict per document so callers can mutate one
                    # document's metadata without affecting the others.
                    metadata={
                        "source": source,
                        "base_id": self.base_id,
                        "table_id": self.table_id,
                    },
                )

    def load(self) -> List[Document]:
        """Load every record of the table and return them as a list."""
        return list(self.lazy_load())
|
Loading…
Reference in New Issue
Block a user