langchain/libs/community/langchain_community/document_loaders/airbyte.py
Bagatur ed58eeb9c5
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to new package langchain-community in a backwards compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core
```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes
2023-12-11 13:53:30 -08:00

291 lines
10 KiB
Python

from typing import Any, Callable, Iterator, List, Mapping, Optional
from langchain_core.documents import Document
from langchain_core.utils.utils import guard_import
from langchain_community.document_loaders.base import BaseLoader
RecordHandler = Callable[[Any, Optional[str]], Document]
class AirbyteCDKLoader(BaseLoader):
"""Load with an `Airbyte` source connector implemented using the `CDK`."""
def __init__(
self,
config: Mapping[str, Any],
source_class: Any,
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
source_class: The source connector class.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
from airbyte_cdk.models.airbyte_protocol import AirbyteRecordMessage
from airbyte_cdk.sources.embedded.base_integration import (
BaseEmbeddedIntegration,
)
from airbyte_cdk.sources.embedded.runner import CDKRunner
class CDKIntegration(BaseEmbeddedIntegration):
"""A wrapper around the CDK integration."""
def _handle_record(
self, record: AirbyteRecordMessage, id: Optional[str]
) -> Document:
if record_handler:
return record_handler(record, id)
return Document(page_content="", metadata=record.data)
self._integration = CDKIntegration(
config=config,
runner=CDKRunner(source=source_class(), name=source_class.__name__),
)
self._stream_name = stream_name
self._state = state
def load(self) -> List[Document]:
return list(self.lazy_load())
def lazy_load(self) -> Iterator[Document]:
return self._integration._load_data(
stream_name=self._stream_name, state=self._state
)
@property
def last_state(self) -> Any:
return self._integration.last_state
class AirbyteHubspotLoader(AirbyteCDKLoader):
"""Load from `Hubspot` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_hubspot", pip_name="airbyte-source-hubspot"
).SourceHubspot
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteStripeLoader(AirbyteCDKLoader):
"""Load from `Stripe` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_stripe", pip_name="airbyte-source-stripe"
).SourceStripe
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteTypeformLoader(AirbyteCDKLoader):
"""Load from `Typeform` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_typeform", pip_name="airbyte-source-typeform"
).SourceTypeform
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
"""Load from `Zendesk Support` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_zendesk_support", pip_name="airbyte-source-zendesk-support"
).SourceZendeskSupport
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteShopifyLoader(AirbyteCDKLoader):
"""Load from `Shopify` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_shopify", pip_name="airbyte-source-shopify"
).SourceShopify
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteSalesforceLoader(AirbyteCDKLoader):
"""Load from `Salesforce` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_salesforce", pip_name="airbyte-source-salesforce"
).SourceSalesforce
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)
class AirbyteGongLoader(AirbyteCDKLoader):
"""Load from `Gong` using an `Airbyte` source connector."""
def __init__(
self,
config: Mapping[str, Any],
stream_name: str,
record_handler: Optional[RecordHandler] = None,
state: Optional[Any] = None,
) -> None:
"""Initializes the loader.
Args:
config: The config to pass to the source connector.
stream_name: The name of the stream to load.
record_handler: A function that takes in a record and an optional id and
returns a Document. If None, the record will be used as the document.
Defaults to None.
state: The state to pass to the source connector. Defaults to None.
"""
source_class = guard_import(
"source_gong", pip_name="airbyte-source-gong"
).SourceGong
super().__init__(
config=config,
source_class=source_class,
stream_name=stream_name,
record_handler=record_handler,
state=state,
)