You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/langchain_community/document_transformers/__init__.py

63 lines
1.8 KiB
Python

community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
9 months ago
"""**Document Transformers** are classes to transform Documents.
**Document Transformers** usually used to transform a lot of Documents in a single run.
**Class hierarchy:**
.. code-block::
BaseDocumentTransformer --> <name> # Examples: DoctranQATransformer, DoctranTextTranslator
**Main helpers:**
.. code-block::
Document
""" # noqa: E501
from langchain_community.document_transformers.beautiful_soup_transformer import (
BeautifulSoupTransformer,
)
from langchain_community.document_transformers.doctran_text_extract import (
DoctranPropertyExtractor,
)
from langchain_community.document_transformers.doctran_text_qa import (
DoctranQATransformer,
)
from langchain_community.document_transformers.doctran_text_translate import (
DoctranTextTranslator,
)
from langchain_community.document_transformers.embeddings_redundant_filter import (
EmbeddingsClusteringFilter,
EmbeddingsRedundantFilter,
get_stateful_documents,
)
from langchain_community.document_transformers.google_translate import (
GoogleTranslateTransformer,
)
from langchain_community.document_transformers.html2text import Html2TextTransformer
from langchain_community.document_transformers.long_context_reorder import (
LongContextReorder,
)
from langchain_community.document_transformers.nuclia_text_transform import (
NucliaTextTransformer,
)
from langchain_community.document_transformers.openai_functions import (
OpenAIMetadataTagger,
)
__all__ = [
"BeautifulSoupTransformer",
"DoctranQATransformer",
"DoctranTextTranslator",
"DoctranPropertyExtractor",
"EmbeddingsClusteringFilter",
"EmbeddingsRedundantFilter",
"GoogleTranslateTransformer",
"get_stateful_documents",
"LongContextReorder",
"NucliaTextTransformer",
"OpenAIMetadataTagger",
"Html2TextTransformer",
]