mirror of
https://github.com/hwchase17/langchain
synced 2024-11-02 09:40:22 +00:00
ed58eeb9c5
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
108 lines
4.0 KiB
Python
108 lines
4.0 KiB
Python
from typing import Any, Optional, Sequence
|
|
|
|
from langchain_core.documents import BaseDocumentTransformer, Document
|
|
|
|
from langchain_community.utilities.vertexai import get_client_info
|
|
|
|
|
|
class GoogleTranslateTransformer(BaseDocumentTransformer):
    """Translate text documents using Google Cloud Translation.

    The transformer builds a ``TranslationServiceClient`` once at construction
    time and reuses it for every ``transform_documents`` call.
    """

    # Shared by both import guards so the install hint stays consistent.
    _IMPORT_ERROR_MESSAGE = (
        "Install Google Cloud Translate to use this transformer. "
        "(pip install google-cloud-translate)"
    )

    def __init__(
        self,
        project_id: str,
        *,
        location: str = "global",
        model_id: Optional[str] = None,
        glossary_id: Optional[str] = None,
        api_endpoint: Optional[str] = None,
    ) -> None:
        """
        Arguments:
            project_id: Google Cloud Project ID.
            location: (Optional) Translate model location.
            model_id: (Optional) Translate model ID to use.
            glossary_id: (Optional) Translate glossary ID to use.
            api_endpoint: (Optional) Regional endpoint to use.

        Raises:
            ImportError: If ``google-cloud-translate`` is not installed.
        """
        try:
            from google.api_core.client_options import ClientOptions
            from google.cloud import translate
        except ImportError as exc:
            raise ImportError(self._IMPORT_ERROR_MESSAGE) from exc

        self.project_id = project_id
        self.location = location
        self.model_id = model_id
        self.glossary_id = glossary_id

        # Only override the client options when a custom endpoint was given;
        # passing None lets the client fall back to its own defaults.
        client_options = (
            ClientOptions(api_endpoint=api_endpoint) if api_endpoint else None
        )
        self._client = translate.TranslationServiceClient(
            client_info=get_client_info("translate"),
            client_options=client_options,
        )

        self._parent_path = self._client.common_location_path(project_id, location)
        # For some reason, there's no `model_path()` method for the client.
        self._model_path = (
            f"{self._parent_path}/models/{model_id}" if model_id else None
        )
        self._glossary_path = (
            self._client.glossary_path(project_id, location, glossary_id)
            if glossary_id
            else None
        )

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Translate text documents using Google Translate.

        All documents are sent in a single ``translate_text`` request. Each
        returned document keeps the metadata of its input document and adds
        the ``model`` and ``detected_language_code`` reported for it.

        Arguments:
            documents: Documents whose ``page_content`` should be translated.
            source_language_code: ISO 639 language code of the input document.
            target_language_code: ISO 639 language code of the output document.
                Read from ``kwargs`` without a default, so callers are
                expected to supply it.
                For supported languages, refer to:
                https://cloud.google.com/translate/docs/languages
            mime_type: (Optional) Media Type of input text.
                Options: `text/plain`, `text/html`
                Defaults to `text/plain`.

        Returns:
            The translated documents, in the same order as ``documents``.
            An empty input yields an empty list without calling the API.

        Raises:
            ImportError: If ``google-cloud-translate`` is not installed.
        """
        # Nothing to translate: skip the API round trip entirely.
        if not documents:
            return []

        try:
            from google.cloud import translate
        except ImportError as exc:
            raise ImportError(self._IMPORT_ERROR_MESSAGE) from exc

        request = translate.TranslateTextRequest(
            contents=[doc.page_content for doc in documents],
            parent=self._parent_path,
            model=self._model_path,
            glossary_config=translate.TranslateTextGlossaryConfig(
                glossary=self._glossary_path
            ),
            source_language_code=kwargs.get("source_language_code", None),
            target_language_code=kwargs.get("target_language_code"),
            mime_type=kwargs.get("mime_type", "text/plain"),
        )
        response = self._client.translate_text(request=request)

        # If using a glossary, the translations will be in `glossary_translations`.
        translations = response.glossary_translations or response.translations

        return [
            Document(
                page_content=translation.translated_text,
                metadata={
                    **doc.metadata,
                    "model": translation.model,
                    "detected_language_code": translation.detected_language_code,
                },
            )
            for doc, translation in zip(documents, translations)
        ]
|