upstage: move to external repo (#22506)

3 months ago · 48d6ea427f
parent 0a4ee864e9
commit 48d6ea427f
31 changed files with 2 additions and 3732 deletions
--- a/libs/partners/upstage/LICENSE
+++ b/libs/partners/upstage/LICENSE
@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2024 LangChain, Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/libs/partners/upstage/Makefile
+++ b/libs/partners/upstage/Makefile
@ -1,57 +0,0 @@
 # Every target in this file is a command, not a file, so all of them are phony.
 .PHONY: all format format_diff lint lint_diff lint_package lint_tests test tests integration_test integration_tests spell_check spell_fix check_imports help
 # Default target executed when no arguments are given to make.
 all: help
 # Path handed to pytest. Override from the command line, e.g.
 # `make test TEST_FILE=tests/unit_tests/test_chat_models.py`.
 TEST_FILE ?= tests/unit_tests/
 # The integration targets point pytest at the integration suite instead.
 integration_test integration_tests: TEST_FILE=tests/integration_tests/
 test tests integration_test integration_tests:
 	poetry run pytest $(TEST_FILE)
 ######################
 # LINTING AND FORMATTING
 ######################
 # Define a variable for Python and notebook files.
 PYTHON_FILES=.
 MYPY_CACHE=.mypy_cache
 lint format: PYTHON_FILES=.
 # Only the Python/notebook files that differ from master (deleted files excluded).
 # This expands to the empty string when nothing relevant changed.
 lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/upstage --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
 lint_package: PYTHON_FILES=langchain_upstage
 lint_tests: PYTHON_FILES=tests
 lint_tests: MYPY_CACHE=.mypy_cache_test
 # The `[ "$(PYTHON_FILES)" = "" ] ||` guard skips a tool when there is nothing to
 # check, so the *_diff targets succeed on a tree without Python changes instead
 # of invoking the tool with no file arguments.
 # `mkdir -p` keeps the mypy step quiet when the cache directory already exists.
 lint lint_diff lint_package lint_tests:
 	poetry run ruff .
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
 	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }
 format format_diff:
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
 	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
 spell_check:
 	poetry run codespell --toml pyproject.toml
 spell_fix:
 	poetry run codespell --toml pyproject.toml -w
 # The prerequisites are every Python file in the package; `$^` passes them all
 # to the import checker.
 check_imports: $(shell find langchain_upstage -name '*.py')
 	poetry run python ./scripts/check_imports.py $^
 ######################
 # HELP
 ######################
 help:
 	@echo '----'
 	@echo 'check_imports				- check imports'
 	@echo 'format                       - run code formatters'
 	@echo 'lint                         - run linters'
 	@echo 'test                         - run unit tests'
 	@echo 'tests                        - run unit tests'
 	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
 	@echo 'integration_tests            - run integration tests'
 	@echo 'spell_check                  - run codespell'
 	@echo 'spell_fix                    - run codespell and write fixes'
--- a/libs/partners/upstage/README.md
+++ b/libs/partners/upstage/README.md
@ -1,25 +1,3 @@
-# langchain-upstage
+This package has moved!
-This package contains the LangChain integrations for [Upstage](https://upstage.ai) through their [APIs](https://developers.upstage.ai/docs/getting-started/models).
+https://github.com/langchain-ai/langchain-upstage/tree/main/libs/upstage
 ## Installation and Setup
 - Install the LangChain partner package
 ```bash
 pip install -U langchain-upstage
 ```
 - Get an Upstage API key from the [Upstage Console](https://console.upstage.ai/home) and set it as an environment variable (`UPSTAGE_API_KEY`).
 ## Chat Models
 This package contains the `ChatUpstage` class, which is the recommended way to interface with Upstage models.
 See a [usage example](https://python.langchain.com/docs/integrations/chat/upstage)
 ## Embeddings
 See a [usage example](https://python.langchain.com/docs/integrations/text_embedding/upstage)
 Use the `solar-embedding-1-large` model for embeddings. Do not add suffixes such as `-query` or `-passage` to the model name.
 `UpstageEmbeddings` will automatically add the suffixes based on the method called.
--- a/libs/partners/upstage/langchain_upstage/init.py
+++ b/libs/partners/upstage/langchain_upstage/init.py
@ -1,17 +0,0 @@
 from langchain_upstage.chat_models import ChatUpstage
 from langchain_upstage.embeddings import UpstageEmbeddings
 from langchain_upstage.layout_analysis import UpstageLayoutAnalysisLoader
 from langchain_upstage.layout_analysis_parsers import UpstageLayoutAnalysisParser
 from langchain_upstage.tools.groundedness_check import (
    GroundednessCheck,
    UpstageGroundednessCheck,
 )
 __all__ = [
    "ChatUpstage",
    "UpstageEmbeddings",
    "UpstageLayoutAnalysisLoader",
    "UpstageLayoutAnalysisParser",
    "UpstageGroundednessCheck",
    "GroundednessCheck",
 ]
--- a/libs/partners/upstage/langchain_upstage/chat_models.py
+++ b/libs/partners/upstage/langchain_upstage/chat_models.py
@ -1,120 +0,0 @@
 import os
 from typing import (
    Any,
    Dict,
    List,
    Optional,
 )
 import openai
 from langchain_core.language_models.chat_models import LangSmithParams
 from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
 from langchain_core.utils import (
    convert_to_secret_str,
    get_from_dict_or_env,
 )
 from langchain_openai.chat_models.base import BaseChatOpenAI
 class ChatUpstage(BaseChatOpenAI):
    """Chat model that talks to the Upstage API through the OpenAI client.
    The API key is taken from the `upstage_api_key` / `api_key` argument or,
    when that is absent, from the `UPSTAGE_API_KEY` environment variable.
    Example:
        .. code-block:: python
            from langchain_upstage import ChatUpstage
            model = ChatUpstage()
    """
    model_name: str = Field(default="solar-1-mini-chat", alias="model")
    """Name of the model to call."""
    upstage_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
    """Upstage API key; read from `UPSTAGE_API_KEY` when not supplied."""
    upstage_api_base: Optional[str] = Field(
        default="https://api.upstage.ai/v1/solar", alias="base_url"
    )
    """Base URL for API requests; `UPSTAGE_API_BASE` is used when this is empty."""
    openai_api_key: Optional[SecretStr] = Field(default=None)
    """Not supported for Upstage; use `upstage_api_key` instead."""
    openai_api_base: Optional[str] = Field(default=None)
    """Not supported for Upstage; use `upstage_api_base` instead."""
    openai_organization: Optional[str] = Field(default=None)
    """Not supported for Upstage."""
    tiktoken_model_name: Optional[str] = None
    """Not supported for Upstage."""
    @property
    def lc_secrets(self) -> Dict[str, str]:
        """Map the secret constructor argument to its environment variable."""
        return {"upstage_api_key": "UPSTAGE_API_KEY"}
    @classmethod
    def get_lc_namespace(cls) -> List[str]:
        """Return the namespace path reported for this class."""
        return ["langchain", "chat_models", "upstage"]
    @property
    def lc_attributes(self) -> Dict[str, Any]:
        """Return the API base URL as an extra attribute when one is set."""
        api_base = self.upstage_api_base
        return {"upstage_api_base": api_base} if api_base else {}
    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "upstage-chat"
    def _get_ls_params(
        self, stop: Optional[List[str]] = None, **kwargs: Any
    ) -> LangSmithParams:
        """Return the parent's tracing parameters with the provider set to upstage."""
        ls_params = super()._get_ls_params(stop=stop, **kwargs)
        ls_params["ls_provider"] = "upstage"
        return ls_params
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Check `n`/`streaming`, resolve credentials, and build the API clients.
        Raises:
            ValueError: If `n` is below 1, or above 1 while streaming is enabled.
        """
        completions = values["n"]
        if completions < 1:
            raise ValueError("n must be at least 1.")
        if completions > 1 and values["streaming"]:
            raise ValueError("n must be 1 when streaming.")
        # Resolve the key from the explicit value or the environment.
        secret_key = convert_to_secret_str(
            get_from_dict_or_env(values, "upstage_api_key", "UPSTAGE_API_KEY")
        )
        values["upstage_api_key"] = secret_key
        base_url = values["upstage_api_base"] or os.getenv("UPSTAGE_API_BASE")
        values["upstage_api_base"] = base_url
        # Keyword arguments shared by the sync and async OpenAI clients.
        shared_kwargs = dict(
            api_key=secret_key.get_secret_value() if secret_key else None,
            base_url=base_url,
            timeout=values["request_timeout"],
            max_retries=values["max_retries"],
            default_headers=values["default_headers"],
            default_query=values["default_query"],
        )
        # Clients supplied by the caller are kept; only missing ones are created.
        if not values.get("client"):
            sync_client = openai.OpenAI(
                **shared_kwargs, http_client=values["http_client"]
            )
            values["client"] = sync_client.chat.completions
        if not values.get("async_client"):
            async_client = openai.AsyncOpenAI(
                **shared_kwargs, http_client=values["http_async_client"]
            )
            values["async_client"] = async_client.chat.completions
        return values
--- a/libs/partners/upstage/langchain_upstage/embeddings.py
+++ b/libs/partners/upstage/langchain_upstage/embeddings.py
@ -1,276 +0,0 @@
 import logging
 import os
 import warnings
 from typing import (
    Any,
    Dict,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
 )
 import openai
 from langchain_core.embeddings import Embeddings
 from langchain_core.pydantic_v1 import (
    BaseModel,
    Extra,
    Field,
    SecretStr,
    root_validator,
 )
 from langchain_core.utils import (
    convert_to_secret_str,
    get_from_dict_or_env,
    get_pydantic_field_names,
 )
 logger = logging.getLogger(__name__)
 DEFAULT_EMBED_BATCH_SIZE = 10
 MAX_EMBED_BATCH_SIZE = 100
 class UpstageEmbeddings(BaseModel, Embeddings):
    """UpstageEmbeddings embedding model.
    To use, set the environment variable `UPSTAGE_API_KEY` with your API key or
    pass it as a named parameter to the constructor.
    Documents are embedded with the `<model>-passage` variant and queries with
    the `<model>-query` variant; the suffix is appended by the embed methods.
    Example:
        .. code-block:: python
            from langchain_upstage import UpstageEmbeddings
            model = UpstageEmbeddings(model='solar-embedding-1-large')
    """
    client: Any = Field(default=None, exclude=True)  #: :meta private:
    async_client: Any = Field(default=None, exclude=True)  #: :meta private:
    model: str = Field(...)
    """Embeddings model name to use. Do not add suffixes like `-query` and `-passage`.
    Instead, use 'solar-embedding-1-large' for example.
    """
    dimensions: Optional[int] = None
    """The number of dimensions the resulting output embeddings should have.
    Not yet supported.
    """
    upstage_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
    """API Key for Solar API."""
    upstage_api_base: str = Field(
        default="https://api.upstage.ai/v1/solar", alias="base_url"
    )
    """Endpoint URL to use."""
    embedding_ctx_length: int = 4096
    """The maximum number of tokens to embed at once.
    Not yet supported.
    """
    embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE
    """Number of texts sent per request when embedding documents."""
    allowed_special: Union[Literal["all"], Set[str]] = set()
    """Not yet supported."""
    disallowed_special: Union[Literal["all"], Set[str], Sequence[str]] = "all"
    """Not yet supported."""
    chunk_size: int = 1000
    """Maximum number of texts to embed in each batch.
    Not yet supported.
    """
    max_retries: int = 2
    """Maximum number of retries to make when generating."""
    request_timeout: Optional[Union[float, Tuple[float, float], Any]] = Field(
        default=None, alias="timeout"
    )
    """Timeout for requests to Upstage embedding API. Can be float, httpx.Timeout or
        None."""
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding.
    Not yet supported.
    """
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
    skip_empty: bool = False
    """Whether to skip empty strings when embedding or raise an error.
    Defaults to not skipping.
    Not yet supported."""
    default_headers: Union[Mapping[str, str], None] = None
    default_query: Union[Mapping[str, object], None] = None
    # Configure a custom httpx client. See the
    # [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
    http_client: Union[Any, None] = None
    """Optional httpx.Client. Only used for sync invocations. Must specify
        http_async_client as well if you'd like a custom client for async invocations.
    """
    http_async_client: Union[Any, None] = None
    """Optional httpx.AsyncClient. Only used for async invocations. Must specify
        http_client as well if you'd like a custom client for sync invocations."""
    class Config:
        extra = Extra.forbid
        allow_population_by_field_name = True
    @root_validator(pre=True)
    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in.
        Unknown keyword arguments are moved into `model_kwargs` with a warning.
        Raises:
            ValueError: If a name is supplied both directly and in `model_kwargs`,
                or if `model_kwargs` contains the name of a declared field.
        """
        all_required_field_names = get_pydantic_field_names(cls)
        extra = values.get("model_kwargs", {})
        # Iterate over a snapshot of the keys because entries are popped below.
        for field_name in list(values):
            if field_name in extra:
                raise ValueError(f"Found {field_name} supplied twice.")
            if field_name not in all_required_field_names:
                warnings.warn(
                    f"""WARNING! {field_name} is not default parameter.
                    {field_name} was transferred to model_kwargs.
                    Please confirm that {field_name} is what you intended."""
                )
                extra[field_name] = values.pop(field_name)
        invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
        if invalid_model_kwargs:
            raise ValueError(
                f"Parameters {invalid_model_kwargs} should be specified explicitly. "
                f"Instead they were passed in as part of `model_kwargs` parameter."
            )
        values["model_kwargs"] = extra
        return values
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the API key and base URL, then build any missing API clients."""
        upstage_api_key = get_from_dict_or_env(
            values, "upstage_api_key", "UPSTAGE_API_KEY"
        )
        values["upstage_api_key"] = (
            convert_to_secret_str(upstage_api_key) if upstage_api_key else None
        )
        values["upstage_api_base"] = values["upstage_api_base"] or os.getenv(
            "UPSTAGE_API_BASE"
        )
        # Keyword arguments shared by the sync and async OpenAI clients.
        client_params = {
            "api_key": (
                values["upstage_api_key"].get_secret_value()
                if values["upstage_api_key"]
                else None
            ),
            "base_url": values["upstage_api_base"],
            "timeout": values["request_timeout"],
            "max_retries": values["max_retries"],
            "default_headers": values["default_headers"],
            "default_query": values["default_query"],
        }
        # Clients supplied by the caller are kept; only missing ones are created.
        if not values.get("client"):
            sync_specific = {"http_client": values["http_client"]}
            values["client"] = openai.OpenAI(
                **client_params, **sync_specific
            ).embeddings
        if not values.get("async_client"):
            async_specific = {"http_client": values["http_async_client"]}
            values["async_client"] = openai.AsyncOpenAI(
                **client_params, **async_specific
            ).embeddings
        return values
    @property
    def _invocation_params(self) -> Dict[str, Any]:
        """Return a fresh dict of request parameters with the bare model name.
        Any `-query` / `-passage` suffix is stripped from a local copy of the
        model name; reading this property does not modify the instance.
        """
        base_model = self.model.replace("-query", "").replace("-passage", "")
        params: Dict = {"model": base_model, **self.model_kwargs}
        if self.dimensions is not None:
            params["dimensions"] = self.dimensions
        return params
    def _split_into_batches(self, texts: List[str]) -> List[List[str]]:
        """Validate `embed_batch_size` and cut `texts` into request-sized batches.
        Args:
            texts: The texts to split.
        Returns:
            The batches in input order; an empty list when `texts` is empty.
        Raises:
            AssertionError: If `embed_batch_size` exceeds MAX_EMBED_BATCH_SIZE.
            ValueError: If `embed_batch_size` is below 1 and there are texts.
        """
        assert (
            self.embed_batch_size <= MAX_EMBED_BATCH_SIZE
        ), f"The embed_batch_size should not be larger than {MAX_EMBED_BATCH_SIZE}."
        if not texts:
            return []
        # A negative step would make range() empty and silently drop every text,
        # so reject non-positive sizes explicitly.
        if self.embed_batch_size < 1:
            raise ValueError("The embed_batch_size should be at least 1.")
        batch_size = min(self.embed_batch_size, len(texts))
        return [
            texts[start : start + batch_size]
            for start in range(0, len(texts), batch_size)
        ]
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of document texts using passage model.
        Args:
            texts: The list of texts to embed.
        Returns:
            List of embeddings, one for each text.
        """
        batches = self._split_into_batches(texts)
        if not batches:
            return []
        params = self._invocation_params
        params["model"] = params["model"] + "-passage"
        embeddings: List[List[float]] = []
        for batch in batches:
            data = self.client.create(input=batch, **params).data
            embeddings.extend([r.embedding for r in data])
        return embeddings
    def embed_query(self, text: str) -> List[float]:
        """Embed query text using query model.
        Args:
            text: The text to embed.
        Returns:
            Embedding for the text.
        """
        params = self._invocation_params
        params["model"] = params["model"] + "-query"
        response = self.client.create(input=text, **params)
        # Non-dict responses are converted with model_dump() before indexing.
        if not isinstance(response, dict):
            response = response.model_dump()
        return response["data"][0]["embedding"]
    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of document texts using passage model asynchronously.
        Args:
            texts: The list of texts to embed.
        Returns:
            List of embeddings, one for each text.
        """
        batches = self._split_into_batches(texts)
        if not batches:
            return []
        params = self._invocation_params
        params["model"] = params["model"] + "-passage"
        embeddings: List[List[float]] = []
        for batch in batches:
            response = await self.async_client.create(input=batch, **params)
            embeddings.extend([r.embedding for r in response.data])
        return embeddings
    async def aembed_query(self, text: str) -> List[float]:
        """Asynchronous Embed query text using query model.
        Args:
            text: The text to embed.
        Returns:
            Embedding for the text.
        """
        params = self._invocation_params
        params["model"] = params["model"] + "-query"
        response = await self.async_client.create(input=text, **params)
        # Non-dict responses are converted with model_dump() before indexing.
        if not isinstance(response, dict):
            response = response.model_dump()
        return response["data"][0]["embedding"]
--- a/libs/partners/upstage/langchain_upstage/layout_analysis.py
+++ b/libs/partners/upstage/langchain_upstage/layout_analysis.py
@ -1,248 +0,0 @@
 import os
 import warnings
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Literal, Optional, Union
 from langchain_core.document_loaders import BaseLoader, Blob
 from langchain_core.documents import Document
 from .layout_analysis_parsers import UpstageLayoutAnalysisParser
 DEFAULT_PAGE_BATCH_SIZE = 10
 OutputType = Literal["text", "html"]
 SplitType = Literal["none", "element", "page"]
 def validate_api_key(api_key: str) -> None:
    """
    Ensure an API key was supplied.
    Args:
        api_key (str): The API key to check.
    Raises:
        ValueError: If the API key is falsy (empty string or None).
    Returns:
        None
    """
    if api_key:
        return
    raise ValueError("API Key is required for Upstage Document Loader")
 def validate_file_path(file_path: Union[str, Path, List[str], List[Path]]) -> None:
    """
    Check that the given path, or every path in the given list, exists.
    Args:
        file_path (Union[str, Path, List[str], List[Path]): A single path or a
                                                            list of paths.
    Raises:
        FileNotFoundError: For the first path, in list order, that does not exist.
    """
    if not isinstance(file_path, list):
        if os.path.exists(file_path):
            return
        raise FileNotFoundError(f"File not found: {file_path}")
    # Lists are checked element by element through the same function.
    for entry in file_path:
        validate_file_path(entry)
 def get_from_param_or_env(
    key: str,
    param: Optional[str] = None,
    env_key: Optional[str] = None,
    default: Optional[str] = None,
 ) -> str:
    """Resolve a value from an explicit param, an environment variable, or a default.
    Lookup order: `param` if it is not None, then a non-empty environment
    variable named `env_key`, then `default` if it is not None.
    Raises:
        ValueError: If none of the three sources provides a value.
    """
    if param is not None:
        return param
    # An unset or empty environment variable counts as missing.
    env_value = os.environ.get(env_key) if env_key else None
    if env_value:
        return env_value
    if default is not None:
        return default
    raise ValueError(
        f"Did not find {key}, please add an environment variable"
        f" `{env_key}` which contains it, or pass"
        f"  `{key}` as a named parameter."
    )
 class UpstageLayoutAnalysisLoader(BaseLoader):
    """Upstage Layout Analysis.
    To use, you should have the environment variable `UPSTAGE_API_KEY`
    set with your API key or pass it as a named parameter to the constructor.
    Example:
        .. code-block:: python
            from langchain_upstage import UpstageLayoutAnalysisLoader
            file_path = "/PATH/TO/YOUR/FILE.pdf"
            loader = UpstageLayoutAnalysisLoader(
                        file_path, split="page", output_type="text"
                     )
    """
    def __init__(
        self,
        file_path: Union[str, Path, List[str], List[Path]],
        output_type: Union[OutputType, dict] = "html",
        split: SplitType = "none",
        api_key: Optional[str] = None,
        use_ocr: bool = False,
        exclude: list = ["header", "footer"],
    ):
        """
        Initializes an instance of the Upstage document loader.
        Args:
            file_path (Union[str, Path, List[str], List[Path]): The path to the document
                                                                to be loaded.
            output_type (Union[OutputType, dict], optional): The type of output to be
                                                             generated by the parser.
                                                             Defaults to "html".
            split (SplitType, optional): The type of splitting to be applied.
                                         Defaults to "none" (no splitting).
            api_key (str, optional): The API key for accessing the Upstage API.
                                     Defaults to None, in which case it will be
                                     fetched from the environment variable
                                     `UPSTAGE_API_KEY`.
            use_ocr (bool, optional): Extract text from images in the document.
                                      Defaults to False. (Use text info in PDF file)
            exclude (list, optional): Exclude specific elements from
                                                     the output.
                                                     Defaults to ["header", "footer"].
        Raises:
            ValueError: If no API key can be resolved.
            FileNotFoundError: If any given path does not exist.
        """
        self.file_path = file_path
        self.output_type = output_type
        self.split = split
        # The deprecated variable is still honoured as a last-resort fallback.
        if deprecated_key := os.environ.get("UPSTAGE_DOCUMENT_AI_API_KEY"):
            warnings.warn(
                "UPSTAGE_DOCUMENT_AI_API_KEY is deprecated. "
                "Please use UPSTAGE_API_KEY instead."
            )
        self.api_key = get_from_param_or_env(
            "UPSTAGE_API_KEY", api_key, "UPSTAGE_API_KEY", deprecated_key
        )
        self.use_ocr = use_ocr
        # Copy so that instances never share (or mutate) the default list object.
        self.exclude = list(exclude)
        validate_file_path(self.file_path)
        validate_api_key(self.api_key)
    def _create_parser(self) -> UpstageLayoutAnalysisParser:
        """Build a parser configured with this loader's settings."""
        return UpstageLayoutAnalysisParser(
            self.api_key,
            split=self.split,
            output_type=self.output_type,
            use_ocr=self.use_ocr,
            exclude=self.exclude,
        )
    def load(self) -> List[Document]:
        """
        Loads and parses the document using the UpstageLayoutAnalysisParser.
        Every file is parsed with `is_batch=True`.
        Returns:
            A list of Document objects representing the parsed layout analysis.
        """
        if isinstance(self.file_path, list):
            result: List[Document] = []
            for file_path in self.file_path:
                blob = Blob.from_path(file_path)
                parser = self._create_parser()
                result.extend(parser.lazy_parse(blob, is_batch=True))
            return result
        blob = Blob.from_path(self.file_path)
        parser = self._create_parser()
        return list(parser.lazy_parse(blob, is_batch=True))
    def lazy_load(self) -> Iterator[Document]:
        """
        Lazily loads and parses the document using the UpstageLayoutAnalysisParser.
        A list of paths is parsed with `is_batch=True`; a single path is parsed
        with the parser's default for `is_batch`.
        Returns:
            An iterator of Document objects representing the parsed layout analysis.
        """
        if isinstance(self.file_path, list):
            for file_path in self.file_path:
                blob = Blob.from_path(file_path)
                parser = self._create_parser()
                yield from parser.lazy_parse(blob, is_batch=True)
        else:
            blob = Blob.from_path(self.file_path)
            parser = self._create_parser()
            yield from parser.lazy_parse(blob)
    def merge_and_split(
        self, documents: List[Document], splitter: Optional[object] = None
    ) -> List[Document]:
        """
        Merges the page content and metadata of multiple documents into a single
        document, or splits the documents using a custom splitter.
        Args:
            documents (list): A list of Document objects to be merged and split.
            splitter (object, optional): An optional splitter object that implements the
                `split_documents` method. If provided, the documents will be split using
                this splitter. Defaults to None, in which case the documents are merged.
        Returns:
            list: A list of Document objects. If no splitter is provided, a single
            Document object is returned with the merged content and combined metadata.
            If a splitter is provided, the documents are split and a list of Document
            objects is returned.
        Raises:
            AssertionError: If a splitter is provided but it does not implement the
            `split_documents` method.
        """
        if splitter is None:
            merged_content = " ".join([doc.page_content for doc in documents])
            # Each metadata key maps to the list of values seen, in document order.
            metadatas: Dict[str, Any] = dict()
            for doc in documents:
                for key, value in doc.metadata.items():
                    metadatas.setdefault(key, []).append(value)
            return [Document(page_content=merged_content, metadata=metadatas)]
        assert hasattr(
            splitter, "split_documents"
        ), "splitter must implement split_documents method"
        return splitter.split_documents(documents)
--- a/libs/partners/upstage/langchain_upstage/layout_analysis_parsers.py
+++ b/libs/partners/upstage/langchain_upstage/layout_analysis_parsers.py
@ -1,396 +0,0 @@
 import io
 import json
 import os
 import warnings
 from typing import Dict, Iterator, List, Literal, Optional, Union
 import fitz  # type: ignore
 import requests
 from fitz import Document as fitzDocument
 from langchain_core.document_loaders import BaseBlobParser, Blob
 from langchain_core.documents import Document
 LAYOUT_ANALYSIS_URL = "https://api.upstage.ai/v1/document-ai/layout-analysis"
 DEFAULT_NUMBER_OF_PAGE = 10
 OutputType = Literal["text", "html"]
 SplitType = Literal["none", "element", "page"]
 def validate_api_key(api_key: str) -> None:
    """
    Ensures that an API key was supplied.
    Args:
        api_key (str): The API key to check.
    Raises:
        ValueError: If the API key is falsy (empty string or None).
    Returns:
        None
    """
    if api_key:
        return
    raise ValueError("API Key is required for Upstage Document Loader")
 def validate_file_path(file_path: str) -> None:
    """
    Checks that something exists at the given path.
    Args:
        file_path (str): The path to check.
    Raises:
        FileNotFoundError: If `os.path.exists` reports nothing at `file_path`.
    """
    path_exists = os.path.exists(file_path)
    if path_exists:
        return
    raise FileNotFoundError(f"File not found: {file_path}")
 def parse_output(data: dict, output_type: Union[OutputType, dict]) -> str:
    """
    Picks the representation of one element according to `output_type`.
    Args:
        data (dict): The element; it is read by the keys "category", "text"
                     and "html", or by the key a mapping selects.
        output_type (Union[OutputType, dict]): Either "text" or "html", or a
                                               mapping from element category to
                                               the key of `data` to return.
                                               Categories missing from the
                                               mapping fall back to "text".
    Returns:
        str: The selected value of `data`.
    Raises:
        ValueError: If the output type is neither a dict nor one of the
                    supported strings.
    """
    if isinstance(output_type, dict):
        selected_key = output_type.get(data["category"], "text")
        return data[selected_key]
    if isinstance(output_type, str) and output_type in ("text", "html"):
        return data[output_type]
    raise ValueError(f"Invalid output type: {output_type}")
 def get_from_param_or_env(
    key: str,
    param: Optional[str] = None,
    env_key: Optional[str] = None,
    default: Optional[str] = None,
 ) -> str:
    """Resolve a value from, in order: `param`, the environment, `default`.
    An environment variable that is unset or empty is skipped. A ValueError is
    raised when none of the three sources provides a value.
    """
    if param is not None:
        return param
    if env_key:
        env_value = os.environ.get(env_key)
        if env_value:
            return env_value
    if default is not None:
        return default
    raise ValueError(
        f"Did not find {key}, please add an environment variable"
        f" `{env_key}` which contains it, or pass"
        f"  `{key}` as a named parameter."
    )
 class UpstageLayoutAnalysisParser(BaseBlobParser):
    """Upstage Layout Analysis Parser.
    To use, you should have the environment variable `UPSTAGE_API_KEY`
    set with your API key or pass it as a named parameter to the constructor.
    Example:
        .. code-block:: python
            from langchain_upstage import UpstageLayoutAnalysisParser
            loader = UpstageLayoutAnalysisParser(split="page", output_type="text")
    """
    def __init__(
        self,
        api_key: Optional[str] = None,
        output_type: Union[OutputType, dict] = "html",
        split: SplitType = "none",
        use_ocr: bool = False,
        exclude: Optional[list] = None,
    ):
        """
        Initializes an instance of the Upstage class.
        Args:
            api_key (str, optional): The API key for accessing the Upstage API.
                                     Defaults to None, in which case it will be
                                     fetched from the environment variable
                                     `UPSTAGE_API_KEY`, falling back to the
                                     deprecated `UPSTAGE_DOCUMENT_AI_API_KEY`.
            output_type (Union[OutputType, dict], optional): The type of output to be
                                                             generated by the parser.
                                                             Defaults to "html".
            split (SplitType, optional): The type of splitting to be applied.
                                         Defaults to "none" (no splitting).
            use_ocr (bool, optional): Extract text from images in the document.
                                      Defaults to False. (Use text info in PDF file)
            exclude (list, optional): Element categories to drop from the output.
                                      Defaults to None (all included).
        Raises:
            ValueError: If no API key can be resolved, or the resolved key is
                        empty.
        """
        if deprecated_key := os.environ.get("UPSTAGE_DOCUMENT_AI_API_KEY"):
            # The trailing space keeps the two sentences apart once the adjacent
            # literals are concatenated.
            warnings.warn(
                "UPSTAGE_DOCUMENT_AI_API_KEY is deprecated. "
                "Please use UPSTAGE_API_KEY instead."
            )
        self.api_key = get_from_param_or_env(
            "UPSTAGE_API_KEY", api_key, "UPSTAGE_API_KEY", deprecated_key
        )
        self.output_type = output_type
        self.split = split
        self.use_ocr = use_ocr
        # Copy into a fresh list so instances never share (or mutate) a default
        # or caller-owned list.
        self.exclude = list(exclude) if exclude is not None else []
        validate_api_key(self.api_key)
    def _get_response(self, files: Dict) -> List:
        """
        Sends a POST request to the API endpoint with the provided files and
        returns the elements of the response.
        Args:
            files (dict): A dictionary containing the files to be sent in the request.
        Returns:
            list: The entries of the response's "elements" field whose category
                  is not listed in `self.exclude` (empty if the field is absent).
        Raises:
            ValueError: If the request fails or the response body is not valid
                        JSON.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        options = {"ocr": self.use_ocr}
        try:
            response = requests.post(
                LAYOUT_ANALYSIS_URL, headers=headers, files=files, data=options
            )
            response.raise_for_status()
            result = response.json().get("elements", [])
        # The JSON handler must come first: the decode error raised by
        # `response.json()` in recent requests releases is also a
        # RequestException and would otherwise be reported as a send failure.
        except json.JSONDecodeError as json_err:
            # Handle JSON decode errors
            print(f"JSON Decode Error: {json_err}")
            raise ValueError(
                f"Failed to decode JSON response: {json_err}"
            ) from json_err
        except requests.RequestException as req_err:
            # Handle any request-related exceptions
            print(f"Request Exception: {req_err}")
            raise ValueError(f"Failed to send request: {req_err}") from req_err
        return [
            element for element in result if element["category"] not in self.exclude
        ]
    def _split_and_request(
        self,
        full_docs: fitzDocument,
        start_page: int,
        num_pages: int = DEFAULT_NUMBER_OF_PAGE,
    ) -> List:
        """
        Copies a range of pages into a temporary PDF and sends it to the server.
        Args:
            full_docs (fitzDocument): The opened PDF to take pages from.
            start_page (int): Index of the first page to include.
            num_pages (int, optional): How many consecutive pages to include.
                                       Defaults to DEFAULT_NUMBER_OF_PAGE.
        Returns:
            list: The elements returned by `_get_response` for the chunk.
        """
        last_page = start_page + num_pages - 1
        with fitz.open() as chunk_pdf:
            chunk_pdf.insert_pdf(full_docs, from_page=start_page, to_page=last_page)
            chunk_bytes = chunk_pdf.write()
        # Upload from memory; the chunk never touches the disk.
        with io.BytesIO(chunk_bytes) as buffer:
            return self._get_response({"document": buffer})
    def _element_document(self, elements: Dict) -> Document:
        """
        Converts a single element into a Document object.
        Args:
            elements: The element to convert; read by the keys "page", "id",
                      "bounding_box" and "category".
        Returns:
            A Document whose content is the parsed element and whose metadata
            records the element's position together with the parser settings.
        """
        content = parse_output(elements, self.output_type)
        element_metadata = {
            "page": elements["page"],
            "id": elements["id"],
            "type": self.output_type,
            "split": self.split,
            "bbox": elements["bounding_box"],
            "category": elements["category"],
        }
        return Document(page_content=content, metadata=element_metadata)
    def _page_document(self, elements: List) -> List[Document]:
        """
        Builds one Document per page out of a flat list of elements.
        Args:
            elements (List): Elements, each carrying its page under "page".
        Returns:
            List[Document]: One Document per distinct page, in ascending page
                            order; the content is the page's parsed elements
                            joined by single spaces, in their original order.
        """
        elements_by_page: Dict = {}
        for element in elements:
            elements_by_page.setdefault(element["page"], []).append(element)
        page_documents = []
        for page_number in sorted(elements_by_page):
            members = elements_by_page[page_number]
            joined = " ".join(
                parse_output(member, self.output_type) for member in members
            )
            page_documents.append(
                Document(
                    page_content=joined,
                    metadata={
                        "page": members[0]["page"],
                        "type": self.output_type,
                        "split": self.split,
                    },
                )
            )
        return page_documents
    def lazy_parse(self, blob: Blob, is_batch: bool = False) -> Iterator[Document]:
        """
        Lazily parses a document and yields Document objects based on the specified
        split type.
        Args:
            blob (Blob): The input document blob to parse; it is opened from
                         `blob.path`.
            is_batch (bool, optional): If True, PDFs are sent to the API in chunks
                                       of DEFAULT_NUMBER_OF_PAGE pages instead of
                                       one page per request. Not used when split
                                       is "none", which always sends chunks of
                                       DEFAULT_NUMBER_OF_PAGE pages.
                                       Defaults to False.
        Yields:
            Document: One document for the whole file ("none"), one per element
                      ("element"), or one per page ("page").
        Raises:
            ValueError: If an invalid split type is provided, or if a non-PDF
                        blob has no path.
        """
        num_pages = DEFAULT_NUMBER_OF_PAGE if is_batch else 1
        # The fitz document is closed by the context manager once the generator
        # is exhausted or closed, instead of being left open.
        with fitz.open(blob.path) as full_docs:
            if self.split == "none":
                # Whole-document output always uses DEFAULT_NUMBER_OF_PAGE pages
                # per request, whatever `is_batch` says.
                batches = self._iter_element_batches(
                    blob, full_docs, DEFAULT_NUMBER_OF_PAGE
                )
                result = "".join(
                    parse_output(element, self.output_type)
                    for elements in batches
                    for element in elements
                )
                yield Document(
                    page_content=result,
                    metadata={
                        "total_pages": full_docs.page_count,
                        "type": self.output_type,
                        "split": self.split,
                    },
                )
            elif self.split == "element":
                for elements in self._iter_element_batches(
                    blob, full_docs, num_pages
                ):
                    for element in elements:
                        yield self._element_document(element)
            elif self.split == "page":
                for elements in self._iter_element_batches(
                    blob, full_docs, num_pages
                ):
                    yield from self._page_document(elements)
            else:
                raise ValueError(f"Invalid split type: {self.split}")
    def _iter_element_batches(
        self, blob: Blob, full_docs: fitzDocument, num_pages: int
    ) -> Iterator[List]:
        """
        Yields the document's elements, one API response at a time.
        PDFs are requested in consecutive chunks of `num_pages` pages; any other
        file type is uploaded unchanged from `blob.path` in a single request.
        Raises:
            ValueError: If the document is not a PDF and the blob has no path.
        """
        if full_docs.is_pdf:
            for start_page in range(0, full_docs.page_count, num_pages):
                yield self._split_and_request(full_docs, start_page, num_pages)
        else:
            if not blob.path:
                raise ValueError("Blob path is required for non-PDF files.")
            with open(blob.path, "rb") as f:
                elements = self._get_response({"document": f})
            yield elements
--- a/libs/partners/upstage/langchain_upstage/py.typed
+++ b/libs/partners/upstage/langchain_upstage/py.typed
--- a/libs/partners/upstage/langchain_upstage/tools/groundedness_check.py
+++ b/libs/partners/upstage/langchain_upstage/tools/groundedness_check.py
@ -1,117 +0,0 @@
 import os
 from typing import Any, List, Literal, Optional, Type, Union
 from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
 )
 from langchain_core.documents import Document
 from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr
 from langchain_core.tools import BaseTool
 from langchain_core.utils import convert_to_secret_str
 from langchain_upstage import ChatUpstage
 class UpstageGroundednessCheckInput(BaseModel):
    """Input for the Groundedness Check tool."""
    # Reference material the answer is verified against: plain text or a list
    # of Document objects.
    context: Union[str, List[Document]] = Field(
        description="context in which the answer should be verified"
    )
    # Text whose groundedness in `context` is being checked.
    answer: str = Field(
        description="assistant's reply or a text that is subject to groundedness check"
    )
 class UpstageGroundednessCheck(BaseTool):
    """Tool that checks the groundedness of a context and an assistant message.
    To use, you should have the environment variable `UPSTAGE_API_KEY`
    set with your API key or pass it as a named parameter to the constructor.
    Example:
        .. code-block:: python
                from langchain_upstage import UpstageGroundednessCheck
                tool = UpstageGroundednessCheck()
    """
    name: str = "groundedness_check"
    description: str = (
        "A tool that checks the groundedness of an assistant response "
        "to user-provided context. UpstageGroundednessCheck ensures that "
        "the assistant’s response is not only relevant but also "
        "precisely aligned with the user's initial context, "
        "promoting a more reliable and context-aware interaction. "
        "When using retrieval-augmented generation (RAG), "
        "the Groundedness Check can be used to determine whether "
        "the assistant's message is grounded in the provided context."
    )
    upstage_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
    api_wrapper: ChatUpstage
    args_schema: Type[BaseModel] = UpstageGroundednessCheckInput
    def __init__(self, **kwargs: Any) -> None:
        """Resolve the API key, build the ChatUpstage wrapper and set up the tool.
        Args:
            **kwargs: `upstage_api_key` or its alias `api_key` (str or SecretStr).
                When neither is given, the `UPSTAGE_API_KEY` environment variable
                is used. All remaining keyword arguments are forwarded to the
                base class initializer instead of being discarded.
        Raises:
            ValueError: If no non-empty API key can be resolved.
        """
        # Remove both spellings so neither is forwarded a second time below.
        field_key = kwargs.pop("upstage_api_key", None)
        alias_key = kwargs.pop("api_key", None)
        upstage_api_key = convert_to_secret_str(
            field_key or alias_key or os.getenv("UPSTAGE_API_KEY", "")
        )
        if not upstage_api_key.get_secret_value():
            raise ValueError("UPSTAGE_API_KEY must be set or passed")
        # The wrapper is always built here; a caller-supplied one is ignored, as
        # it was before.
        kwargs.pop("api_wrapper", None)
        api_wrapper = ChatUpstage(
            model_name="solar-1-mini-answer-verification",
            upstage_api_key=upstage_api_key.get_secret_value(),
        )
        super().__init__(
            upstage_api_key=upstage_api_key, api_wrapper=api_wrapper, **kwargs
        )
    def formatDocumentsAsString(self, docs: List[Document]) -> str:
        """Join the `page_content` of every document, one per line."""
        contents = (doc.page_content for doc in docs)
        return "\n".join(contents)
    def _run(
        self,
        context: Union[str, List[Document]],
        answer: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[str, Literal["grounded", "notGrounded", "notSure"]]:
        """Check `answer` against `context` and return the model's reply as text.
        A list of Documents is first flattened into a single string; the context
        is sent as the human message and the answer as the AI message.
        """
        context_text = (
            self.formatDocumentsAsString(context)
            if isinstance(context, list)
            else context
        )
        messages = [HumanMessage(context_text), AIMessage(answer)]
        verdict = self.api_wrapper.invoke(messages, stream=False)
        return str(verdict.content)
    async def _arun(
        self,
        context: Union[str, List[Document]],
        answer: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Union[str, Literal["grounded", "notGrounded", "notSure"]]:
        """Asynchronous counterpart of `_run`: same inputs, awaited model call."""
        context_text = (
            self.formatDocumentsAsString(context)
            if isinstance(context, list)
            else context
        )
        messages = [HumanMessage(context_text), AIMessage(answer)]
        verdict = await self.api_wrapper.ainvoke(messages, stream=False)
        return str(verdict.content)
@deprecated(
    since="0.1.3",
    removal="0.3.0",
    alternative_import="langchain_upstage.UpstageGroundednessCheck",
 )
 class GroundednessCheck(UpstageGroundednessCheck):
    # Deprecated alias of UpstageGroundednessCheck; it adds no behavior of its
    # own and exists only so the old name keeps working.
    pass
--- a/libs/partners/upstage/poetry.lock
+++ b/libs/partners/upstage/poetry.lock
--- a/libs/partners/upstage/pyproject.toml
+++ b/libs/partners/upstage/pyproject.toml
@ -1,104 +0,0 @@
 [tool.poetry]
 name = "langchain-upstage"
 version = "0.1.6"
 description = "An integration package connecting Upstage and LangChain"
 authors = []
 readme = "README.md"
 repository = "https://github.com/langchain-ai/langchain"
 license = "MIT"
 [tool.poetry.urls]
 "Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/upstage"
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0"
 langchain-core = ">=0.2.0,<0.3"
 langchain-openai = "^0.1.8"
 pymupdf = "^1.24.1"
 requests = "^2.31.0"
 [tool.poetry.group.test]
 optional = true
 [tool.poetry.group.test.dependencies]
 pytest = "^7.3.0"
 freezegun = "^1.2.2"
 pytest-mock = "^3.10.0"
 syrupy = "^4.0.2"
 pytest-watcher = "^0.3.4"
 pytest-asyncio = "^0.21.1"
 langchain-openai = { path = "../openai", develop = true }
 langchain-core = { path = "../../core", develop = true }
 docarray = "^0.32.1"
 langchain-standard-tests = { path = "../../standard-tests", develop = true }
 [tool.poetry.group.codespell]
 optional = true
 [tool.poetry.group.codespell.dependencies]
 codespell = "^2.2.0"
 [tool.poetry.group.test_integration]
 optional = true
 [tool.poetry.group.test_integration.dependencies]
 # Support Python 3.8 and 3.12+.
 numpy = [
    {version = "^1", python = "<3.12"},
    {version = "^1.26.0", python = ">=3.12"}
 ]
 [tool.poetry.group.lint]
 optional = true
 [tool.poetry.group.lint.dependencies]
 ruff = "^0.1.5"
 [tool.poetry.group.typing.dependencies]
 mypy = "^0.991"
 types-requests = ">=2.31.0"
 langchain-core = { path = "../../core", develop = true }
 [tool.poetry.group.dev]
 optional = true
 [tool.poetry.group.dev.dependencies]
 langchain-core = { path = "../../core", develop = true }
 [tool.ruff.lint]
 select = [
  "E", # pycodestyle
  "F", # pyflakes
  "I", # isort
 ]
 [tool.mypy]
 disallow_untyped_defs = "True"
 [tool.coverage.run]
 omit = ["tests/*"]
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 [tool.pytest.ini_options]
 # --strict-markers will raise errors on unknown marks.
 # https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
 #
 # https://docs.pytest.org/en/7.1.x/reference/reference.html
 # --strict-config       any warnings encountered while parsing the `pytest`
 #                       section of the configuration file raise errors.
 #
 # https://github.com/tophat/syrupy
 # --snapshot-warn-unused    Prints a warning on unused snapshots rather than fail the test suite.
 addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
 # Registering custom markers.
 # https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
 markers = [
  "requires: mark tests as requiring a specific library",
  "asyncio: mark tests as requiring asyncio",
  "compile: mark placeholder test used to compile integration tests without running them",
 ]
 asyncio_mode = "auto"
--- a/libs/partners/upstage/scripts/check_imports.py
+++ b/libs/partners/upstage/scripts/check_imports.py
@ -1,17 +0,0 @@
 import sys
 import traceback
 from importlib.machinery import SourceFileLoader
 if __name__ == "__main__":
    # Try to import every file given on the command line and report the ones
    # that fail; the exit status is 1 if any import raised, 0 otherwise.
    files = sys.argv[1:]
    has_failure = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            # Record the failure under the name the exit status reads. The
            # previous misspelling meant the script always exited with 0.
            has_failure = True
            print(file)
            traceback.print_exc()
            print()
    sys.exit(1 if has_failure else 0)
--- a/libs/partners/upstage/scripts/check_pydantic.sh
+++ b/libs/partners/upstage/scripts/check_pydantic.sh
@ -1,27 +0,0 @@
 #!/bin/bash
 #
 # This script searches for lines starting with "import pydantic" or "from pydantic"
 # in tracked files within a Git repository.
 #
 # Usage: ./scripts/check_pydantic.sh /path/to/repository
 # Exactly one argument, the repository path, is required.
 if [[ $# -ne 1 ]]; then
  echo "Usage: $0 /path/to/repository"
  exit 1
 fi
 repo_dir="$1"
 # Collect the tracked lines that import pydantic directly.
 offending_lines=$(git -C "$repo_dir" grep -E '^import pydantic|^from pydantic')
 # Nothing matched: the repository is clean.
 if [[ -z "$offending_lines" ]]; then
  exit 0
 fi
 echo "ERROR: The following lines need to be updated:"
 echo "$offending_lines"
 echo "Please replace the code with an import from langchain_core.pydantic_v1."
 echo "For example, replace 'from pydantic import BaseModel'"
 echo "with 'from langchain_core.pydantic_v1 import BaseModel'"
 exit 1
--- a/libs/partners/upstage/scripts/lint_imports.sh
+++ b/libs/partners/upstage/scripts/lint_imports.sh
@ -1,17 +0,0 @@
 #!/bin/bash
 set -eu
 # Number of forbidden-import checks that found at least one match.
 violations=0
 # git grep exits 0 only when it finds a match, so the right-hand side of each
 # && runs exactly when a forbidden import exists. A non-match does not abort
 # the script because it is not the last command of the && list.
 git --no-pager grep '^from langchain\.' . && violations=$((violations+1))
 git --no-pager grep '^from langchain_experimental\.' . && violations=$((violations+1))
 # Succeed only when no check matched.
 if [ "$violations" -eq 0 ]; then
    exit 0
 fi
 exit 1
--- a/libs/partners/upstage/tests/init.py
+++ b/libs/partners/upstage/tests/init.py
--- a/libs/partners/upstage/tests/examples/solar.pdf
+++ b/libs/partners/upstage/tests/examples/solar.pdf
--- a/libs/partners/upstage/tests/integration_tests/init.py
+++ b/libs/partners/upstage/tests/integration_tests/init.py
--- a/libs/partners/upstage/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/upstage/tests/integration_tests/test_chat_models.py
@ -1,136 +0,0 @@
 import pytest
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
 from langchain_upstage import ChatUpstage
 def test_chat_upstage_model() -> None:
    """Both `model` and `model_name` must populate `model_name`."""
    via_model = ChatUpstage(model="foo")
    assert via_model.model_name == "foo"
    via_model_name = ChatUpstage(model_name="bar")
    assert via_model_name.model_name == "bar"
 def test_chat_upstage_system_message() -> None:
    """Invoke ChatUpstage with a system message followed by a human message."""
    llm = ChatUpstage(max_tokens=10)
    conversation = [
        SystemMessage(content="You are to chat with the user."),
        HumanMessage(content="Hello"),
    ]
    reply = llm.invoke(conversation)
    assert isinstance(reply, BaseMessage)
    assert isinstance(reply.content, str)
 def test_chat_upstage_llm_output_contains_model_name() -> None:
    """`generate` must report the model name in `llm_output`."""
    llm = ChatUpstage(max_tokens=10)
    result = llm.generate([[HumanMessage(content="Hello")]])
    assert result.llm_output is not None
    assert result.llm_output["model_name"] == llm.model_name
 def test_chat_upstage_streaming_llm_output_contains_model_name() -> None:
    """`generate` must report the model name in `llm_output` when streaming."""
    llm = ChatUpstage(max_tokens=10, streaming=True)
    result = llm.generate([[HumanMessage(content="Hello")]])
    assert result.llm_output is not None
    assert result.llm_output["model_name"] == llm.model_name
 def test_chat_upstage_invalid_streaming_params() -> None:
    """Constructing ChatUpstage with streaming=True and n=5 must raise ValueError."""
    invalid_params = dict(max_tokens=10, streaming=True, temperature=0, n=5)
    with pytest.raises(ValueError):
        ChatUpstage(**invalid_params)
 def test_chat_upstage_extra_kwargs() -> None:
    """Unknown constructor kwargs land in `model_kwargs`; conflicts raise."""
    # An unrecognised kwarg is collected into model_kwargs.
    chat = ChatUpstage(foo=3, max_tokens=10)
    assert chat.max_tokens == 10
    assert chat.model_kwargs == {"foo": 3}
    # Extra kwargs are merged with an explicitly passed model_kwargs dict.
    chat = ChatUpstage(foo=3, model_kwargs={"bar": 2})
    assert chat.model_kwargs == {"foo": 3, "bar": 2}
    # Each of these must be rejected, in order: a key given twice, an explicit
    # parameter hidden in model_kwargs, and "model" hidden in model_kwargs.
    rejected = [
        {"foo": 3, "model_kwargs": {"foo": 2}},
        {"model_kwargs": {"temperature": 0.2}},
        {"model_kwargs": {"model": "solar-1-mini-chat"}},
    ]
    for bad_kwargs in rejected:
        with pytest.raises(ValueError):
            ChatUpstage(**bad_kwargs)
 def test_stream() -> None:
    """Every chunk streamed by ChatUpstage must carry string content."""
    chat = ChatUpstage()
    for chunk in chat.stream("I'm Pickle Rick"):
        assert isinstance(chunk.content, str)
 async def test_astream() -> None:
    """Every chunk streamed asynchronously by ChatUpstage must be string content."""
    chat = ChatUpstage()
    async for chunk in chat.astream("I'm Pickle Rick"):
        assert isinstance(chunk.content, str)
 async def test_abatch() -> None:
    """Each result of an asynchronous ChatUpstage batch must be string content."""
    chat = ChatUpstage()
    prompts = ["I'm Pickle Rick", "I'm not Pickle Rick"]
    replies = await chat.abatch(prompts)
    for reply in replies:
        assert isinstance(reply.content, str)
 async def test_abatch_tags() -> None:
    """Asynchronous ChatUpstage batch with tags passed through the config."""
    chat = ChatUpstage()
    prompts = ["I'm Pickle Rick", "I'm not Pickle Rick"]
    replies = await chat.abatch(prompts, config={"tags": ["foo"]})
    for reply in replies:
        assert isinstance(reply.content, str)
 def test_batch() -> None:
    """Each result of a synchronous ChatUpstage batch must be string content."""
    chat = ChatUpstage()
    prompts = ["I'm Pickle Rick", "I'm not Pickle Rick"]
    replies = chat.batch(prompts)
    for reply in replies:
        assert isinstance(reply.content, str)
 async def test_ainvoke() -> None:
    """Asynchronous invoke of ChatUpstage, with tags, must return string content."""
    chat = ChatUpstage()
    reply = await chat.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
    assert isinstance(reply.content, str)
 def test_invoke() -> None:
    """Synchronous invoke of ChatUpstage, with tags, must return string content."""
    chat = ChatUpstage()
    reply = chat.invoke("I'm Pickle Rick", config={"tags": ["foo"]})
    assert isinstance(reply.content, str)
--- a/libs/partners/upstage/tests/integration_tests/test_chat_models_standard.py
+++ b/libs/partners/upstage/tests/integration_tests/test_chat_models_standard.py
@ -1,18 +0,0 @@
 """Standard LangChain interface tests"""
 from typing import Type
 from langchain_core.language_models import BaseChatModel
 from langchain_standard_tests.integration_tests import ChatModelIntegrationTests
 from langchain_upstage import ChatUpstage
 class TestUpstageStandard(ChatModelIntegrationTests):
    """Runs the shared ChatModelIntegrationTests suite against ChatUpstage."""
    @property
    def chat_model_class(self) -> Type[BaseChatModel]:
        """The chat model class under test."""
        return ChatUpstage
    @property
    def chat_model_params(self) -> dict:
        """Parameters for the model under test.
        Presumably used by the suite as constructor kwargs; confirm against
        ChatModelIntegrationTests.
        """
        return {"model": "solar-1-mini-chat"}
--- a/libs/partners/upstage/tests/integration_tests/test_compile.py
+++ b/libs/partners/upstage/tests/integration_tests/test_compile.py
@ -1,7 +0,0 @@
 import pytest
@pytest.mark.compile
 def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
    pass
--- a/libs/partners/upstage/tests/integration_tests/test_embeddings.py
+++ b/libs/partners/upstage/tests/integration_tests/test_embeddings.py
@ -1,51 +0,0 @@
 """Test Upstage embeddings."""
 from langchain_upstage import UpstageEmbeddings
 def test_langchain_upstage_embed_documents() -> None:
    """Embedding two documents yields two non-empty vectors."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = embedder.embed_documents(["foo bar", "bar foo"])
    assert len(vectors) == 2
    assert len(vectors[0]) > 0
 def test_langchain_upstage_embed_query() -> None:
    """Embedding a query yields a non-empty vector."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vector = embedder.embed_query("foo bar")
    assert len(vector) > 0
 async def test_langchain_upstage_aembed_documents() -> None:
    """Asynchronously embedding two documents yields two non-empty vectors."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = await embedder.aembed_documents(["foo bar", "bar foo"])
    assert len(vectors) == 2
    assert len(vectors[0]) > 0
 async def test_langchain_upstage_aembed_query() -> None:
    """Asynchronously embedding a query yields a non-empty vector."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vector = await embedder.aembed_query("foo bar")
    assert len(vector) > 0
 def test_langchain_upstage_embed_documents_with_empty_list() -> None:
    """Embedding an empty list of documents yields an empty result."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = embedder.embed_documents([])
    assert len(vectors) == 0
 async def test_langchain_upstage_aembed_documents_with_empty_list() -> None:
    """Asynchronously embedding an empty list of documents yields an empty result."""
    embedder = UpstageEmbeddings(model="solar-embedding-1-large")
    vectors = await embedder.aembed_documents([])
    assert len(vectors) == 0
--- a/libs/partners/upstage/tests/integration_tests/test_groundedness_check.py
+++ b/libs/partners/upstage/tests/integration_tests/test_groundedness_check.py
@ -1,63 +0,0 @@
 import os
 import openai
 import pytest
 from langchain_core.documents import Document
 from langchain_upstage import GroundednessCheck, UpstageGroundednessCheck
 def test_langchain_upstage_groundedness_check_deprecated() -> None:
    """Exercise the deprecated GroundednessCheck alias, with and without a key."""
    verdicts = ["grounded", "notGrounded", "notSure"]
    # Key resolved from the environment by the tool itself.
    env_tool = GroundednessCheck()
    assert env_tool.invoke({"context": "foo bar", "answer": "bar foo"}) in verdicts
    # Key passed explicitly.
    explicit_key = os.environ.get("UPSTAGE_API_KEY", None)
    keyed_tool = GroundednessCheck(upstage_api_key=explicit_key)
    assert keyed_tool.invoke({"context": "foo bar", "answer": "bar foo"}) in verdicts
 def test_langchain_upstage_groundedness_check() -> None:
    """Exercise UpstageGroundednessCheck, with and without an explicit key."""
    verdicts = ["grounded", "notGrounded", "notSure"]
    # Key resolved from the environment by the tool itself.
    env_tool = UpstageGroundednessCheck()
    assert env_tool.invoke({"context": "foo bar", "answer": "bar foo"}) in verdicts
    # Key passed explicitly.
    explicit_key = os.environ.get("UPSTAGE_API_KEY", None)
    keyed_tool = UpstageGroundednessCheck(upstage_api_key=explicit_key)
    assert keyed_tool.invoke({"context": "foo bar", "answer": "bar foo"}) in verdicts
 def test_langchain_upstage_groundedness_check_with_documents_input() -> None:
    """The context may be given as a list of Document objects."""
    checker = UpstageGroundednessCheck()
    context_docs = [Document(page_content=text) for text in ("foo bar", "bar foo")]
    verdict = checker.invoke({"context": context_docs, "answer": "bar foo"})
    assert verdict in ["grounded", "notGrounded", "notSure"]
 def test_langchain_upstage_groundedness_check_fail_with_wrong_api_key() -> None:
    """Invoking the tool with an invalid key must raise an authentication error."""
    checker = UpstageGroundednessCheck(api_key="wrong-key")
    payload = {"context": "foo bar", "answer": "bar foo"}
    with pytest.raises(openai.AuthenticationError):
        checker.invoke(payload)
 async def test_langchain_upstage_groundedness_check_async() -> None:
    """The asynchronous entry point returns one of the known verdicts."""
    checker = UpstageGroundednessCheck()
    verdict = await checker.ainvoke({"context": "foo bar", "answer": "bar foo"})
    assert verdict in ["grounded", "notGrounded", "notSure"]
--- a/libs/partners/upstage/tests/unit_tests/init.py
+++ b/libs/partners/upstage/tests/unit_tests/init.py
--- a/libs/partners/upstage/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/upstage/tests/unit_tests/test_chat_models.py
@ -1,194 +0,0 @@
 import json
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 from langchain_core.messages import (
    AIMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
 )
 from langchain_openai.chat_models.base import (
    _convert_dict_to_message,
    _convert_message_to_dict,
 )
 from langchain_upstage import ChatUpstage
 def test_initialization() -> None:
    """Test that ``ChatUpstage`` can be constructed with no arguments."""
    ChatUpstage()
 def test_upstage_model_param() -> None:
    """``model`` and ``model_name`` both set ``model_name``; provider is "upstage"."""
    built_with_model = ChatUpstage(model="foo")
    assert built_with_model.model_name == "foo"

    built_with_model_name = ChatUpstage(model_name="foo")
    assert built_with_model_name.model_name == "foo"

    # The ls params are read from the instance built with ``model_name``.
    assert built_with_model_name._get_ls_params()["ls_provider"] == "upstage"
 def test_function_dict_to_message_function_message() -> None:
    """A ``function``-role dict converts to a FunctionMessage with same name/content."""
    fn_name = "test_function"
    payload = json.dumps({"result": "Example #1"})
    raw_message = {"role": "function", "name": fn_name, "content": payload}

    converted = _convert_dict_to_message(raw_message)

    assert isinstance(converted, FunctionMessage)
    assert converted.name == fn_name
    assert converted.content == payload
 def test_convert_dict_to_message_human() -> None:
    """Round-trip a ``user`` dict: dict -> HumanMessage -> the same dict."""
    raw = {"role": "user", "content": "foo"}
    converted = _convert_dict_to_message(raw)
    expected = HumanMessage(content="foo")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test__convert_dict_to_message_human_with_name() -> None:
    """Round-trip a named ``user`` dict: the ``name`` field survives both ways."""
    raw = {"role": "user", "content": "foo", "name": "test"}
    converted = _convert_dict_to_message(raw)
    expected = HumanMessage(content="foo", name="test")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test_convert_dict_to_message_ai() -> None:
    """Round-trip an ``assistant`` dict: dict -> AIMessage -> the same dict."""
    raw = {"role": "assistant", "content": "foo"}
    converted = _convert_dict_to_message(raw)
    expected = AIMessage(content="foo")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test_convert_dict_to_message_ai_with_name() -> None:
    """Round-trip a named ``assistant`` dict: ``name`` survives both ways."""
    raw = {"role": "assistant", "content": "foo", "name": "test"}
    converted = _convert_dict_to_message(raw)
    expected = AIMessage(content="foo", name="test")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test_convert_dict_to_message_system() -> None:
    """Round-trip a ``system`` dict: dict -> SystemMessage -> the same dict."""
    raw = {"role": "system", "content": "foo"}
    converted = _convert_dict_to_message(raw)
    expected = SystemMessage(content="foo")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test_convert_dict_to_message_system_with_name() -> None:
    """Round-trip a named ``system`` dict: ``name`` survives both ways."""
    raw = {"role": "system", "content": "foo", "name": "test"}
    converted = _convert_dict_to_message(raw)
    expected = SystemMessage(content="foo", name="test")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
 def test_convert_dict_to_message_tool() -> None:
    """Round-trip a ``tool`` dict: ``tool_call_id`` survives both ways."""
    raw = {"role": "tool", "content": "foo", "tool_call_id": "bar"}
    converted = _convert_dict_to_message(raw)
    expected = ToolMessage(content="foo", tool_call_id="bar")
    assert converted == expected
    assert _convert_message_to_dict(expected) == raw
@pytest.fixture
 def mock_completion() -> dict:
    """Return a canned ``chat.completion`` response dict.

    The single choice is an assistant message with content "Bab" and name
    "KimSolar"; the tests below hand this dict back from a mocked ``create``.
    """
    return {
        "id": "chatcmpl-7fcZavknQda3SQ",
        "object": "chat.completion",
        "created": 1689989000,
        "model": "solar-1-mini-chat",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Bab",
                    "name": "KimSolar",
                },
                "finish_reason": "stop",
            }
        ],
    }
 def test_upstage_invoke(mock_completion: dict) -> None:
    """``invoke`` calls the patched client's ``create`` and returns its content."""
    chat = ChatUpstage()
    fake_client = MagicMock()
    was_called = False

    def fake_create(*args: Any, **kwargs: Any) -> Any:
        # Record the call and hand back the canned completion.
        nonlocal was_called
        was_called = True
        return mock_completion

    fake_client.create = fake_create
    with patch.object(chat, "client", fake_client):
        reply = chat.invoke("bab")
        assert reply.content == "Bab"
    assert was_called
 async def test_upstage_ainvoke(mock_completion: dict) -> None:
    """``ainvoke`` awaits the patched async client's ``create`` and returns its content."""
    chat = ChatUpstage()
    fake_async_client = AsyncMock()
    was_called = False

    async def fake_create(*args: Any, **kwargs: Any) -> Any:
        # Record the call and hand back the canned completion.
        nonlocal was_called
        was_called = True
        return mock_completion

    fake_async_client.create = fake_create
    with patch.object(chat, "async_client", fake_async_client):
        reply = await chat.ainvoke("bab")
        assert reply.content == "Bab"
    assert was_called
 def test_upstage_invoke_name(mock_completion: dict) -> None:
    """The message ``name`` is sent to ``create`` and the reply's name is returned."""
    chat = ChatUpstage()
    fake_client = MagicMock()
    fake_client.create.return_value = mock_completion

    with patch.object(chat, "client", fake_client):
        outgoing = [HumanMessage(content="Foo", name="Zorba")]
        reply = chat.invoke(outgoing)

        positional, keyword = fake_client.create.call_args
        assert len(positional) == 0  # no positional args

        sent_messages = keyword["messages"]
        assert len(sent_messages) == 1
        sent = sent_messages[0]
        assert sent["role"] == "user"
        assert sent["content"] == "Foo"
        assert sent["name"] == "Zorba"

        # check return type has name
        assert reply.content == "Bab"
        assert reply.name == "KimSolar"
--- a/libs/partners/upstage/tests/unit_tests/test_chat_models_standard.py
+++ b/libs/partners/upstage/tests/unit_tests/test_chat_models_standard.py
@ -1,18 +0,0 @@
 """Standard LangChain interface tests"""
 from typing import Type
 from langchain_core.language_models import BaseChatModel
 from langchain_standard_tests.unit_tests import ChatModelUnitTests
 from langchain_upstage import ChatUpstage
 class TestUpstageStandard(ChatModelUnitTests):
    """Configure the inherited ``ChatModelUnitTests`` suite for ``ChatUpstage``."""
    @property
    def chat_model_class(self) -> Type[BaseChatModel]:
        """Chat model class supplied to the inherited suite."""
        return ChatUpstage
    @property
    def chat_model_params(self) -> dict:
        """Constructor keyword arguments supplied to the inherited suite."""
        return {"model": "solar-1-mini-chat"}
--- a/libs/partners/upstage/tests/unit_tests/test_embeddings.py
+++ b/libs/partners/upstage/tests/unit_tests/test_embeddings.py
@ -1,32 +0,0 @@
 """Test embedding model integration."""
 import os
 import pytest
 from langchain_upstage import UpstageEmbeddings
 # Set a placeholder API key in the process environment when this module is
 # imported; nothing in this file restores the previous value afterwards.
 # NOTE(review): presumably this lets the constructors below resolve a key
 # without real credentials — confirm against UpstageEmbeddings.
 os.environ["UPSTAGE_API_KEY"] = "foo"
 def test_initialization() -> None:
    """Test that ``UpstageEmbeddings`` can be constructed with an explicit model."""
    UpstageEmbeddings(model="solar-embedding-1-large")
 def test_upstage_invalid_model_kwargs() -> None:
    """Passing ``model`` inside ``model_kwargs`` is expected to raise ValueError."""
    conflicting_kwargs = {"model": "foo"}
    with pytest.raises(ValueError):
        UpstageEmbeddings(
            model="solar-embedding-1-large",
            model_kwargs=conflicting_kwargs,
        )
 def test_upstage_invalid_model() -> None:
    """Constructing ``UpstageEmbeddings`` without a model is expected to raise."""
    with pytest.raises(ValueError):
        UpstageEmbeddings()
 def test_upstage_incorrect_field() -> None:
    """An unknown keyword triggers a warning and is collected in ``model_kwargs``."""
    with pytest.warns(match="not default parameter"):
        embeddings = UpstageEmbeddings(model="solar-embedding-1-large", foo="bar")
    collected = embeddings.model_kwargs
    assert collected == {"foo": "bar"}
--- a/libs/partners/upstage/tests/unit_tests/test_groundedness_check.py
+++ b/libs/partners/upstage/tests/unit_tests/test_groundedness_check.py
@ -1,12 +0,0 @@
 import os
 from langchain_upstage import UpstageGroundednessCheck
 # Set a placeholder API key in the process environment when this module is
 # imported; nothing in this file restores the previous value afterwards.
 # NOTE(review): presumably this lets the no-argument constructor below resolve
 # a key without real credentials — confirm against UpstageGroundednessCheck.
 os.environ["UPSTAGE_API_KEY"] = "foo"
 def test_initialization() -> None:
    """Test groundedness check initialization.

    Covers construction with no arguments, with ``upstage_api_key`` and with
    ``api_key``.
    """
    UpstageGroundednessCheck()
    UpstageGroundednessCheck(upstage_api_key="key")
    UpstageGroundednessCheck(api_key="key")
--- a/libs/partners/upstage/tests/unit_tests/test_imports.py
+++ b/libs/partners/upstage/tests/unit_tests/test_imports.py
@ -1,14 +0,0 @@
 from langchain_upstage import __all__
 # Names the package's ``__all__`` is expected to contain; order is irrelevant
 # because the test below sorts both lists before comparing.
 EXPECTED_ALL = [
    "ChatUpstage",
    "GroundednessCheck",
    "UpstageEmbeddings",
    "UpstageLayoutAnalysisLoader",
    "UpstageLayoutAnalysisParser",
    "UpstageGroundednessCheck",
 ]
 def test_all_imports() -> None:
    """Test that ``langchain_upstage.__all__`` holds exactly the expected names."""
    assert sorted(EXPECTED_ALL) == sorted(__all__)
--- a/libs/partners/upstage/tests/unit_tests/test_layout_analysis.py
+++ b/libs/partners/upstage/tests/unit_tests/test_layout_analysis.py
@ -1,253 +0,0 @@
 import json
 from pathlib import Path
 from typing import Any, Dict, get_args
 from unittest import TestCase
 from unittest.mock import MagicMock, Mock, patch
 import requests
 from langchain_upstage import UpstageLayoutAnalysisLoader
 from langchain_upstage.layout_analysis import OutputType, SplitType
 # Canned response body returned by the mocked ``requests.post`` in the tests
 # below: two elements (a header and a paragraph), both on page 1.
 MOCK_RESPONSE_JSON: Dict[str, Any] = {
    "api": "1.0",
    "billed_pages": 1,
    "elements": [
        {
            "bounding_box": [
                {"x": x, "y": y}
                for x, y in ((74, 906), (148, 906), (148, 2338), (74, 2338))
            ],
            "category": "header",
            "html": "<header id='0'>arXiv:2103.15348v2</header>",
            "id": 0,
            "page": 1,
            "text": "arXiv:2103.15348v2",
        },
        {
            "bounding_box": [
                {"x": x, "y": y}
                for x, y in ((654, 474), (1912, 474), (1912, 614), (654, 614))
            ],
            "category": "paragraph",
            "html": "<p id='1'>LayoutParser Toolkit</p>",
            "id": 1,
            "page": 1,
            "text": "LayoutParser Toolkit",
        },
    ],
    # Whole-document HTML: the two element snippets joined with no separator.
    "html": (
        "<header id='0'>arXiv:2103.15348v2</header>"
        "<p id='1'>LayoutParser Toolkit</p>"
    ),
    "mimetype": "multipart/form-data",
    "model": "layout-analyzer-0.1.0",
    "text": "arXiv:2103.15348v2LayoutParser Toolkit",
 }
 # File path handed to the loader in the tests below: "examples/solar.pdf"
 # resolved against the parent of the directory that contains this file.
 EXAMPLE_PDF_PATH = Path(__file__).parent.parent / "examples/solar.pdf"
 def test_initialization() -> None:
    """Test that the loader can be constructed from a file path and an API key."""
    UpstageLayoutAnalysisLoader(file_path=EXAMPLE_PDF_PATH, api_key="bar")
 def test_layout_analysis_param() -> None:
    """Every OutputType/SplitType combination is stored unchanged on the loader."""
    for chosen_output in get_args(OutputType):
        for chosen_split in get_args(SplitType):
            options = dict(
                file_path=EXAMPLE_PDF_PATH,
                api_key="bar",
                output_type=chosen_output,
                split=chosen_split,
                exclude=[],
            )
            loader = UpstageLayoutAnalysisLoader(**options)

            assert loader.output_type == chosen_output
            assert loader.split == chosen_split
            assert loader.api_key == "bar"
            assert loader.file_path == EXAMPLE_PDF_PATH
@patch("requests.post")
 def test_none_split_text_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="text",
        split="none",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 1
    assert documents[0].page_content == MOCK_RESPONSE_JSON["text"]
    assert documents[0].metadata["total_pages"] == 1
    assert documents[0].metadata["type"] == "text"
    assert documents[0].metadata["split"] == "none"
@patch("requests.post")
 def test_element_split_text_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="text",
        split="element",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 2
    for i, document in enumerate(documents):
        assert document.page_content == MOCK_RESPONSE_JSON["elements"][i]["text"]
        assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
        assert document.metadata["id"] == MOCK_RESPONSE_JSON["elements"][i]["id"]
        assert document.metadata["type"] == "text"
        assert document.metadata["split"] == "element"
@patch("requests.post")
 def test_page_split_text_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="text",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 1
    for i, document in enumerate(documents):
        assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
        assert document.metadata["type"] == "text"
        assert document.metadata["split"] == "page"
@patch("requests.post")
 def test_none_split_html_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="none",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 1
    assert documents[0].page_content == MOCK_RESPONSE_JSON["html"]
    assert documents[0].metadata["total_pages"] == 1
    assert documents[0].metadata["type"] == "html"
    assert documents[0].metadata["split"] == "none"
@patch("requests.post")
 def test_element_split_html_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="element",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 2
    for i, document in enumerate(documents):
        assert document.page_content == MOCK_RESPONSE_JSON["elements"][i]["html"]
        assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
        assert document.metadata["id"] == MOCK_RESPONSE_JSON["elements"][i]["id"]
        assert document.metadata["type"] == "html"
        assert document.metadata["split"] == "element"
@patch("requests.post")
 def test_page_split_html_output(mock_post: Mock) -> None:
    mock_post.return_value = MagicMock(
        status_code=200, json=MagicMock(return_value=MOCK_RESPONSE_JSON)
    )
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )
    documents = loader.load()
    assert len(documents) == 1
    for i, document in enumerate(documents):
        assert document.metadata["page"] == MOCK_RESPONSE_JSON["elements"][i]["page"]
        assert document.metadata["type"] == "html"
        assert document.metadata["split"] == "page"
@patch("requests.post")
 def test_request_exception(mock_post: Mock) -> None:
    mock_post.side_effect = requests.RequestException("Mocked request exception")
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )
    with TestCase.assertRaises(TestCase(), ValueError) as context:
        loader.load()
    assert "Failed to send request: Mocked request exception" == str(context.exception)
@patch("requests.post")
 def test_json_decode_error(mock_post: Mock) -> None:
    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0)
    mock_post.return_value = mock_response
    loader = UpstageLayoutAnalysisLoader(
        file_path=EXAMPLE_PDF_PATH,
        output_type="html",
        split="page",
        api_key="valid_api_key",
        exclude=[],
    )
    with TestCase.assertRaises(TestCase(), ValueError) as context:
        loader.load()
    assert (
        "Failed to decode JSON response: Expecting value: line 1 column 1 (char 0)"
        == str(context.exception)
    )
--- a/libs/partners/upstage/tests/unit_tests/test_secrets.py
+++ b/libs/partners/upstage/tests/unit_tests/test_secrets.py
@ -1,13 +0,0 @@
 from langchain_upstage import ChatUpstage, UpstageEmbeddings
 def test_chat_upstage_secrets() -> None:
    """The API key passed to ``ChatUpstage`` must not appear in ``str(model)``."""
    chat = ChatUpstage(upstage_api_key="foo")
    rendered = str(chat)
    assert "foo" not in rendered
 def test_upstage_embeddings_secrets() -> None:
    """The API key passed to ``UpstageEmbeddings`` must not appear in ``str(model)``."""
    embeddings = UpstageEmbeddings(
        model="solar-embedding-1-large", upstage_api_key="foo"
    )
    rendered = str(embeddings)
    assert "foo" not in rendered