# langchain/libs/experimental/langchain_experimental/llms/jsonformer_decoder.py

"""Experimental implementation of jsonformer wrapped LLM."""
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Any, List, Optional, cast

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from pydantic import Field, root_validator

if TYPE_CHECKING:
    import jsonformer


def import_jsonformer() -> jsonformer:
    """Lazily import the optional ``jsonformer`` package.

    The import is performed inside the function body, so it only runs when
    this function is called.

    Returns:
        The imported ``jsonformer`` module.

    Raises:
        ValueError: If ``jsonformer`` cannot be imported. The underlying
            ``ImportError`` is attached as ``__cause__``.
    """
    try:
        import jsonformer
    except ImportError as exc:
        # Chain the original error so the actual import failure (missing
        # package, broken transitive dependency, ...) stays visible in the
        # traceback instead of being replaced by the generic message.
        raise ValueError(
            "Could not import jsonformer python package. "
            "Please install it with `pip install jsonformer`."
        ) from exc
    return jsonformer


class JsonFormer(HuggingFacePipeline):
    """HuggingFace pipeline LLM whose output is produced through jsonformer.

    Each call builds a ``jsonformer.Jsonformer`` object from the wrapped
    pipeline's model and tokenizer, runs it against ``json_schema`` and
    returns the result serialized as a JSON string.
    """

    # Schema passed to jsonformer as ``json_schema``.
    json_schema: dict = Field(
        ...,
        description="The JSON Schema to complete.",
    )
    # Passed to jsonformer as ``max_number_tokens``.
    max_new_tokens: int = Field(
        default=200,
        description="Maximum number of new tokens to generate.",
    )
    # Passed straight through to jsonformer's ``debug`` argument.
    debug: bool = Field(
        default=False,
        description="Debug mode.",
    )

    @root_validator
    def check_jsonformer_installation(cls, values: dict) -> dict:
        """Check that jsonformer is importable; return ``values`` unchanged."""
        import_jsonformer()
        return values

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run jsonformer on ``prompt`` and return the result as JSON text.

        Args:
            prompt: Prompt handed to jsonformer.
            stop: Accepted for interface compatibility; not used here.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            ``json.dumps`` of the value returned by the jsonformer object.
        """
        jsonformer_module = import_jsonformer()
        from transformers import Text2TextGenerationPipeline

        # The cast only informs the type checker; no conversion takes place.
        hf_pipeline = cast(Text2TextGenerationPipeline, self.pipeline)

        decoder_kwargs = {
            "model": hf_pipeline.model,
            "tokenizer": hf_pipeline.tokenizer,
            "json_schema": self.json_schema,
            "prompt": prompt,
            "max_number_tokens": self.max_new_tokens,
            "debug": self.debug,
        }
        decoder = jsonformer_module.Jsonformer(**decoder_kwargs)

        # Calling the decoder performs the generation.
        generated = decoder()
        return json.dumps(generated)
Add RELLM and JSONFormer experimental LLM decoding (#4185) [RELLM](https://github.com/r2d4/rellm) is a library that wraps local HuggingFace pipeline models for structured decoding. RELLM works by generating tokens one at a time. At each step, it masks tokens that don't conform to the provided partial regular expression. [JSONFormer](https://github.com/1rgs/jsonformer) is a bit different, where it sequentially adds the keys then decodes each value directly 2023-05-14 22:40:03 +00:00			`"""Experimental implementation of jsonformer wrapped LLM."""`
			`from __future__ import annotations`

			`import json`
support kwargs (#5990) 2023-06-11 17:09:22 +00:00			`from typing import TYPE_CHECKING, Any, List, Optional, cast`
Add RELLM and JSONFormer experimental LLM decoding (#4185) [RELLM](https://github.com/r2d4/rellm) is a library that wraps local HuggingFace pipeline models for structured decoding. RELLM works by generating tokens one at a time. At each step, it masks tokens that don't conform to the provided partial regular expression. [JSONFormer](https://github.com/1rgs/jsonformer) is a bit different, where it sequentially adds the keys then decodes each value directly 2023-05-14 22:40:03 +00:00
			`from langchain.callbacks.manager import CallbackManagerForLLMRun`
			`from langchain.llms.huggingface_pipeline import HuggingFacePipeline`
Harrison/official pre release (#8106) 2023-07-22 01:44:32 +00:00			`from pydantic import Field, root_validator`
Add RELLM and JSONFormer experimental LLM decoding (#4185) [RELLM](https://github.com/r2d4/rellm) is a library that wraps local HuggingFace pipeline models for structured decoding. RELLM works by generating tokens one at a time. At each step, it masks tokens that don't conform to the provided partial regular expression. [JSONFormer](https://github.com/1rgs/jsonformer) is a bit different, where it sequentially adds the keys then decodes each value directly 2023-05-14 22:40:03 +00:00
			`if TYPE_CHECKING:`
			`import jsonformer`


			`def import_jsonformer() -> jsonformer:`
			`"""Lazily import jsonformer."""`
			`try:`
			`import jsonformer`
			`except ImportError:`
			`raise ValueError(`
			`"Could not import jsonformer python package. "`
			"Please install it with `pip install jsonformer`."
			`)`
			`return jsonformer`


			`class JsonFormer(HuggingFacePipeline):`
			`json_schema: dict = Field(..., description="The JSON Schema to complete.")`
			`max_new_tokens: int = Field(`
			`default=200, description="Maximum number of new tokens to generate."`
			`)`
			`debug: bool = Field(default=False, description="Debug mode.")`

			`@root_validator`
			`def check_jsonformer_installation(cls, values: dict) -> dict:`
			`import_jsonformer()`
			`return values`

			`def _call(`
			`self,`
			`prompt: str,`
			`stop: Optional[List[str]] = None,`
			`run_manager: Optional[CallbackManagerForLLMRun] = None,`
support kwargs (#5990) 2023-06-11 17:09:22 +00:00			`**kwargs: Any,`
Add RELLM and JSONFormer experimental LLM decoding (#4185) [RELLM](https://github.com/r2d4/rellm) is a library that wraps local HuggingFace pipeline models for structured decoding. RELLM works by generating tokens one at a time. At each step, it masks tokens that don't conform to the provided partial regular expression. [JSONFormer](https://github.com/1rgs/jsonformer) is a bit different, where it sequentially adds the keys then decodes each value directly 2023-05-14 22:40:03 +00:00			`) -> str:`
			`jsonformer = import_jsonformer()`
			`from transformers import Text2TextGenerationPipeline`

			`pipeline = cast(Text2TextGenerationPipeline, self.pipeline)`

			`model = jsonformer.Jsonformer(`
			`model=pipeline.model,`
			`tokenizer=pipeline.tokenizer,`
			`json_schema=self.json_schema,`
			`prompt=prompt,`
			`max_number_tokens=self.max_new_tokens,`
			`debug=self.debug,`
			`)`
			`text = model()`
			`return json.dumps(text)`