langchain/libs/experimental/langchain_experimental/synthetic_data/__init__.py

from typing import Any, Dict, List, Optional

from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema.language_model import BaseLanguageModel

from langchain_experimental.synthetic_data.prompts import SENTENCE_PROMPT


def create_data_generation_chain(
    llm: BaseLanguageModel,
    prompt: Optional[PromptTemplate] = None,
) -> Chain:
    """Build an LLM chain that writes synthetic sentences from given fields.

    Args:
        llm: Language model the chain will invoke.
        prompt: Prompt template handed to the chain. When omitted (or
            otherwise falsy), ``SENTENCE_PROMPT`` is used instead.

    Returns:
        An ``LLMChain`` wired to ``llm`` and the selected prompt.
    """
    # Fall back to the packaged default prompt when the caller supplied none.
    selected_prompt = prompt if prompt else SENTENCE_PROMPT
    chain = LLMChain(llm=llm, prompt=selected_prompt)
    return chain


class DatasetGenerator:
    """Build a synthetic dataset by running a generation chain per entry.

    Attributes are set in ``__init__``:
      * ``generator`` - the chain returned by
        ``create_data_generation_chain(llm)`` (default prompt).
      * ``sentence_preferences`` - the preferences mapping sent along with
        every entry; an empty dict when none (or a falsy value) was given.
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        sentence_preferences: Optional[Dict[str, Any]] = None,
    ):
        """Set up the generation chain and remember the preferences.

        Args:
            llm: Language model used to create the generation chain.
            sentence_preferences: Optional preferences forwarded to the
                chain on each call. Defaults to an empty dict.
        """
        self.generator = create_data_generation_chain(llm)
        self.sentence_preferences = (
            sentence_preferences if sentence_preferences else {}
        )

    def __call__(self, fields_collection: List[List[Any]]) -> List[Dict[str, Any]]:
        """Run the chain once for every entry of ``fields_collection``.

        Args:
            fields_collection: Iterable of field groups; each group is passed
                to the chain under the ``"fields"`` key.

        Returns:
            The chain outputs, in the same order as the inputs. An empty
            input yields an empty list.
        """
        # Each chain input pairs one group of fields with the shared
        # preferences object (the same dict instance for every entry).
        return [
            self.generator(
                {"fields": entry, "preferences": self.sentence_preferences}
            )
            for entry in fields_collection
        ]
fix experimental imports (#10875) 2023-09-21 06:44:17 +00:00			`from typing import Any, Dict, List, Optional`
Synthetic data generation (#9759) ### Description Implements synthetic data generation with the fields and preferences given by the user. Adds showcase notebook. Corresponding prompt was proposed for langchain-hub. ### Example ``` output = chain({"fields": {"colors": ["blue", "yellow"]}, "preferences": {"style": "Make it in a style of a weather forecast."}}) print(output) # {'fields': {'colors': ['blue', 'yellow']}, 'preferences': {'style': 'Make it in a style of a weather forecast.'}, 'text': "Good morning! Today's weather forecast brings a beautiful combination of colors to the sky, with hues of blue and yellow gently blending together like a mesmerizing painting."} ``` ### Twitter handle @deepsense_ai @matt_wosinski --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-09-19 23:29:50 +00:00
fix experimental imports (#10875) 2023-09-21 06:44:17 +00:00			`from langchain.chains.base import Chain`
Synthetic data generation (#9759) ### Description Implements synthetic data generation with the fields and preferences given by the user. Adds showcase notebook. Corresponding prompt was proposed for langchain-hub. ### Example ``` output = chain({"fields": {"colors": ["blue", "yellow"]}, "preferences": {"style": "Make it in a style of a weather forecast."}}) print(output) # {'fields': {'colors': ['blue', 'yellow']}, 'preferences': {'style': 'Make it in a style of a weather forecast.'}, 'text': "Good morning! Today's weather forecast brings a beautiful combination of colors to the sky, with hues of blue and yellow gently blending together like a mesmerizing painting."} ``` ### Twitter handle @deepsense_ai @matt_wosinski --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-09-19 23:29:50 +00:00			`from langchain.chains.llm import LLMChain`
fix experimental imports (#10875) 2023-09-21 06:44:17 +00:00			`from langchain.prompts import PromptTemplate`
			`from langchain.schema.language_model import BaseLanguageModel`
Synthetic data generation (#9759) ### Description Implements synthetic data generation with the fields and preferences given by the user. Adds showcase notebook. Corresponding prompt was proposed for langchain-hub. ### Example ``` output = chain({"fields": {"colors": ["blue", "yellow"]}, "preferences": {"style": "Make it in a style of a weather forecast."}}) print(output) # {'fields': {'colors': ['blue', 'yellow']}, 'preferences': {'style': 'Make it in a style of a weather forecast.'}, 'text': "Good morning! Today's weather forecast brings a beautiful combination of colors to the sky, with hues of blue and yellow gently blending together like a mesmerizing painting."} ``` ### Twitter handle @deepsense_ai @matt_wosinski --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-09-19 23:29:50 +00:00
			`from langchain_experimental.synthetic_data.prompts import SENTENCE_PROMPT`


			`def create_data_generation_chain(`
			`llm: BaseLanguageModel,`
			`prompt: Optional[PromptTemplate] = None,`
			`) -> Chain:`
			`"""Creates a chain that generates synthetic sentences with`
			`provided fields.`

			`Args:`
			`llm: The language model to use.`
			`prompt: Prompt to feed the language model with.`
			`If not provided, the default one will be used.`
			`"""`
			`prompt = prompt or SENTENCE_PROMPT`
			`return LLMChain(`
			`llm=llm,`
			`prompt=prompt,`
			`)`


			`class DatasetGenerator:`
			`"""Generates synthetic dataset with a given language model."""`

			`def __init__(`
			`self,`
			`llm: BaseLanguageModel,`
			`sentence_preferences: Optional[Dict[str, Any]] = None,`
			`):`
			`self.generator = create_data_generation_chain(llm)`
			`self.sentence_preferences = sentence_preferences or {}`

			`def __call__(self, fields_collection: List[List[Any]]) -> List[Dict[str, Any]]:`
			`results: List[Dict[str, Any]] = []`
			`for fields in fields_collection:`
			`results.append(`
			`self.generator(`
			`{"fields": fields, "preferences": self.sentence_preferences}`
			`)`
			`)`
			`return results`