Add vowpal_wabbit RL chain (#11462)

pull/10245/head^2
Bagatur 10 months ago committed by GitHub
commit 8e3fbc97ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

File diff suppressed because one or more lines are too long

@ -0,0 +1,54 @@
import logging
from langchain_experimental.rl_chain.base import (
AutoSelectionScorer,
BasedOn,
Embed,
Embedder,
Policy,
SelectionScorer,
ToSelectFrom,
VwPolicy,
embed,
stringify_embedding,
)
from langchain_experimental.rl_chain.pick_best_chain import (
PickBest,
PickBestEvent,
PickBestFeatureEmbedder,
PickBestRandomPolicy,
PickBestSelected,
)
def configure_logger() -> None:
    """Attach a formatted INFO-level stream handler to this module's logger.

    The function is idempotent: the handler it installs is tagged, and a
    second call (for example after a module reload) leaves the logger
    untouched instead of stacking another handler, which would make every
    record appear multiple times.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # Only ever install our own handler once per logger.
    if any(getattr(h, "_rl_chain_default", False) for h in logger.handlers):
        return

    ch = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    ch.setFormatter(formatter)
    ch.setLevel(logging.INFO)
    # Marker used by the guard above to recognise this handler later.
    ch._rl_chain_default = True  # type: ignore[attr-defined]
    logger.addHandler(ch)
# Install the package logger's stream handler as an import-time side effect.
configure_logger()

# Names re-exported from `base` and `pick_best_chain` as this package's
# public API.
__all__ = [
    "PickBest",
    "PickBestEvent",
    "PickBestSelected",
    "PickBestFeatureEmbedder",
    "PickBestRandomPolicy",
    "Embed",
    "BasedOn",
    "ToSelectFrom",
    "SelectionScorer",
    "AutoSelectionScorer",
    "Embedder",
    "Policy",
    "VwPolicy",
    "embed",
    "stringify_embedding",
]

@ -0,0 +1,635 @@
from __future__ import annotations
import logging
import os
from abc import ABC, abstractmethod
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generic,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
)
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain
from langchain.prompts import (
BasePromptTemplate,
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain_experimental.pydantic_v1 import BaseModel, Extra, root_validator
from langchain_experimental.rl_chain.metrics import (
MetricsTrackerAverage,
MetricsTrackerRollingWindow,
)
from langchain_experimental.rl_chain.model_repository import ModelRepository
from langchain_experimental.rl_chain.vw_logger import VwLogger
if TYPE_CHECKING:
import vowpal_wabbit_next as vw
logger = logging.getLogger(__name__)
class _BasedOn:
    """Marker wrapper tagging an input value as selection context."""

    def __init__(self, value: Any):
        self.value = value

    def __repr__(self) -> str:
        return str(self.value)

    # Both textual forms simply show the wrapped value.
    __str__ = __repr__


def BasedOn(anything: Any) -> _BasedOn:
    """Wrap ``anything`` in a `_BasedOn` marker and return the wrapper."""
    wrapped = _BasedOn(anything)
    return wrapped
class _ToSelectFrom:
    """Marker wrapper tagging an input value as the candidates to pick from."""

    def __init__(self, value: Any):
        self.value = value

    def __repr__(self) -> str:
        return str(self.value)

    # Both textual forms simply show the wrapped value.
    __str__ = __repr__


def ToSelectFrom(anything: Any) -> _ToSelectFrom:
    """Wrap a list of candidates in a `_ToSelectFrom` marker.

    Raises:
        ValueError: if ``anything`` is not a ``list``.
    """
    if isinstance(anything, list):
        return _ToSelectFrom(anything)
    raise ValueError("ToSelectFrom must be a list to select from")
class _Embed:
    """Marker wrapper for a value that should be embedded.

    ``keep`` records whether the raw value should be retained next to its
    embedding.
    """

    def __init__(self, value: Any, keep: bool = False):
        self.value = value
        self.keep = keep

    def __repr__(self) -> str:
        return str(self.value)

    # Both textual forms simply show the wrapped value.
    __str__ = __repr__


def Embed(anything: Any, keep: bool = False) -> Any:
    """Recursively mark ``anything`` for embedding.

    `_ToSelectFrom` / `_BasedOn` wrappers are rebuilt around their embedded
    payload, lists and dicts are processed element-wise, values that are
    already `_Embed` are returned unchanged, and everything else is wrapped
    in a new `_Embed`.
    """
    # Selection wrappers keep their role; only the payload gets marked.
    if isinstance(anything, _ToSelectFrom):
        return ToSelectFrom(Embed(anything.value, keep=keep))
    if isinstance(anything, _BasedOn):
        return BasedOn(Embed(anything.value, keep=keep))

    if isinstance(anything, _Embed):
        return anything
    if isinstance(anything, list):
        return [Embed(element, keep=keep) for element in anything]
    if isinstance(anything, dict):
        return {key: Embed(element, keep=keep) for key, element in anything.items()}
    return _Embed(anything, keep=keep)


def EmbedAndKeep(anything: Any) -> Any:
    """Shorthand for ``Embed(anything, keep=True)``."""
    return Embed(anything, keep=True)
# helper functions
def stringify_embedding(embedding: List) -> str:
    """Render an embedding as space-separated ``index:value`` pairs."""
    pairs = []
    for position, component in enumerate(embedding):
        pairs.append(f"{position}:{component}")
    return " ".join(pairs)
def parse_lines(parser: "vw.TextFormatParser", input_str: str) -> List["vw.Example"]:
    """Split ``input_str`` on newlines and parse each line with ``parser``.

    Returns the list of ``parser.parse_line`` results, one per line, in order.
    """
    examples = []
    for raw_line in input_str.split("\n"):
        examples.append(parser.parse_line(raw_line))
    return examples
def get_based_on_and_to_select_from(inputs: Dict[str, Any]) -> Tuple[Dict, Dict]:
    """Split ``inputs`` into its `BasedOn` and `ToSelectFrom` parts.

    Returns:
        ``(based_on, to_select_from)``: two dicts keyed by input name.
        `BasedOn` payloads that are not lists are wrapped in a one-element
        list; `ToSelectFrom` payloads are returned as they are.

    Raises:
        ValueError: if no input is wrapped in `ToSelectFrom`.
    """
    to_select_from: Dict = {}
    for name, candidate in inputs.items():
        if isinstance(candidate, _ToSelectFrom):
            to_select_from[name] = candidate.value

    if len(to_select_from) == 0:
        raise ValueError(
            "No variables using 'ToSelectFrom' found in the inputs. Please include at least one variable containing a list to select from."  # noqa: E501
        )

    based_on: Dict = {}
    for name, candidate in inputs.items():
        if not isinstance(candidate, _BasedOn):
            continue
        payload = candidate.value
        based_on[name] = payload if isinstance(payload, list) else [payload]

    return based_on, to_select_from
def prepare_inputs_for_autoembed(inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``inputs`` with selection payloads marked for embedding.

    Every value wrapped in `_ToSelectFrom` or `_BasedOn` whose payload is not
    already an `_Embed` gets its payload passed through `EmbedAndKeep`, while
    keeping its `_ToSelectFrom` / `_BasedOn` role. All other entries are
    carried over untouched.

    The caller's dictionary and wrapper objects are never modified: affected
    entries are replaced by freshly built wrappers in the returned dict.
    """
    next_inputs = inputs.copy()
    for key, wrapper in inputs.items():
        if not isinstance(wrapper, (_ToSelectFrom, _BasedOn)):
            continue
        if isinstance(wrapper.value, _Embed):
            continue
        # `inputs.copy()` is shallow, so assigning to `wrapper.value` would
        # also change the object held by the caller. Build a new wrapper of
        # the same role instead.
        wrapper_cls = _ToSelectFrom if isinstance(wrapper, _ToSelectFrom) else _BasedOn
        next_inputs[key] = wrapper_cls(EmbedAndKeep(wrapper.value))
    return next_inputs
# end helper functions
class Selected(ABC):
    """Base type for the record of what a policy selected."""


TSelected = TypeVar("TSelected", bound=Selected)


class Event(Generic[TSelected], ABC):
    """One decision: the chain inputs plus the (optional) selection made."""

    inputs: Dict[str, Any]
    selected: Optional[TSelected]

    def __init__(self, inputs: Dict[str, Any], selected: Optional[TSelected] = None):
        self.inputs, self.selected = inputs, selected


TEvent = TypeVar("TEvent", bound=Event)
class Policy(Generic[TEvent], ABC):
    """Abstract interface of a decision policy operating on events.

    Subclasses must implement `predict`, `learn` and `log`; `save` is an
    optional hook that does nothing by default.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        # Accept and ignore positional as well as keyword arguments so that
        # subclasses forwarding ``*args, **kwargs`` to ``super().__init__``
        # cannot fail here; this mirrors `Embedder.__init__`.
        pass

    @abstractmethod
    def predict(self, event: TEvent) -> Any:
        """Return the policy's prediction for ``event``."""
        ...

    @abstractmethod
    def learn(self, event: TEvent) -> None:
        """Update the policy from ``event``."""
        ...

    @abstractmethod
    def log(self, event: TEvent) -> None:
        """Record ``event``."""
        ...

    def save(self) -> None:
        """Persist the policy state; no-op unless overridden."""
        pass
class VwPolicy(Policy):
    """Policy backed by a workspace loaded from a `ModelRepository`.

    Events are turned into text by ``feature_embedder.format`` and parsed
    with ``vowpal_wabbit_next`` before being handed to the workspace.
    """

    def __init__(
        self,
        model_repo: ModelRepository,
        vw_cmd: List[str],
        feature_embedder: Embedder,
        vw_logger: VwLogger,
        *args: Any,
        **kwargs: Any,
    ):
        super().__init__(*args, **kwargs)
        self.model_repo = model_repo
        # The workspace is loaded from the repository with the given command line.
        self.workspace = self.model_repo.load(vw_cmd)
        self.feature_embedder = feature_embedder
        self.vw_logger = vw_logger

    def predict(self, event: TEvent) -> Any:
        """Return ``workspace.predict_one`` for the formatted ``event``."""
        import vowpal_wabbit_next as vw

        parser = vw.TextFormatParser(self.workspace)
        examples = parse_lines(parser, self.feature_embedder.format(event))
        return self.workspace.predict_one(examples)

    def learn(self, event: TEvent) -> None:
        """Feed the formatted ``event`` to ``workspace.learn_one``."""
        import vowpal_wabbit_next as vw

        formatted = self.feature_embedder.format(event)
        parser = vw.TextFormatParser(self.workspace)
        self.workspace.learn_one(parse_lines(parser, formatted))

    def log(self, event: TEvent) -> None:
        """Write the formatted ``event`` to the logger when logging is enabled."""
        if not self.vw_logger.logging_enabled():
            return
        self.vw_logger.log(self.feature_embedder.format(event))

    def save(self) -> None:
        """Store the current workspace through the model repository."""
        self.model_repo.save(self.workspace)
class Embedder(Generic[TEvent], ABC):
    """Abstract interface turning an event into its textual representation."""

    def __init__(self, *args: Any, **kwargs: Any):
        """Accept and ignore any constructor arguments."""

    @abstractmethod
    def format(self, event: TEvent) -> str:
        """Return the string form of ``event``."""
        ...
class SelectionScorer(Generic[TEvent], ABC, BaseModel):
    """Abstract base class for grading the chosen selection or the llm response."""

    @abstractmethod
    def score_response(
        self, inputs: Dict[str, Any], llm_response: str, event: TEvent
    ) -> float:
        """Return a numeric score for ``llm_response`` given ``inputs`` and ``event``."""
        ...
class AutoSelectionScorer(SelectionScorer[Event], BaseModel):
    """Selection scorer that asks an LLM chain for a float score.

    Attributes:
        llm_chain: chain used to produce the score; built by the root
            validator from the ``llm`` value and the resolved prompt.
        prompt: scoring prompt. When omitted it is built from
            ``scoring_criteria_template_str`` or, failing that, the default.
        scoring_criteria_template_str: optional human-message template used
            together with the default system prompt.
    """

    llm_chain: LLMChain
    prompt: Union[BasePromptTemplate, None] = None
    scoring_criteria_template_str: Optional[str] = None

    @staticmethod
    def get_default_system_prompt() -> SystemMessagePromptTemplate:
        """Return the default system message instructing the judge."""
        # Adjacent literals (rather than backslash continuations inside one
        # literal) keep source indentation from ever leaking into the prompt.
        return SystemMessagePromptTemplate.from_template(
            "PLEASE RESPOND ONLY WITH A SINGLE FLOAT AND NO OTHER TEXT EXPLANATION\n "
            "You are a strict judge that is called on to rank a response based on "
            "given criteria. You must respond with your ranking by providing a "
            "single float within the range [0, 1], 0 being very bad "
            "response and 1 being very good response."
        )

    @staticmethod
    def get_default_prompt() -> ChatPromptTemplate:
        """Return the default chat prompt: system prompt plus human template."""
        human_template = (
            'Given this based_on "{rl_chain_selected_based_on}" '
            "as the most important attribute, rank how good or bad this text is: "
            '"{rl_chain_selected}".'
        )
        human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
        default_system_prompt = AutoSelectionScorer.get_default_system_prompt()
        chat_prompt = ChatPromptTemplate.from_messages(
            [default_system_prompt, human_message_prompt]
        )
        return chat_prompt

    @root_validator(pre=True)
    def set_prompt_and_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve the prompt and build ``llm_chain`` before field validation.

        Prompt resolution order: an explicit ``prompt``; otherwise a chat
        prompt made from ``scoring_criteria_template_str`` and the default
        system prompt; otherwise the default prompt.
        """
        llm = values.get("llm")
        prompt = values.get("prompt")
        scoring_criteria_template_str = values.get("scoring_criteria_template_str")
        if prompt is None and scoring_criteria_template_str is None:
            prompt = AutoSelectionScorer.get_default_prompt()
        elif prompt is None and scoring_criteria_template_str is not None:
            human_message_prompt = HumanMessagePromptTemplate.from_template(
                scoring_criteria_template_str
            )
            default_system_prompt = AutoSelectionScorer.get_default_system_prompt()
            prompt = ChatPromptTemplate.from_messages(
                [default_system_prompt, human_message_prompt]
            )
        values["prompt"] = prompt
        values["llm_chain"] = LLMChain(llm=llm, prompt=prompt)
        return values

    def score_response(
        self, inputs: Dict[str, Any], llm_response: str, event: Event
    ) -> float:
        """Ask the LLM chain for a score and parse its reply as a float.

        Raises:
            RuntimeError: if the stripped reply cannot be converted to a
                float; the conversion error is attached as the cause.
        """
        ranking = self.llm_chain.predict(llm_response=llm_response, **inputs)
        ranking = ranking.strip()
        try:
            return float(ranking)
        except Exception as e:
            # Chain the original error so the traceback shows why parsing failed.
            raise RuntimeError(
                f"The auto selection scorer did not manage to score the response, there is always the option to try again or tweak the reward prompt. Error: {e}"  # noqa: E501
            ) from e
class RLChain(Chain, Generic[TEvent]):
    """
    The `RLChain` class leverages the Vowpal Wabbit (VW) model as a learned policy for reinforcement learning.

    Attributes:
        - llm_chain (Chain): Represents the underlying Language Model chain.
        - prompt (BasePromptTemplate): The template for the base prompt.
        - selection_scorer (Union[SelectionScorer, None]): Scorer for the selection. Can be set to None.
        - policy (Optional[Policy]): The policy used by the chain to learn to populate a dynamic prompt.
        - auto_embed (bool): Determines if embedding should be automatic. Default is False.
        - metrics (Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]]): Tracker for metrics, can be set to None.

    Initialization Attributes:
        - feature_embedder (Embedder): Embedder used for the `BasedOn` and `ToSelectFrom` inputs.
        - model_save_dir (str, optional): Directory for saving the VW model. Default is the current directory.
        - reset_model (bool): If set to True, the model starts training from scratch. Default is False.
        - vw_cmd (List[str], optional): Command line arguments for the VW model.
        - policy (Type[VwPolicy]): Policy used by the chain.
        - vw_logs (Optional[Union[str, os.PathLike]]): Path for the VW logs.
        - metrics_step (int): Step for the metrics tracker. Default is -1. If set without metrics_window_size, average metrics will be tracked, otherwise rolling window metrics will be tracked.
        - metrics_window_size (int): Window size for the metrics tracker. Default is -1. If set, rolling window metrics will be tracked.

    Notes:
        The class initializes the VW model using the provided arguments. If `selection_scorer` is not provided, a warning is logged, indicating that no reinforcement learning will occur unless the `update_with_delayed_score` method is called.
    """  # noqa: E501

    class _NoOpPolicy(Policy):
        """Placeholder policy that does nothing"""

        def predict(self, event: TEvent) -> Any:
            return None

        def learn(self, event: TEvent) -> None:
            pass

        def log(self, event: TEvent) -> None:
            pass

    llm_chain: Chain
    output_key: str = "result"  #: :meta private:
    prompt: BasePromptTemplate
    selection_scorer: Union[SelectionScorer, None]
    # The no-op placeholder is swapped for a real policy in `__init__`.
    active_policy: Policy = _NoOpPolicy()
    auto_embed: bool = False
    selection_scorer_activated: bool = True
    # Input keys reserved for internal use; rejected by `_validate_inputs`.
    selected_input_key = "rl_chain_selected"
    selected_based_on_input_key = "rl_chain_selected_based_on"
    metrics: Optional[Union[MetricsTrackerRollingWindow, MetricsTrackerAverage]] = None

    def __init__(
        self,
        feature_embedder: Embedder,
        model_save_dir: str = "./",
        reset_model: bool = False,
        vw_cmd: Optional[List[str]] = None,
        policy: Type[Policy] = VwPolicy,
        vw_logs: Optional[Union[str, os.PathLike]] = None,
        metrics_step: int = -1,
        metrics_window_size: int = -1,
        *args: Any,
        **kwargs: Any,
    ):
        """Build the chain, its policy and its metrics tracker.

        See the class docstring for the meaning of each argument.
        """
        super().__init__(*args, **kwargs)
        if self.selection_scorer is None:
            logger.warning(
                "No selection scorer provided, which means that no "
                "reinforcement learning will be done in the RL chain "
                "unless update_with_delayed_score is called."
            )

        # Only instantiate `policy` when no concrete policy was supplied.
        if isinstance(self.active_policy, RLChain._NoOpPolicy):
            self.active_policy = policy(
                model_repo=ModelRepository(
                    model_save_dir, with_history=True, reset=reset_model
                ),
                vw_cmd=vw_cmd or [],
                feature_embedder=feature_embedder,
                vw_logger=VwLogger(vw_logs),
            )

        if metrics_window_size > 0:
            self.metrics = MetricsTrackerRollingWindow(
                step=metrics_step, window_size=metrics_window_size
            )
        else:
            self.metrics = MetricsTrackerAverage(step=metrics_step)

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.
        :meta private:
        """
        return []

    @property
    def output_keys(self) -> List[str]:
        """Expect output key.
        :meta private:
        """
        return [self.output_key]

    def update_with_delayed_score(
        self, score: float, chain_response: Dict[str, Any], force_score: bool = False
    ) -> None:
        """
        Updates the learned policy with the score provided.
        Will raise an error if selection_scorer is set, and force_score=True was not provided during the method call
        """  # noqa: E501
        if self._can_use_selection_scorer() and not force_score:
            raise RuntimeError(
                "The selection scorer is set, and force_score was not set to True. Please set force_score=True to use this function."  # noqa: E501
            )
        if self.metrics:
            self.metrics.on_feedback(score)
        event: TEvent = chain_response["selection_metadata"]
        # Use the event returned by the hook, exactly as `_call` does, so a
        # hook that returns a new event object is honoured here too.
        event = self._call_after_scoring_before_learning(event=event, score=score)
        self.active_policy.learn(event=event)
        self.active_policy.log(event=event)

    def deactivate_selection_scorer(self) -> None:
        """
        Deactivates the selection scorer, meaning that the chain will no longer attempt to use the selection scorer to score responses.
        """  # noqa: E501
        self.selection_scorer_activated = False

    def activate_selection_scorer(self) -> None:
        """
        Activates the selection scorer, meaning that the chain will attempt to use the selection scorer to score responses.
        """  # noqa: E501
        self.selection_scorer_activated = True

    def save_progress(self) -> None:
        """
        This function should be called to save the state of the learned policy model.
        """  # noqa: E501
        self.active_policy.save()

    def _validate_inputs(self, inputs: Dict[str, Any]) -> None:
        """Reject inputs that use one of the reserved internal keys."""
        super()._validate_inputs(inputs)
        if (
            self.selected_input_key in inputs.keys()
            or self.selected_based_on_input_key in inputs.keys()
        ):
            raise ValueError(
                f"The rl chain does not accept '{self.selected_input_key}' or '{self.selected_based_on_input_key}' as input keys, they are reserved for internal use during auto reward."  # noqa: E501
            )

    def _can_use_selection_scorer(self) -> bool:
        """
        Returns whether the chain can use the selection scorer to score responses or not.
        """  # noqa: E501
        return self.selection_scorer is not None and self.selection_scorer_activated

    @abstractmethod
    def _call_before_predict(self, inputs: Dict[str, Any]) -> TEvent:
        """Hook: build the event from the chain inputs before prediction."""
        ...

    @abstractmethod
    def _call_after_predict_before_llm(
        self, inputs: Dict[str, Any], event: TEvent, prediction: Any
    ) -> Tuple[Dict[str, Any], TEvent]:
        """Hook: turn the prediction into the LLM chain inputs and updated event."""
        ...

    @abstractmethod
    def _call_after_llm_before_scoring(
        self, llm_response: str, event: TEvent
    ) -> Tuple[Dict[str, Any], TEvent]:
        """Hook: produce the scorer inputs and updated event from the LLM response."""
        ...

    @abstractmethod
    def _call_after_scoring_before_learning(
        self, event: TEvent, score: Optional[float]
    ) -> TEvent:
        """Hook: return the event to learn from, given the (optional) score."""
        ...

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Run one predict -> LLM -> score -> learn cycle.

        Returns:
            ``{output_key: {"response": <llm output>, "selection_metadata": <event>}}``
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()

        event: TEvent = self._call_before_predict(inputs=inputs)
        prediction = self.active_policy.predict(event=event)
        if self.metrics:
            self.metrics.on_decision()

        next_chain_inputs, event = self._call_after_predict_before_llm(
            inputs=inputs, event=event, prediction=prediction
        )

        t = self.llm_chain.run(**next_chain_inputs, callbacks=_run_manager.get_child())
        _run_manager.on_text(t, color="green", verbose=self.verbose)
        t = t.strip()

        if self.verbose:
            _run_manager.on_text("\nCode: ", verbose=self.verbose)

        output = t
        _run_manager.on_text("\nAnswer: ", verbose=self.verbose)
        _run_manager.on_text(output, color="yellow", verbose=self.verbose)

        next_chain_inputs, event = self._call_after_llm_before_scoring(
            llm_response=output, event=event
        )

        score = None
        try:
            if self._can_use_selection_scorer():
                score = self.selection_scorer.score_response(  # type: ignore
                    inputs=next_chain_inputs, llm_response=output, event=event
                )
        except Exception as e:
            # Scoring is best-effort: a failure is logged and the cycle
            # continues with ``score`` left as None.
            logger.info(
                "The selection scorer was not able to score, "
                f"and the chain was not able to adjust to this response, error: {e}"
            )
        if self.metrics and score is not None:
            self.metrics.on_feedback(score)

        event = self._call_after_scoring_before_learning(score=score, event=event)
        self.active_policy.learn(event=event)
        self.active_policy.log(event=event)

        return {self.output_key: {"response": output, "selection_metadata": event}}

    @property
    def _chain_type(self) -> str:
        return "llm_personalizer_chain"
def is_stringtype_instance(item: Any) -> bool:
    """Tell whether ``item`` is a string or an `_Embed` wrapping a string."""
    if isinstance(item, str):
        return True
    return isinstance(item, _Embed) and isinstance(item.value, str)
def embed_string_type(
    item: Union[str, _Embed], model: Any, namespace: Optional[str] = None
) -> Dict[str, Union[str, List[str]]]:
    """Helper function to embed a string or an _Embed object.

    A plain string is returned with spaces replaced by underscores. An
    `_Embed` is encoded with ``model.encode`` and rendered through
    `stringify_embedding`; when its ``keep`` flag is set, the underscored raw
    value is prepended, followed by a space.

    Args:
        item: the string or `_Embed` to process.
        model: object exposing ``encode``; only used for `_Embed` items.
        namespace: key under which the result is returned; required.

    Returns:
        A single-entry dict ``{namespace: <text>}``.

    Raises:
        ValueError: if ``item`` is neither a string nor an `_Embed`, or if
            ``namespace`` is None.
    """
    is_plain_string = isinstance(item, str)
    if not is_plain_string and not isinstance(item, _Embed):
        raise ValueError(f"Unsupported type {type(item)} for embedding")

    # Check the namespace before encoding so an input that is going to be
    # rejected never triggers a call into the embedding model.
    if namespace is None:
        raise ValueError(
            "The default namespace must be provided when embedding a string or _Embed object."  # noqa: E501
        )

    if is_plain_string:
        return {namespace: item.replace(" ", "_")}

    encoded = stringify_embedding(model.encode(item.value))
    keep_str = item.value.replace(" ", "_") + " " if item.keep else ""
    return {namespace: keep_str + encoded}
def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
    """Helper function to embed a dictionary item.

    Each key is used as the namespace for its value. List values yield a
    list of embedded strings; any other value yields one embedded string.
    """
    inner_dict: Dict = {}
    for ns, embed_item in item.items():
        if not isinstance(embed_item, list):
            inner_dict.update(embed_string_type(embed_item, model, ns))
            continue
        inner_dict[ns] = [
            embed_string_type(element, model, ns)[ns] for element in embed_item
        ]
    return inner_dict
def embed_list_type(
    item: list, model: Any, namespace: Optional[str] = None
) -> List[Dict[str, Union[str, List[str]]]]:
    """Helper function to embed every element of a list.

    Dict elements go through `embed_dict_type`, nested lists are embedded
    recursively and grouped under the first key of their first result, and
    anything else goes through `embed_string_type` with ``namespace``.
    """
    ret_list: List = []
    for embed_item in item:
        if isinstance(embed_item, dict):
            embedded = embed_dict_type(embed_item, model)
        elif isinstance(embed_item, list):
            nested = embed_list_type(embed_item, model, namespace)
            # Get the first key from the first dictionary
            first_key = next(iter(nested[0]))
            # Group the values under that key
            embedded = {first_key: [entry[first_key] for entry in nested]}
        else:
            embedded = embed_string_type(embed_item, model, namespace)
        ret_list.append(embedded)
    return ret_list
def embed(
    to_embed: Union[Union[str, _Embed], Dict, List[Union[str, _Embed]], List[Dict]],
    model: Any,
    namespace: Optional[str] = None,
) -> List[Dict[str, Union[str, List[str]]]]:
    """
    Embeds the actions or context using the SentenceTransformer model (or a model that has an `encode` function)

    Attributes:
        to_embed: (Union[Union(str, _Embed(str)), Dict, List[Union(str, _Embed(str))], List[Dict]], required) The text to be embedded, either a string, a list of strings or a dictionary or a list of dictionaries.
        namespace: (str, optional) The default namespace to use when dictionary or list of dictionaries not provided.
        model: (Any, required) The model to use for embedding

    Returns:
        List[Dict[str, str]]: A list of dictionaries where each dictionary has the namespace as the key and the embedded string as the value

    Raises:
        ValueError: if ``to_embed`` is none of the supported shapes.
    """  # noqa: E501
    wraps_string = isinstance(to_embed, _Embed) and isinstance(to_embed.value, str)
    if wraps_string or isinstance(to_embed, str):
        return [embed_string_type(to_embed, model, namespace)]
    if isinstance(to_embed, dict):
        return [embed_dict_type(to_embed, model)]
    if isinstance(to_embed, list):
        return embed_list_type(to_embed, model, namespace)
    raise ValueError("Invalid input format for embedding")

@ -0,0 +1,66 @@
from collections import deque
from typing import TYPE_CHECKING, Dict, List, Union
if TYPE_CHECKING:
import pandas as pd
class MetricsTrackerAverage:
    """Tracks the summed feedback divided by the number of decisions.

    ``history`` starts with a zero entry and receives a snapshot every
    ``step`` feedbacks when ``step`` is positive.
    """

    def __init__(self, step: int):
        self.history: List[Dict[str, Union[int, float]]] = [{"step": 0, "score": 0}]
        self.step: int = step
        self.i: int = 0
        self.num: float = 0
        self.denom: float = 0

    @property
    def score(self) -> float:
        """Summed feedback over decision count, or 0 before any decision."""
        if self.denom > 0:
            return self.num / self.denom
        return 0

    def on_decision(self) -> None:
        """Count one more decision."""
        self.denom += 1

    def on_feedback(self, score: float) -> None:
        """Add ``score`` (falsy values count as 0) and snapshot on step boundaries."""
        self.num += score or 0
        self.i += 1
        if self.step <= 0 or self.i % self.step != 0:
            return
        self.history.append({"step": self.i, "score": self.score})

    def to_pandas(self) -> "pd.DataFrame":
        """Return ``history`` as a pandas DataFrame."""
        import pandas as pd

        return pd.DataFrame(self.history)
class MetricsTrackerRollingWindow:
    """Tracks the mean of the most recent ``window_size`` feedback values.

    ``history`` starts with a zero entry and receives a snapshot every
    ``step`` feedbacks when ``step`` is positive.
    """

    def __init__(self, window_size: int, step: int):
        self.history: List[Dict[str, Union[int, float]]] = [{"step": 0, "score": 0}]
        self.step: int = step
        self.i: int = 0
        self.window_size: int = window_size
        self.queue: deque = deque()
        self.sum: float = 0.0

    @property
    def score(self) -> float:
        """Mean of the values currently in the window, or 0 when it is empty."""
        return self.sum / len(self.queue) if len(self.queue) > 0 else 0

    def on_decision(self) -> None:
        """Decisions are not counted by this tracker."""
        pass

    def on_feedback(self, value: float) -> None:
        """Push ``value`` into the window, evict the oldest if it overflows,
        and snapshot the score on step boundaries."""
        self.sum += value
        self.queue.append(value)
        self.i += 1

        if len(self.queue) > self.window_size:
            old_val = self.queue.popleft()
            self.sum -= old_val

        if self.step > 0 and self.i % self.step == 0:
            # Go through `score` rather than dividing here: it guards the
            # empty-window case (window_size <= 0), which would otherwise
            # divide by zero.
            self.history.append({"step": self.i, "score": self.score})

    def to_pandas(self) -> "pd.DataFrame":
        """Return ``history`` as a pandas DataFrame."""
        import pandas as pd

        return pd.DataFrame(self.history)

@ -0,0 +1,63 @@
import datetime
import glob
import logging
import os
import shutil
from pathlib import Path
from typing import TYPE_CHECKING, List, Union
if TYPE_CHECKING:
import vowpal_wabbit_next as vw
logger = logging.getLogger(__name__)


class ModelRepository:
    """File-based store for a serialized model.

    The current model lives in ``<folder>/latest.vw``. When ``with_history``
    is set, every save also writes a timestamped copy named
    ``model-YYYYMMDD-HHMMSS.vw`` into the same folder.
    """

    def __init__(
        self,
        folder: Union[str, os.PathLike],
        with_history: bool = True,
        reset: bool = False,
    ):
        """Set up the repository rooted at ``folder``.

        Args:
            folder: directory holding the model files; created if missing.
            with_history: whether `save` also writes a timestamped copy.
            reset: when True, an existing ``latest.vw`` is removed so the
                next `load` starts without stored model data. History files
                are left in place and only a warning is logged about them.
        """
        self.folder = Path(folder)
        self.model_path = self.folder / "latest.vw"
        self.with_history = with_history
        if reset:
            if self.has_history():
                logger.warning(
                    "There is non empty history which is recommended to be cleaned up"
                )
            # Remove the current model whenever a reset is requested, whether
            # or not history snapshots exist, so `reset` is always honoured.
            if self.model_path.exists():
                os.remove(self.model_path)

        self.folder.mkdir(parents=True, exist_ok=True)

    def get_tag(self) -> str:
        """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
        return datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    def has_history(self) -> bool:
        """Tell whether any timestamped model copy exists in the folder."""
        return len(glob.glob(str(self.folder / "model-????????-??????.vw"))) > 0

    def save(self, workspace: "vw.Workspace") -> None:
        """Write ``workspace.serialize()`` to ``latest.vw`` (plus a history copy)."""
        # Serialize first: opening with "wb" truncates the file, so a failing
        # serialize() after the open would wipe the previously stored model.
        model_data = workspace.serialize()
        logger.info(f"storing rl_chain model in: {self.model_path}")
        with open(self.model_path, "wb") as f:
            f.write(model_data)
        if self.with_history:  # write history
            shutil.copyfile(self.model_path, self.folder / f"model-{self.get_tag()}.vw")

    def load(self, commandline: List[str]) -> "vw.Workspace":
        """Create a workspace, seeded with the stored model when one exists.

        Raises:
            ImportError: if ``vowpal_wabbit_next`` is not installed.
        """
        try:
            import vowpal_wabbit_next as vw
        except ImportError as e:
            raise ImportError(
                "Unable to import vowpal_wabbit_next, please install with "
                "`pip install vowpal_wabbit_next`."
            ) from e

        model_data = None
        if self.model_path.exists():
            with open(self.model_path, "rb") as f:
                model_data = f.read()
        # An empty file is treated the same as a missing one.
        if model_data:
            logger.info(f"rl_chain model is loaded from: {self.model_path}")
            return vw.Workspace(commandline, model_data=model_data)
        return vw.Workspace(commandline)

@ -0,0 +1,412 @@
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.llm import LLMChain
from langchain.prompts import BasePromptTemplate
import langchain_experimental.rl_chain.base as base
logger = logging.getLogger(__name__)
# sentinel object used to distinguish between
# user didn't supply anything or user explicitly supplied None
SENTINEL = object()
class PickBestSelected(base.Selected):
    """Selection record: the chosen index, its probability and its score."""

    index: Optional[int]
    probability: Optional[float]
    score: Optional[float]

    def __init__(
        self,
        index: Optional[int] = None,
        probability: Optional[float] = None,
        score: Optional[float] = None,
    ):
        self.index, self.probability, self.score = index, probability, score
class PickBestEvent(base.Event[PickBestSelected]):
    """Event carrying the candidates to pick from and the context to base it on."""

    def __init__(
        self,
        inputs: Dict[str, Any],
        to_select_from: Dict[str, Any],
        based_on: Dict[str, Any],
        selected: Optional[PickBestSelected] = None,
    ):
        super().__init__(inputs=inputs, selected=selected)
        self.to_select_from, self.based_on = to_select_from, based_on
class PickBestFeatureEmbedder(base.Embedder[PickBestEvent]):
    """
    Text Embedder class that embeds the `BasedOn` and `ToSelectFrom` inputs into a format that can be used by the learning policy

    Attributes:
        model name (Any, optional): The type of embeddings to be used for feature representation. Defaults to BERT SentenceTransformer.
    """  # noqa E501

    def __init__(
        self, auto_embed: bool, model: Optional[Any] = None, *args: Any, **kwargs: Any
    ):
        super().__init__(*args, **kwargs)

        if model is None:
            # Imported lazily so the dependency is only needed for the default model.
            from sentence_transformers import SentenceTransformer

            model = SentenceTransformer("all-mpnet-base-v2")

        self.model = model
        self.auto_embed = auto_embed

    @staticmethod
    def _str(embedding: List[float]) -> str:
        """Render an embedding as space-separated ``index:value`` pairs."""
        # Delegate to the shared helper instead of duplicating its logic.
        return base.stringify_embedding(embedding)

    @staticmethod
    def _unique_features(embedded: List) -> set:
        """Collect the distinct ``namespace=value`` strings of embedded items."""
        unique = set()
        for entry in embedded:
            for ns, values in entry.items():
                for value in values if isinstance(values, list) else [values]:
                    unique.add(f"{ns}={value}")
        return unique

    def get_label(self, event: PickBestEvent) -> tuple:
        """Return ``(chosen_action, cost, prob)`` for the event's selection.

        ``cost`` is the negated score, or None when no score is set. All
        three are None when the event has no selection.
        """
        selected = event.selected
        if not selected:
            return None, None, None
        cost = -1.0 * selected.score if selected.score is not None else None
        return selected.index, cost, selected.probability

    def get_context_and_action_embeddings(self, event: PickBestEvent) -> tuple:
        """Embed the event's context and its first `ToSelectFrom` variable.

        Raises:
            ValueError: if either the context or the actions end up empty.
        """
        context_emb = base.embed(event.based_on, self.model) if event.based_on else None

        to_select_from_var_name, to_select_from = next(
            iter(event.to_select_from.items()), (None, None)
        )
        # A truthy candidate list implies a non-empty `to_select_from` dict,
        # so a single check is sufficient.
        action_embs = (
            base.embed(to_select_from, self.model, to_select_from_var_name)
            if to_select_from
            else None
        )

        if not context_emb or not action_embs:
            raise ValueError(
                "Context and to_select_from must be provided in the inputs dictionary"
            )
        return context_emb, action_embs

    def get_indexed_dot_product(self, context_emb: List, action_embs: List) -> Dict:
        """Return ``{context_feature: {action_feature: dot_product}}``.

        Features are the distinct ``namespace=value`` strings of the context
        and the actions, each encoded with ``self.model.encode``.
        """
        import numpy as np

        unique_contexts = self._unique_features(context_emb)
        encoded_contexts = self.model.encode(list(unique_contexts))
        context_embeddings = dict(zip(unique_contexts, encoded_contexts))

        unique_actions = self._unique_features(action_embs)
        encoded_actions = self.model.encode(list(unique_actions))
        action_embeddings = dict(zip(unique_actions, encoded_actions))

        action_matrix = np.stack(list(action_embeddings.values()))
        context_matrix = np.stack(list(context_embeddings.values()))
        dot_product_matrix = np.dot(context_matrix, action_matrix.T)

        # Rows follow the context keys and columns the action keys, in the
        # same order used to build the matrices above.
        indexed_dot_product: Dict = {}
        for i, context_key in enumerate(context_embeddings):
            indexed_dot_product[context_key] = {
                action_key: dot_product_matrix[i, j]
                for j, action_key in enumerate(action_embeddings)
            }
        return indexed_dot_product

    def format_auto_embed_on(self, event: PickBestEvent) -> str:
        """Format the event with ``#``, ``@`` and ``dotprod`` namespaces added.

        Returns a ``shared`` line for the context followed by one line per
        action; the chosen action's line is prefixed with
        ``index:cost:prob`` when a cost is available.
        """
        chosen_action, cost, prob = self.get_label(event)
        context_emb, action_embs = self.get_context_and_action_embeddings(event)
        indexed_dot_product = self.get_indexed_dot_product(context_emb, action_embs)

        action_lines = []
        for i, action in enumerate(action_embs):
            line_parts = []
            dot_prods = []
            if cost is not None and chosen_action == i:
                line_parts.append(f"{chosen_action}:{cost}:{prob}")
            for ns, features in action.items():
                line_parts.append(f"|{ns}")
                elements = features if isinstance(features, list) else [features]
                nsa = []
                for elem in elements:
                    line_parts.append(f"{elem}")
                    ns_a = f"{ns}={elem}"
                    nsa.append(ns_a)
                    # One dot product per context feature for this action feature.
                    dot_prods.extend(
                        per_action[ns_a] for per_action in indexed_dot_product.values()
                    )
                nsa_str = " ".join(nsa)
                line_parts.append(f"|# {nsa_str}")

            line_parts.append(f"|dotprod {self._str(dot_prods)}")
            action_lines.append(" ".join(line_parts))

        shared = []
        for item in context_emb:
            for ns, context in item.items():
                shared.append(f"|{ns}")
                elements = context if isinstance(context, list) else [context]
                nsc = []
                for elem in elements:
                    shared.append(f"{elem}")
                    nsc.append(f"{ns}={elem}")
                nsc_str = " ".join(nsc)
                shared.append(f"|@ {nsc_str}")

        return "shared " + " ".join(shared) + "\n" + "\n".join(action_lines)

    def format_auto_embed_off(self, event: PickBestEvent) -> str:
        """
        Converts the `BasedOn` and `ToSelectFrom` into a format that can be used by VW
        """
        chosen_action, cost, prob = self.get_label(event)
        context_emb, action_embs = self.get_context_and_action_embeddings(event)

        # Lines are collected and joined once; every namespace chunk keeps
        # its trailing space and there is no newline after the last line.
        shared_parts = ["shared "]
        for context_item in context_emb:
            for ns, based_on in context_item.items():
                e = " ".join(based_on) if isinstance(based_on, list) else based_on
                shared_parts.append(f"|{ns} {e} ")
        lines = ["".join(shared_parts)]

        for i, action in enumerate(action_embs):
            parts = []
            if cost is not None and chosen_action == i:
                parts.append(f"{chosen_action}:{cost}:{prob} ")
            for ns, action_embedding in action.items():
                e = (
                    " ".join(action_embedding)
                    if isinstance(action_embedding, list)
                    else action_embedding
                )
                parts.append(f"|{ns} {e} ")
            lines.append("".join(parts))

        return "\n".join(lines)

    def format(self, event: PickBestEvent) -> str:
        """Format ``event`` with or without auto-embedding, per ``auto_embed``."""
        if self.auto_embed:
            return self.format_auto_embed_on(event)
        else:
            return self.format_auto_embed_off(event)
class PickBestRandomPolicy(base.Policy[PickBestEvent]):
    """Policy giving every candidate action the same probability; never learns."""

    def __init__(self, feature_embedder: base.Embedder, **kwargs: Any):
        super().__init__(**kwargs)
        self.feature_embedder = feature_embedder

    def predict(self, event: PickBestEvent) -> List[Tuple[int, float]]:
        """Return ``(index, probability)`` pairs, uniform over the candidates.

        Returns an empty list when the event has nothing to select from.
        """
        # `to_select_from` maps the ToSelectFrom variable name to its list of
        # candidates, so the actions to count are that list's elements, not
        # the dict's keys (the dict's length is the number of variables).
        candidates = next(iter(event.to_select_from.values()), [])
        num_items = len(candidates)
        return [(i, 1.0 / num_items) for i in range(num_items)]

    def learn(self, event: PickBestEvent) -> None:
        """No-op: this policy has nothing to learn."""
        pass

    def log(self, event: PickBestEvent) -> None:
        """No-op: this policy does not log."""
        pass
class PickBest(base.RLChain[PickBestEvent]):
"""
`PickBest` is a class designed to leverage the Vowpal Wabbit (VW) model for reinforcement learning with a context, with the goal of modifying the prompt before the LLM call.
Each invocation of the chain's `run()` method should be equipped with a set of potential actions (`ToSelectFrom`) and will result in the selection of a specific action based on the `BasedOn` input. This chosen action then informs the LLM (Language Model) prompt for the subsequent response generation.
The standard operation flow of this Chain includes:
1. The Chain is invoked with inputs containing the `BasedOn` criteria and a list of potential actions (`ToSelectFrom`).
2. An action is selected based on the `BasedOn` input.
3. The LLM is called with the dynamic prompt, producing a response.
4. If a `selection_scorer` is provided, it is used to score the selection.
5. The internal Vowpal Wabbit model is updated with the `BasedOn` input, the chosen `ToSelectFrom` action, and the resulting score from the scorer.
6. The final response is returned.
Expected input dictionary format:
- At least one variable encapsulated within `BasedOn` to serve as the selection criteria.
- A single list variable within `ToSelectFrom`, representing potential actions for the VW model. This list can take the form of:
- A list of strings, e.g., `action = ToSelectFrom(["action1", "action2", "action3"])`
- A list of list of strings e.g. `action = ToSelectFrom([["action1", "another identifier of action1"], ["action2", "another identifier of action2"]])`
- A list of dictionaries, where each dictionary represents an action with namespace names as keys and corresponding action strings as values. For instance, `action = ToSelectFrom([{"namespace1": ["action1", "another identifier of action1"], "namespace2": "action2"}, {"namespace1": "action3", "namespace2": "action4"}])`.
Extends:
RLChain
Attributes:
feature_embedder (PickBestFeatureEmbedder, optional): Is an advanced attribute. Responsible for embedding the `BasedOn` and `ToSelectFrom` inputs. If omitted, a default embedder is utilized.
""" # noqa E501
def __init__(
    self,
    *args: Any,
    **kwargs: Any,
):
    """Fill in default ``feature_embedder`` and ``vw_cmd`` kwargs, then defer
    to the parent constructor.

    Recognised keyword arguments (all optional):
        auto_embed: forwarded to the default ``PickBestFeatureEmbedder``;
            has no effect when ``feature_embedder`` is given explicitly.
        feature_embedder: embedder to use instead of the default one.
        vw_cmd: explicit Vowpal Wabbit command line arguments.

    Raises:
        ValueError: if ``vw_cmd`` is given without ``--cb_explore_adf``.
    """
    use_auto_embed = kwargs.get("auto_embed", False)

    embedder = kwargs.get("feature_embedder", None)
    if not embedder:
        embedder = PickBestFeatureEmbedder(auto_embed=use_auto_embed)
    else:
        if "auto_embed" in kwargs:
            logger.warning(
                "auto_embed will take no effect when explicit feature_embedder is provided"  # noqa E501
            )
        # An explicit embedder wins, so the auto-embed specific VW flags
        # below must not be selected.
        use_auto_embed = False
    kwargs["feature_embedder"] = embedder

    cmd = kwargs.get("vw_cmd", [])
    if not cmd:
        # No command supplied: assemble the default one.
        if use_auto_embed:
            interaction_flags = [
                "--interactions=@#",
                "--ignore_linear=@",
                "--ignore_linear=#",
            ]
        else:
            interaction_flags = ["--interactions=::"]
        cmd = interaction_flags + [
            "--cb_explore_adf",
            "--coin",
            "--squarecb",
            "--quiet",
        ]
    elif "--cb_explore_adf" not in cmd:
        raise ValueError(
            "If vw_cmd is specified, it must include --cb_explore_adf"
        )
    kwargs["vw_cmd"] = cmd

    super().__init__(*args, **kwargs)
def _call_before_predict(self, inputs: Dict[str, Any]) -> PickBestEvent:
    """Validate the chain inputs and wrap them in a ``PickBestEvent``.

    Args:
        inputs: the raw chain inputs; they are split into ``BasedOn`` and
            ``ToSelectFrom`` variables by
            ``base.get_based_on_and_to_select_from``.

    Returns:
        A ``PickBestEvent`` built from the inputs, the ``ToSelectFrom``
        variables and the ``BasedOn`` variables.

    Raises:
        ValueError: if there is no ``ToSelectFrom`` variable, more than one
            ``ToSelectFrom`` variable, or no ``BasedOn`` variable.
    """
    based_on, to_select_from = base.get_based_on_and_to_select_from(inputs=inputs)

    if not to_select_from:
        raise ValueError(
            "No variables using 'ToSelectFrom' found in the inputs. "
            "Please include at least one variable containing a list to select from."
        )
    if len(to_select_from) > 1:
        raise ValueError(
            "Only one variable using 'ToSelectFrom' can be provided in the inputs "
            "for the PickBest chain. "
            "Please provide only one variable containing a list to select from."
        )
    if not based_on:
        raise ValueError(
            "No variables using 'BasedOn' found in the inputs. "
            "Please include at least one variable containing information "
            "to base the selected of ToSelectFrom on."
        )

    return PickBestEvent(
        inputs=inputs, to_select_from=to_select_from, based_on=based_on
    )
def _call_after_predict_before_llm(
    self,
    inputs: Dict[str, Any],
    event: PickBestEvent,
    prediction: List[Tuple[int, float]],
) -> Tuple[Dict[str, Any], PickBestEvent]:
    """Sample one action from the prediction and inject it into the inputs.

    Args:
        inputs: the chain inputs; a copy is returned, the original is not
            modified.
        event: the event for this call; its ``selected`` attribute is set
            to the sampled action.
        prediction: ``(action index, probability)`` pairs.

    Returns:
        A tuple of the inputs for the next chain, in which the
        ``ToSelectFrom`` variable is replaced by the selected entry, and the
        updated event.
    """
    import numpy as np

    # Normalise so the weights handed to numpy sum to one.
    total = sum(weight for _, weight in prediction)
    pmf = [weight / total for _, weight in prediction]

    # Draw a position in ``prediction`` according to the pmf.
    drawn = np.random.choice(len(prediction), p=pmf)
    action_index, action_prob = prediction[drawn]
    event.selected = PickBestSelected(index=action_index, probability=action_prob)

    # only one key, value pair in event.to_select_from
    key, value = next(iter(event.to_select_from.items()))
    next_chain_inputs = inputs.copy()
    next_chain_inputs[key] = value[event.selected.index]
    return next_chain_inputs, event
def _call_after_llm_before_scoring(
self, llm_response: str, event: PickBestEvent
) -> Tuple[Dict[str, Any], PickBestEvent]:
next_chain_inputs = event.inputs.copy()
# only one key, value pair in event.to_select_from
value = next(iter(event.to_select_from.values()))
v = (
value[event.selected.index]
if event.selected
else event.to_select_from.values()
)
next_chain_inputs.update(
{
self.selected_based_on_input_key: str(event.based_on),
self.selected_input_key: v,
}
)
return next_chain_inputs, event
def _call_after_scoring_before_learning(
self, event: PickBestEvent, score: Optional[float]
) -> PickBestEvent:
if event.selected:
event.selected.score = score
return event
def _call(
    self,
    inputs: Dict[str, Any],
    run_manager: Optional[CallbackManagerForChainRun] = None,
) -> Dict[str, Any]:
    """Run the chain by delegating unchanged to the parent ``_call``.

    Args:
        inputs: the chain inputs.
        run_manager: optional callback manager for this run.

    Returns:
        Whatever the parent implementation returns.
    """
    return super()._call(inputs=inputs, run_manager=run_manager)
@property
def _chain_type(self) -> str:
return "rl_chain_pick_best"
@classmethod
def from_llm(
    cls: Type[PickBest],
    llm: BaseLanguageModel,
    prompt: BasePromptTemplate,
    selection_scorer: Union[base.AutoSelectionScorer, object] = SENTINEL,
    **kwargs: Any,
) -> PickBest:
    """Alternate constructor: build the chain from an LLM and a prompt.

    Args:
        llm: the language model wrapped in an ``LLMChain`` together with
            ``prompt``.
        prompt: the prompt template for the LLM call.
        selection_scorer: scorer for the selections. When omitted, a
            ``base.AutoSelectionScorer`` using the same LLM is created.
            Any explicitly passed value, including ``None``, is forwarded
            as is; that is why a sentinel default is used rather than
            ``None``.
        **kwargs: forwarded to the constructor.

    Returns:
        A new instance of ``cls``.
    """
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    if selection_scorer is SENTINEL:
        selection_scorer = base.AutoSelectionScorer(llm=llm_chain.llm)

    # Instantiate through ``cls`` rather than naming ``PickBest`` so that
    # subclasses calling ``from_llm`` get an instance of themselves.
    return cls(
        llm_chain=llm_chain,
        prompt=prompt,
        selection_scorer=selection_scorer,
        **kwargs,
    )

@ -0,0 +1,18 @@
from os import PathLike
from pathlib import Path
from typing import Optional, Union
class VwLogger:
    """Append Vowpal Wabbit examples to a text file when a path is configured.

    When constructed with a falsy ``path`` (``None`` or an empty string),
    logging is disabled and ``log`` does nothing.
    """

    def __init__(self, path: Optional[Union[str, PathLike]]):
        """Store the log path and make sure its parent directory exists.

        Args:
            path: location of the log file, or a falsy value to disable
                logging.
        """
        self.path = Path(path) if path else None
        if self.path:
            # Create missing parent directories up front so that the
            # append in ``log`` does not fail on a missing directory.
            self.path.parent.mkdir(parents=True, exist_ok=True)

    def log(self, vw_ex: str) -> None:
        """Append ``vw_ex`` followed by a blank line to the log file.

        Does nothing when logging is disabled.
        """
        if self.path:
            # Pin the encoding: without it the file is written in the
            # platform's locale encoding, which makes the log non-portable
            # and can fail on non-ASCII example text.
            with open(self.path, "a", encoding="utf-8") as f:
                f.write(f"{vw_ex}\n\n")

    def logging_enabled(self) -> bool:
        """Return ``True`` when a log path is configured."""
        return bool(self.path)

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
[[package]]
name = "aiohttp"
@ -937,6 +937,41 @@ files = [
{file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"},
]
[[package]]
name = "fsspec"
version = "2023.9.2"
description = "File-system specification"
optional = true
python-versions = ">=3.8"
files = [
{file = "fsspec-2023.9.2-py3-none-any.whl", hash = "sha256:603dbc52c75b84da501b9b2ec8c11e1f61c25984c4a0dda1f129ef391fbfc9b4"},
{file = "fsspec-2023.9.2.tar.gz", hash = "sha256:80bfb8c70cc27b2178cc62a935ecf242fc6e8c3fb801f9c571fc01b1e715ba7d"},
]
[package.extras]
abfs = ["adlfs"]
adl = ["adlfs"]
arrow = ["pyarrow (>=1)"]
dask = ["dask", "distributed"]
devel = ["pytest", "pytest-cov"]
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
fuse = ["fusepy"]
gcs = ["gcsfs"]
git = ["pygit2"]
github = ["requests"]
gs = ["gcsfs"]
gui = ["panel"]
hdfs = ["pyarrow (>=1)"]
http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
libarchive = ["libarchive-c"]
oci = ["ocifs"]
s3 = ["s3fs"]
sftp = ["paramiko"]
smb = ["smbprotocol"]
ssh = ["paramiko"]
tqdm = ["tqdm"]
[[package]]
name = "greenlet"
version = "2.0.2"
@ -949,6 +984,7 @@ files = [
{file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
{file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
{file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
@ -957,6 +993,7 @@ files = [
{file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
{file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
{file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
{file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
@ -986,6 +1023,7 @@ files = [
{file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
{file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
{file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
{file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
@ -994,6 +1032,7 @@ files = [
{file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
{file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
{file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
{file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
{file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
@ -1010,6 +1049,39 @@ files = [
docs = ["Sphinx", "docutils (<0.18)"]
test = ["objgraph", "psutil"]
[[package]]
name = "huggingface-hub"
version = "0.17.3"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = true
python-versions = ">=3.8.0"
files = [
{file = "huggingface_hub-0.17.3-py3-none-any.whl", hash = "sha256:545eb3665f6ac587add946e73984148f2ea5c7877eac2e845549730570c1933a"},
{file = "huggingface_hub-0.17.3.tar.gz", hash = "sha256:40439632b211311f788964602bf8b0d9d6b7a2314fba4e8d67b2ce3ecea0e3fd"},
]
[package.dependencies]
filelock = "*"
fsspec = "*"
packaging = ">=20.9"
pyyaml = ">=5.1"
requests = "*"
tqdm = ">=4.42.1"
typing-extensions = ">=3.7.4.3"
[package.extras]
all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"]
cli = ["InquirerPy (==0.3.4)"]
dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"]
docs = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (==23.7)", "gradio", "hf-doc-builder", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)", "watchdog"]
fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
inference = ["aiohttp", "pydantic (<2.0)"]
quality = ["black (==23.7)", "mypy (==1.5.1)", "ruff (>=0.0.241)"]
tensorflow = ["graphviz", "pydot", "tensorflow"]
testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (<2.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
torch = ["torch"]
typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
[[package]]
name = "idna"
version = "3.4"
@ -1223,6 +1295,17 @@ MarkupSafe = ">=2.0"
[package.extras]
i18n = ["Babel (>=2.7)"]
[[package]]
name = "joblib"
version = "1.3.2"
description = "Lightweight pipelining with Python functions"
optional = true
python-versions = ">=3.7"
files = [
{file = "joblib-1.3.2-py3-none-any.whl", hash = "sha256:ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9"},
{file = "joblib-1.3.2.tar.gz", hash = "sha256:92f865e621e17784e7955080b6d042489e3b8e294949cc44c6eac304f59772b1"},
]
[[package]]
name = "json5"
version = "0.9.14"
@ -1666,6 +1749,16 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@ -1757,6 +1850,23 @@ files = [
{file = "mistune-3.0.1.tar.gz", hash = "sha256:e912116c13aa0944f9dc530db38eb88f6a77087ab128f49f84a48f4c05ea163c"},
]
[[package]]
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
optional = true
python-versions = "*"
files = [
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
{file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
]
[package.extras]
develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"]
docs = ["sphinx"]
gmpy = ["gmpy2 (>=2.1.0a4)"]
tests = ["pytest (>=4.6)"]
[[package]]
name = "multidict"
version = "6.0.4"
@ -2030,6 +2140,49 @@ files = [
{file = "nest_asyncio-1.5.7.tar.gz", hash = "sha256:6a80f7b98f24d9083ed24608977c09dd608d83f91cccc24c9d2cba6d10e01c10"},
]
[[package]]
name = "networkx"
version = "3.1"
description = "Python package for creating and manipulating graphs and networks"
optional = true
python-versions = ">=3.8"
files = [
{file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"},
{file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"},
]
[package.extras]
default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"]
developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"]
doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"]
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"]
test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
[[package]]
name = "nltk"
version = "3.8.1"
description = "Natural Language Toolkit"
optional = true
python-versions = ">=3.7"
files = [
{file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"},
{file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"},
]
[package.dependencies]
click = "*"
joblib = "*"
regex = ">=2021.8.3"
tqdm = "*"
[package.extras]
all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"]
corenlp = ["requests"]
machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
[[package]]
name = "notebook"
version = "7.0.2"
@ -2225,6 +2378,73 @@ files = [
{file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"},
]
[[package]]
name = "pillow"
version = "10.0.1"
description = "Python Imaging Library (Fork)"
optional = true
python-versions = ">=3.8"
files = [
{file = "Pillow-10.0.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:8f06be50669087250f319b706decf69ca71fdecd829091a37cc89398ca4dc17a"},
{file = "Pillow-10.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50bd5f1ebafe9362ad622072a1d2f5850ecfa44303531ff14353a4059113b12d"},
{file = "Pillow-10.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6a90167bcca1216606223a05e2cf991bb25b14695c518bc65639463d7db722d"},
{file = "Pillow-10.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f11c9102c56ffb9ca87134bd025a43d2aba3f1155f508eff88f694b33a9c6d19"},
{file = "Pillow-10.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:186f7e04248103482ea6354af6d5bcedb62941ee08f7f788a1c7707bc720c66f"},
{file = "Pillow-10.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0462b1496505a3462d0f35dc1c4d7b54069747d65d00ef48e736acda2c8cbdff"},
{file = "Pillow-10.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d889b53ae2f030f756e61a7bff13684dcd77e9af8b10c6048fb2c559d6ed6eaf"},
{file = "Pillow-10.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:552912dbca585b74d75279a7570dd29fa43b6d93594abb494ebb31ac19ace6bd"},
{file = "Pillow-10.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:787bb0169d2385a798888e1122c980c6eff26bf941a8ea79747d35d8f9210ca0"},
{file = "Pillow-10.0.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:fd2a5403a75b54661182b75ec6132437a181209b901446ee5724b589af8edef1"},
{file = "Pillow-10.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2d7e91b4379f7a76b31c2dda84ab9e20c6220488e50f7822e59dac36b0cd92b1"},
{file = "Pillow-10.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19e9adb3f22d4c416e7cd79b01375b17159d6990003633ff1d8377e21b7f1b21"},
{file = "Pillow-10.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93139acd8109edcdeffd85e3af8ae7d88b258b3a1e13a038f542b79b6d255c54"},
{file = "Pillow-10.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:92a23b0431941a33242b1f0ce6c88a952e09feeea9af4e8be48236a68ffe2205"},
{file = "Pillow-10.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cbe68deb8580462ca0d9eb56a81912f59eb4542e1ef8f987405e35a0179f4ea2"},
{file = "Pillow-10.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:522ff4ac3aaf839242c6f4e5b406634bfea002469656ae8358644fc6c4856a3b"},
{file = "Pillow-10.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:84efb46e8d881bb06b35d1d541aa87f574b58e87f781cbba8d200daa835b42e1"},
{file = "Pillow-10.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:898f1d306298ff40dc1b9ca24824f0488f6f039bc0e25cfb549d3195ffa17088"},
{file = "Pillow-10.0.1-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:bcf1207e2f2385a576832af02702de104be71301c2696d0012b1b93fe34aaa5b"},
{file = "Pillow-10.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d6c9049c6274c1bb565021367431ad04481ebb54872edecfcd6088d27edd6ed"},
{file = "Pillow-10.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28444cb6ad49726127d6b340217f0627abc8732f1194fd5352dec5e6a0105635"},
{file = "Pillow-10.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de596695a75496deb3b499c8c4f8e60376e0516e1a774e7bc046f0f48cd620ad"},
{file = "Pillow-10.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:2872f2d7846cf39b3dbff64bc1104cc48c76145854256451d33c5faa55c04d1a"},
{file = "Pillow-10.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4ce90f8a24e1c15465048959f1e94309dfef93af272633e8f37361b824532e91"},
{file = "Pillow-10.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ee7810cf7c83fa227ba9125de6084e5e8b08c59038a7b2c9045ef4dde61663b4"},
{file = "Pillow-10.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b1be1c872b9b5fcc229adeadbeb51422a9633abd847c0ff87dc4ef9bb184ae08"},
{file = "Pillow-10.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:98533fd7fa764e5f85eebe56c8e4094db912ccbe6fbf3a58778d543cadd0db08"},
{file = "Pillow-10.0.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:764d2c0daf9c4d40ad12fbc0abd5da3af7f8aa11daf87e4fa1b834000f4b6b0a"},
{file = "Pillow-10.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fcb59711009b0168d6ee0bd8fb5eb259c4ab1717b2f538bbf36bacf207ef7a68"},
{file = "Pillow-10.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:697a06bdcedd473b35e50a7e7506b1d8ceb832dc238a336bd6f4f5aa91a4b500"},
{file = "Pillow-10.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f665d1e6474af9f9da5e86c2a3a2d2d6204e04d5af9c06b9d42afa6ebde3f21"},
{file = "Pillow-10.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:2fa6dd2661838c66f1a5473f3b49ab610c98a128fc08afbe81b91a1f0bf8c51d"},
{file = "Pillow-10.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:3a04359f308ebee571a3127fdb1bd01f88ba6f6fb6d087f8dd2e0d9bff43f2a7"},
{file = "Pillow-10.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:723bd25051454cea9990203405fa6b74e043ea76d4968166dfd2569b0210886a"},
{file = "Pillow-10.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:71671503e3015da1b50bd18951e2f9daf5b6ffe36d16f1eb2c45711a301521a7"},
{file = "Pillow-10.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:44e7e4587392953e5e251190a964675f61e4dae88d1e6edbe9f36d6243547ff3"},
{file = "Pillow-10.0.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:3855447d98cced8670aaa63683808df905e956f00348732448b5a6df67ee5849"},
{file = "Pillow-10.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ed2d9c0704f2dc4fa980b99d565c0c9a543fe5101c25b3d60488b8ba80f0cce1"},
{file = "Pillow-10.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5bb289bb835f9fe1a1e9300d011eef4d69661bb9b34d5e196e5e82c4cb09b37"},
{file = "Pillow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0d3e54ab1df9df51b914b2233cf779a5a10dfd1ce339d0421748232cea9876"},
{file = "Pillow-10.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:2cc6b86ece42a11f16f55fe8903595eff2b25e0358dec635d0a701ac9586588f"},
{file = "Pillow-10.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ca26ba5767888c84bf5a0c1a32f069e8204ce8c21d00a49c90dabeba00ce0145"},
{file = "Pillow-10.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f0b4b06da13275bc02adfeb82643c4a6385bd08d26f03068c2796f60d125f6f2"},
{file = "Pillow-10.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bc2e3069569ea9dbe88d6b8ea38f439a6aad8f6e7a6283a38edf61ddefb3a9bf"},
{file = "Pillow-10.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8b451d6ead6e3500b6ce5c7916a43d8d8d25ad74b9102a629baccc0808c54971"},
{file = "Pillow-10.0.1-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:32bec7423cdf25c9038fef614a853c9d25c07590e1a870ed471f47fb80b244db"},
{file = "Pillow-10.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cf63d2c6928b51d35dfdbda6f2c1fddbe51a6bc4a9d4ee6ea0e11670dd981e"},
{file = "Pillow-10.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f6d3d4c905e26354e8f9d82548475c46d8e0889538cb0657aa9c6f0872a37aa4"},
{file = "Pillow-10.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:847e8d1017c741c735d3cd1883fa7b03ded4f825a6e5fcb9378fd813edee995f"},
{file = "Pillow-10.0.1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:7f771e7219ff04b79e231d099c0a28ed83aa82af91fd5fa9fdb28f5b8d5addaf"},
{file = "Pillow-10.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459307cacdd4138edee3875bbe22a2492519e060660eaf378ba3b405d1c66317"},
{file = "Pillow-10.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b059ac2c4c7a97daafa7dc850b43b2d3667def858a4f112d1aa082e5c3d6cf7d"},
{file = "Pillow-10.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6caf3cd38449ec3cd8a68b375e0c6fe4b6fd04edb6c9766b55ef84a6e8ddf2d"},
{file = "Pillow-10.0.1.tar.gz", hash = "sha256:d72967b06be9300fed5cfbc8b5bafceec48bf7cdc7dab66b1d2549035287191d"},
]
[package.extras]
docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"]
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
[[package]]
name = "pkgutil-resolve-name"
version = "1.3.10"
@ -2644,6 +2864,7 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@ -2651,8 +2872,15 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@ -2669,6 +2897,7 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@ -2676,6 +2905,7 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@ -3134,6 +3364,170 @@ files = [
{file = "ruff-0.0.249.tar.gz", hash = "sha256:b590689f08ecef971c45555cbda6854cdf48f3828fc326802828e851b1a14b3d"},
]
[[package]]
name = "safetensors"
version = "0.3.3"
description = "Fast and Safe Tensor serialization"
optional = true
python-versions = "*"
files = [
{file = "safetensors-0.3.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:92e4d0c8b2836120fddd134474c5bda8963f322333941f8b9f643e5b24f041eb"},
{file = "safetensors-0.3.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3dcadb6153c42addc9c625a622ebde9293fabe1973f9ef31ba10fb42c16e8536"},
{file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:08f26b61e1b0a14dc959aa9d568776bd038805f611caef1de04a80c468d4a7a4"},
{file = "safetensors-0.3.3-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:17f41344d9a075f2f21b289a49a62e98baff54b5754240ba896063bce31626bf"},
{file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:f1045f798e1a16a6ced98d6a42ec72936d367a2eec81dc5fade6ed54638cd7d2"},
{file = "safetensors-0.3.3-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:eaf0e4bc91da13f21ac846a39429eb3f3b7ed06295a32321fa3eb1a59b5c70f3"},
{file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25149180d4dc8ca48bac2ac3852a9424b466e36336a39659b35b21b2116f96fc"},
{file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9e943bf78c39de8865398a71818315e7d5d1af93c7b30d4da3fc852e62ad9bc"},
{file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cccfcac04a010354e87c7a2fe16a1ff004fc4f6e7ef8efc966ed30122ce00bc7"},
{file = "safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a07121f427e646a50d18c1be0fa1a2cbf6398624c31149cd7e6b35486d72189e"},
{file = "safetensors-0.3.3-cp310-cp310-win32.whl", hash = "sha256:a85e29cbfddfea86453cc0f4889b4bcc6b9c155be9a60e27be479a34e199e7ef"},
{file = "safetensors-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:e13adad4a3e591378f71068d14e92343e626cf698ff805f61cdb946e684a218e"},
{file = "safetensors-0.3.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:cbc3312f134baf07334dd517341a4b470b2931f090bd9284888acb7dfaf4606f"},
{file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d15030af39d5d30c22bcbc6d180c65405b7ea4c05b7bab14a570eac7d7d43722"},
{file = "safetensors-0.3.3-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:f84a74cbe9859b28e3d6d7715ac1dd3097bebf8d772694098f6d42435245860c"},
{file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:10d637423d98ab2e6a4ad96abf4534eb26fcaf8ca3115623e64c00759374e90d"},
{file = "safetensors-0.3.3-cp311-cp311-macosx_13_0_universal2.whl", hash = "sha256:3b46f5de8b44084aff2e480874c550c399c730c84b2e8ad1bddb062c94aa14e9"},
{file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e76da691a82dfaf752854fa6d17c8eba0c8466370c5ad8cf1bfdf832d3c7ee17"},
{file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4e342fd54e66aa9512dd13e410f791e47aa4feeb5f4c9a20882c72f3d272f29"},
{file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:178fd30b5dc73bce14a39187d948cedd0e5698e2f055b7ea16b5a96c9b17438e"},
{file = "safetensors-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e8fdf7407dba44587ed5e79d5de3533d242648e1f2041760b21474bd5ea5c8c"},
{file = "safetensors-0.3.3-cp311-cp311-win32.whl", hash = "sha256:7d3b744cee8d7a46ffa68db1a2ff1a1a432488e3f7a5a97856fe69e22139d50c"},
{file = "safetensors-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f579877d30feec9b6ba409d05fa174633a4fc095675a4a82971d831a8bb60b97"},
{file = "safetensors-0.3.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:2fff5b19a1b462c17322998b2f4b8bce43c16fe208968174d2f3a1446284ceed"},
{file = "safetensors-0.3.3-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:41adb1d39e8aad04b16879e3e0cbcb849315999fad73bc992091a01e379cb058"},
{file = "safetensors-0.3.3-cp37-cp37m-macosx_12_0_x86_64.whl", hash = "sha256:0f2b404250b3b877b11d34afcc30d80e7035714a1116a3df56acaca6b6c00096"},
{file = "safetensors-0.3.3-cp37-cp37m-macosx_13_0_x86_64.whl", hash = "sha256:b43956ef20e9f4f2e648818a9e7b3499edd6b753a0f5526d4f6a6826fbee8446"},
{file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d61a99b34169981f088ccfbb2c91170843efc869a0a0532f422db7211bf4f474"},
{file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0008aab36cd20e9a051a68563c6f80d40f238c2611811d7faa5a18bf3fd3984"},
{file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93d54166072b143084fdcd214a080a088050c1bb1651016b55942701b31334e4"},
{file = "safetensors-0.3.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c32ee08f61cea56a5d62bbf94af95df6040c8ab574afffaeb7b44ae5da1e9e3"},
{file = "safetensors-0.3.3-cp37-cp37m-win32.whl", hash = "sha256:351600f367badd59f7bfe86d317bb768dd8c59c1561c6fac43cafbd9c1af7827"},
{file = "safetensors-0.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:034717e297849dae1af0a7027a14b8647bd2e272c24106dced64d83e10d468d1"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8530399666748634bc0b301a6a5523756931b0c2680d188e743d16304afe917a"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:9d741c1f1621e489ba10aa3d135b54202684f6e205df52e219d5eecd673a80c9"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:0c345fd85b4d2093a5109596ff4cd9dfc2e84992e881b4857fbc4a93a3b89ddb"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:69ccee8d05f55cdf76f7e6c87d2bdfb648c16778ef8acfd2ecc495e273e9233e"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:c08a9a4b7a4ca389232fa8d097aebc20bbd4f61e477abc7065b5c18b8202dede"},
{file = "safetensors-0.3.3-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:a002868d2e3f49bbe81bee2655a411c24fa1f8e68b703dec6629cb989d6ae42e"},
{file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bd2704cb41faa44d3ec23e8b97330346da0395aec87f8eaf9c9e2c086cdbf13"},
{file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b2951bf3f0ad63df5e6a95263652bd6c194a6eb36fd4f2d29421cd63424c883"},
{file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07114cec116253ca2e7230fdea30acf76828f21614afd596d7b5438a2f719bd8"},
{file = "safetensors-0.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab43aeeb9eadbb6b460df3568a662e6f1911ecc39387f8752afcb6a7d96c087"},
{file = "safetensors-0.3.3-cp38-cp38-win32.whl", hash = "sha256:f2f59fce31dd3429daca7269a6b06f65e6547a0c248f5116976c3f1e9b73f251"},
{file = "safetensors-0.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:c31ca0d8610f57799925bf08616856b39518ab772c65093ef1516762e796fde4"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:59a596b3225c96d59af412385981f17dd95314e3fffdf359c7e3f5bb97730a19"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:82a16e92210a6221edd75ab17acdd468dd958ef5023d9c6c1289606cc30d1479"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:98a929e763a581f516373ef31983ed1257d2d0da912a8e05d5cd12e9e441c93a"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:12b83f1986cd16ea0454c636c37b11e819d60dd952c26978310a0835133480b7"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:f439175c827c2f1bbd54df42789c5204a10983a30bc4242bc7deaf854a24f3f0"},
{file = "safetensors-0.3.3-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:0085be33b8cbcb13079b3a8e131656e05b0bc5e6970530d4c24150f7afd76d70"},
{file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e3ec70c87b1e910769034206ad5efc051069b105aac1687f6edcd02526767f4"},
{file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f490132383e5e490e710608f4acffcb98ed37f91b885c7217d3f9f10aaff9048"},
{file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79d1b6c7ed5596baf79c80fbce5198c3cdcc521ae6a157699f427aba1a90082d"},
{file = "safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad3cc8006e7a86ee7c88bd2813ec59cd7cc75b03e6fa4af89b9c7b235b438d68"},
{file = "safetensors-0.3.3-cp39-cp39-win32.whl", hash = "sha256:ab29f54c6b8c301ca05fa014728996bd83aac6e21528f893aaf8945c71f42b6d"},
{file = "safetensors-0.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:0fa82004eae1a71e2aa29843ef99de9350e459a0fc2f65fc6ee0da9690933d2d"},
{file = "safetensors-0.3.3.tar.gz", hash = "sha256:edb7072d788c4f929d0f5735d3a2fb51e5a27f833587828583b7f5747af1a2b8"},
]
[package.extras]
all = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"]
dev = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "flax (>=0.6.3)", "h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "isort (>=5.5.4)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)", "tensorflow (==2.11.0)", "torch (>=1.10)"]
jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "numpy (>=1.21.6)"]
numpy = ["numpy (>=1.21.6)"]
paddlepaddle = ["numpy (>=1.21.6)", "paddlepaddle (>=2.4.1)"]
pinned-tf = ["tensorflow (==2.11.0)"]
quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"]
tensorflow = ["numpy (>=1.21.6)", "tensorflow (>=2.11.0)"]
testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "numpy (>=1.21.6)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "setuptools-rust (>=1.5.2)"]
torch = ["numpy (>=1.21.6)", "torch (>=1.10)"]
[[package]]
name = "scikit-learn"
version = "1.3.1"
description = "A set of python modules for machine learning and data mining"
optional = true
python-versions = ">=3.8"
files = [
{file = "scikit-learn-1.3.1.tar.gz", hash = "sha256:1a231cced3ee3fa04756b4a7ab532dc9417acd581a330adff5f2c01ac2831fcf"},
{file = "scikit_learn-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3153612ff8d36fa4e35ef8b897167119213698ea78f3fd130b4068e6f8d2da5a"},
{file = "scikit_learn-1.3.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6bb9490fdb8e7e00f1354621689187bef3cab289c9b869688f805bf724434755"},
{file = "scikit_learn-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7135a03af71138669f19bc96e7d0cc8081aed4b3565cc3b131135d65fc642ba"},
{file = "scikit_learn-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d8dee8c1f40eeba49a85fe378bdf70a07bb64aba1a08fda1e0f48d27edfc3e6"},
{file = "scikit_learn-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:4d379f2b34096105a96bd857b88601dffe7389bd55750f6f29aaa37bc6272eb5"},
{file = "scikit_learn-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14e8775eba072ab10866a7e0596bc9906873e22c4c370a651223372eb62de180"},
{file = "scikit_learn-1.3.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:58b0c2490eff8355dc26e884487bf8edaccf2ba48d09b194fb2f3a026dd64f9d"},
{file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"},
{file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"},
{file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"},
{file = "scikit_learn-1.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ef540e09873e31569bc8b02c8a9f745ee04d8e1263255a15c9969f6f5caa627f"},
{file = "scikit_learn-1.3.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9147a3a4df4d401e618713880be023e36109c85d8569b3bf5377e6cd3fecdeac"},
{file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2cd3634695ad192bf71645702b3df498bd1e246fc2d529effdb45a06ab028b4"},
{file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043"},
{file = "scikit_learn-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e1aa8f206d0de814b81b41d60c1ce31f7f2c7354597af38fae46d9c47c45122"},
{file = "scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"},
{file = "scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"},
{file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"},
{file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ce9233cdf0cdcf0858a5849d306490bf6de71fa7603a3835124e386e62f2311"},
{file = "scikit_learn-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:1ec668ce003a5b3d12d020d2cde0abd64b262ac5f098b5c84cf9657deb9996a8"},
{file = "scikit_learn-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccbbedae99325628c1d1cbe3916b7ef58a1ce949672d8d39c8b190e10219fd32"},
{file = "scikit_learn-1.3.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:845f81c7ceb4ea6bac64ab1c9f2ce8bef0a84d0f21f3bece2126adcc213dfecd"},
{file = "scikit_learn-1.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8454d57a22d856f1fbf3091bd86f9ebd4bff89088819886dc0c72f47a6c30652"},
{file = "scikit_learn-1.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d993fb70a1d78c9798b8f2f28705bfbfcd546b661f9e2e67aa85f81052b9c53"},
{file = "scikit_learn-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:66f7bb1fec37d65f4ef85953e1df5d3c98a0f0141d394dcdaead5a6de9170347"},
]
[package.dependencies]
joblib = ">=1.1.1"
numpy = ">=1.17.3,<2.0"
scipy = ">=1.5.0"
threadpoolctl = ">=2.0.0"
[package.extras]
benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"]
docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"]
tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"]
[[package]]
name = "scipy"
version = "1.9.3"
description = "Fundamental algorithms for scientific computing in Python"
optional = true
python-versions = ">=3.8"
files = [
{file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"},
{file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"},
{file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"},
{file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"},
{file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"},
{file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"},
{file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"},
{file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"},
{file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"},
{file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"},
{file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"},
{file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"},
{file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"},
{file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"},
{file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"},
{file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"},
{file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"},
{file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"},
{file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"},
{file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"},
{file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"},
]
[package.dependencies]
numpy = ">=1.18.5,<1.26.0"
[package.extras]
dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"]
doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"]
test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]
name = "send2trash"
version = "1.8.2"
@ -3150,6 +3544,82 @@ nativelib = ["pyobjc-framework-Cocoa", "pywin32"]
objc = ["pyobjc-framework-Cocoa"]
win32 = ["pywin32"]
[[package]]
name = "sentence-transformers"
version = "2.2.2"
description = "Multilingual text embeddings"
optional = true
python-versions = ">=3.6.0"
files = [
{file = "sentence-transformers-2.2.2.tar.gz", hash = "sha256:dbc60163b27de21076c9a30d24b5b7b6fa05141d68cf2553fa9a77bf79a29136"},
]
[package.dependencies]
huggingface-hub = ">=0.4.0"
nltk = "*"
numpy = "*"
scikit-learn = "*"
scipy = "*"
sentencepiece = "*"
torch = ">=1.6.0"
torchvision = "*"
tqdm = "*"
transformers = ">=4.6.0,<5.0.0"
[[package]]
name = "sentencepiece"
version = "0.1.99"
description = "SentencePiece python wrapper"
optional = true
python-versions = "*"
files = [
{file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0eb528e70571b7c02723e5804322469b82fe7ea418c96051d0286c0fa028db73"},
{file = "sentencepiece-0.1.99-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77d7fafb2c4e4659cbdf303929503f37a26eabc4ff31d3a79bf1c5a1b338caa7"},
{file = "sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be9cf5b9e404c245aeb3d3723c737ba7a8f5d4ba262ef233a431fa6c45f732a0"},
{file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baed1a26464998f9710d20e52607c29ffd4293e7c71c6a1f83f51ad0911ec12c"},
{file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9832f08bb372d4c8b567612f8eab9e36e268dff645f1c28f9f8e851be705f6d1"},
{file = "sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:019e7535108e309dae2b253a75834fc3128240aa87c00eb80732078cdc182588"},
{file = "sentencepiece-0.1.99-cp310-cp310-win32.whl", hash = "sha256:fa16a830416bb823fa2a52cbdd474d1f7f3bba527fd2304fb4b140dad31bb9bc"},
{file = "sentencepiece-0.1.99-cp310-cp310-win_amd64.whl", hash = "sha256:14b0eccb7b641d4591c3e12ae44cab537d68352e4d3b6424944f0c447d2348d5"},
{file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6d3c56f24183a1e8bd61043ff2c58dfecdc68a5dd8955dc13bab83afd5f76b81"},
{file = "sentencepiece-0.1.99-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed6ea1819fd612c989999e44a51bf556d0ef6abfb553080b9be3d347e18bcfb7"},
{file = "sentencepiece-0.1.99-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2a0260cd1fb7bd8b4d4f39dc2444a8d5fd4e0a0c4d5c899810ef1abf99b2d45"},
{file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a1abff4d1ff81c77cac3cc6fefa34fa4b8b371e5ee51cb7e8d1ebc996d05983"},
{file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004e6a621d4bc88978eecb6ea7959264239a17b70f2cbc348033d8195c9808ec"},
{file = "sentencepiece-0.1.99-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db361e03342c41680afae5807590bc88aa0e17cfd1a42696a160e4005fcda03b"},
{file = "sentencepiece-0.1.99-cp311-cp311-win32.whl", hash = "sha256:2d95e19168875b70df62916eb55428a0cbcb834ac51d5a7e664eda74def9e1e0"},
{file = "sentencepiece-0.1.99-cp311-cp311-win_amd64.whl", hash = "sha256:f90d73a6f81248a909f55d8e6ef56fec32d559e1e9af045f0b0322637cb8e5c7"},
{file = "sentencepiece-0.1.99-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:62e24c81e74bd87a6e0d63c51beb6527e4c0add67e1a17bac18bcd2076afcfeb"},
{file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57efcc2d51caff20d9573567d9fd3f854d9efe613ed58a439c78c9f93101384a"},
{file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a904c46197993bd1e95b93a6e373dca2f170379d64441041e2e628ad4afb16f"},
{file = "sentencepiece-0.1.99-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d89adf59854741c0d465f0e1525b388c0d174f611cc04af54153c5c4f36088c4"},
{file = "sentencepiece-0.1.99-cp36-cp36m-win32.whl", hash = "sha256:47c378146928690d1bc106fdf0da768cebd03b65dd8405aa3dd88f9c81e35dba"},
{file = "sentencepiece-0.1.99-cp36-cp36m-win_amd64.whl", hash = "sha256:9ba142e7a90dd6d823c44f9870abdad45e6c63958eb60fe44cca6828d3b69da2"},
{file = "sentencepiece-0.1.99-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7b1a9ae4d7c6f1f867e63370cca25cc17b6f4886729595b885ee07a58d3cec3"},
{file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0f644c9d4d35c096a538507b2163e6191512460035bf51358794a78515b74f7"},
{file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8843d23a0f686d85e569bd6dcd0dd0e0cbc03731e63497ca6d5bacd18df8b85"},
{file = "sentencepiece-0.1.99-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e6f690a1caebb4867a2e367afa1918ad35be257ecdb3455d2bbd787936f155"},
{file = "sentencepiece-0.1.99-cp37-cp37m-win32.whl", hash = "sha256:8a321866c2f85da7beac74a824b4ad6ddc2a4c9bccd9382529506d48f744a12c"},
{file = "sentencepiece-0.1.99-cp37-cp37m-win_amd64.whl", hash = "sha256:c42f753bcfb7661c122a15b20be7f684b61fc8592c89c870adf52382ea72262d"},
{file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:85b476406da69c70586f0bb682fcca4c9b40e5059814f2db92303ea4585c650c"},
{file = "sentencepiece-0.1.99-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cfbcfe13c69d3f87b7fcd5da168df7290a6d006329be71f90ba4f56bc77f8561"},
{file = "sentencepiece-0.1.99-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:445b0ec381af1cd4eef95243e7180c63d9c384443c16c4c47a28196bd1cda937"},
{file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6890ea0f2b4703f62d0bf27932e35808b1f679bdb05c7eeb3812b935ba02001"},
{file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb71af492b0eefbf9f2501bec97bcd043b6812ab000d119eaf4bd33f9e283d03"},
{file = "sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b866b5bd3ddd54166bbcbf5c8d7dd2e0b397fac8537991c7f544220b1f67bc"},
{file = "sentencepiece-0.1.99-cp38-cp38-win32.whl", hash = "sha256:b133e8a499eac49c581c3c76e9bdd08c338cc1939e441fee6f92c0ccb5f1f8be"},
{file = "sentencepiece-0.1.99-cp38-cp38-win_amd64.whl", hash = "sha256:0eaf3591dd0690a87f44f4df129cf8d05d8a4029b5b6709b489b8e27f9a9bcff"},
{file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38efeda9bbfb55052d482a009c6a37e52f42ebffcea9d3a98a61de7aee356a28"},
{file = "sentencepiece-0.1.99-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c030b081dc1e1bcc9fadc314b19b740715d3d566ad73a482da20d7d46fd444c"},
{file = "sentencepiece-0.1.99-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84dbe53e02e4f8a2e45d2ac3e430d5c83182142658e25edd76539b7648928727"},
{file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b0f55d0a0ee1719b4b04221fe0c9f0c3461dc3dabd77a035fa2f4788eb3ef9a"},
{file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18e800f206cd235dc27dc749299e05853a4e4332e8d3dfd81bf13d0e5b9007d9"},
{file = "sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ae1c40cda8f9d5b0423cfa98542735c0235e7597d79caf318855cdf971b2280"},
{file = "sentencepiece-0.1.99-cp39-cp39-win32.whl", hash = "sha256:c84ce33af12ca222d14a1cdd37bd76a69401e32bc68fe61c67ef6b59402f4ab8"},
{file = "sentencepiece-0.1.99-cp39-cp39-win_amd64.whl", hash = "sha256:350e5c74d739973f1c9643edb80f7cc904dc948578bcb1d43c6f2b173e5d18dd"},
{file = "sentencepiece-0.1.99.tar.gz", hash = "sha256:189c48f5cb2949288f97ccdb97f0473098d9c3dcf5a3d99d4eabe719ec27297f"},
]
[[package]]
name = "setuptools"
version = "67.8.0"
@ -3380,7 +3850,7 @@ files = [
]
[package.dependencies]
greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""}
greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""}
typing-extensions = ">=4.2.0"
[package.extras]
@ -3466,6 +3936,20 @@ pure-eval = "*"
[package.extras]
tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
[[package]]
name = "sympy"
version = "1.12"
description = "Computer algebra system (CAS) in Python"
optional = true
python-versions = ">=3.8"
files = [
{file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"},
{file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"},
]
[package.dependencies]
mpmath = ">=0.19"
[[package]]
name = "tenacity"
version = "8.2.3"
@ -3578,6 +4062,17 @@ mxnet = ["mxnet (>=1.5.1,<1.6.0)"]
tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"]
torch = ["torch (>=1.6.0)"]
[[package]]
name = "threadpoolctl"
version = "3.2.0"
description = "threadpoolctl"
optional = true
python-versions = ">=3.8"
files = [
{file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"},
{file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"},
]
[[package]]
name = "tinycss2"
version = "1.2.1"
@ -3613,6 +4108,121 @@ idna = "*"
requests = ">=2.1.0"
requests-file = ">=1.4"
[[package]]
name = "tokenizers"
version = "0.14.1rc1"
description = ""
optional = true
python-versions = ">=3.7"
files = [
{file = "tokenizers-0.14.1rc1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:95f1cc4dbfbb52a1f0192e7498d55f0e01d54491ea0c4b17e99a23fb1a16f405"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e4e1a9261662a2e54d48d84e292d4be209404a045dc77b0b02feb93d2c30fd2"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4b186e914e54c5264c7ef6a1434a6f67eb1f0eccf0791efdfa28ad236c54e68a"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f2cc430d31efa282b0f291ec4bf7a3c902006ef6c3b39afd55fdbec0efc69a0"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:27517529e7d8b3781581be96df0498ce816edaaf110004f2a781d190d4599626"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea4ab00d98cac8b437447b31a8e4659ff1708541e25b4bbf081803240bc2b8eb"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:66517cf629424e952d97449a118b2850f2fbddaa30aecc73205a7160e5eafcb9"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca0f027dc40d64807ee5ed36531c32e76ac8d703050ddbfbb51121ac8a6ea41b"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ab92b2631c43f4b1c8f46082cf6d1ce3c360c2db6226570d2c3007b52c166c1"},
{file = "tokenizers-0.14.1rc1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d882938e4abfe0a19e5259b825cc912a861994814ee08d32bf35bb0485400f03"},
{file = "tokenizers-0.14.1rc1-cp310-none-win32.whl", hash = "sha256:31dd519673ac07f085b94eaa984e59513f20a658726e939b05475a8791740f46"},
{file = "tokenizers-0.14.1rc1-cp310-none-win_amd64.whl", hash = "sha256:18434891acbdba9384742eab0d11b64c52b7e598dad2611b1ab94fae5f19925b"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:6158ef935fe492ab781ecdd4a4d7bac3200e35013ad0f68b192c77c0008a068e"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8d3b1626149585838eee6b224eaaebad311e89ac5568bc2d8d98932b2e4ea1e"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:09921345a164525e8fe5f32566046e5904983b67a058c8f1fa9cdce1dd808fe0"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7ac992c192aac63bd50b399fe5694c43ddb74db194d45558361f3fc5ab6c2c0"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cebac59bbca4b3e74f05aa70caa65c91dc86ee7cfad9a31d65ec76b0595a3d1"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80488ac808d204aab1fd1cc8c6b32e0ebb6d5fee8d008ead71292885106786d9"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04413aade526a5dc99b9f0e886c8da80087a3a7749aed10bebc478db86967624"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e173e43bd6391b61b48a621c06073e7be09be746da1b5d38cd5366e6f43371"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8a1f7cb8e7312fb1938a3fb1e6bad8ba7530bc16faa4998c1f655ffc7591374c"},
{file = "tokenizers-0.14.1rc1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1df053bb1449d58f6790c663d1462f534d29e1b82de58f2c876a7154cf87dfc"},
{file = "tokenizers-0.14.1rc1-cp311-none-win32.whl", hash = "sha256:a794346b829657a964e86d04b9e9ac278abc33a74d1522b2fd1603848019ba4d"},
{file = "tokenizers-0.14.1rc1-cp311-none-win_amd64.whl", hash = "sha256:a33bb476ce6254d1a9cd3327f017465421b86c670929687751a3101dfe7f8d63"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:c682af80d8f17a07a83b22f81285624f3140fa471f7474b81771d3a1a06bb0c5"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e59cb7266f254c0dc887406d9f78f69c9d7518e967abea672be27035c17ac419"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:984ee72d0e7de14dc7e5e879a0c49d4b254b114d170bed21624932d86d02fbbe"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52faaeac04aafe8c7e1fbda89d8a5ea98c84cab4959c67b2845edea5cf9af2e9"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc8288a80af1a4d2c579884139c8c1209b29966e72b2e9e584685ce037e5e52f"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cdff49807fa6aa4b9aa343246ce1dcaaed4194b12a4b3f08dd1f9dee0529f41"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:518a278c70d04adfbc0b5e1dfb60ca4a4b5400c696ed5cdac1df06509a83ec17"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c38ccf5ded4c8ee0383c0ee4a34bb98aa9e269efdccf5e7797e439c2eca7586d"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a5b5f720481be0829647753da88ab0edb0a14172fbbeba6a86c1d652ef65b0cc"},
{file = "tokenizers-0.14.1rc1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9b169520bb366b1af6ceaee3931bbb608ab126dcb469848c7d4091a63d429530"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:ba8e33706d8394854f421ed20a398cddc40b324f35eee09e04479270283377a0"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:859ab4fa794095f58fce5e2f8c0659a8dcbecd44fa231fabb2c7a3dafc1dad6b"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f19fe578c27895520ab3bc94e7459fa22939592a6106bff770dcf3f4040fe95b"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdbf24f5c4cfaab881db8ba7a7910d6795cd256c59d7ca820f9755a2c68580a8"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:27f1ee29ca8be2ed1439966b2b6a40388015d2a4d589933e067c2b0a5d20b8fa"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b75286f5058973b0d77b4cd6ad452332957115036478743ec061865a9daae2cc"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa9e99511bb53470b87666995de482ae1734637f487c5c3ebe2f25047cbf8f7c"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35955de80828a8b4cfc7c838a606b8bd7c3c3bb72be5b6ebab949f1e7a6e9695"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:511de74a6d14e02110585bc541382e3dde82731dc76bcc695de91db96e9d4a49"},
{file = "tokenizers-0.14.1rc1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f5e8fa5e4a83cd71c23bd331719d8de163e594ca5371529afe82b11fb3746c81"},
{file = "tokenizers-0.14.1rc1-cp37-none-win32.whl", hash = "sha256:7677029399fc81f522390fddc4232d3a00c9d0e99f9327d9f3c9e2f7ac0f5d44"},
{file = "tokenizers-0.14.1rc1-cp37-none-win_amd64.whl", hash = "sha256:96725381d1bb37baf6b515a9698870016c3988c705f29549b02d011d36c48981"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:ed0e3077140a48a71098ce96ffc78a9f9aaf267c457d9209983bfd1fa537e71a"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5f071821668df4915dfb92f87ae9e48f54916237c34f825fc9c36601d16668e6"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c789acae0f4a3be7d3859abed52570e05862262040c8161cfe7ea2626122a455"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45a080b65cb602c4934b604d5866926b1e0a86eb57ddc29c299506d7355b7bad"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab6f92fc641215749874a8669b47bdfcc35401fb7060f074ee2bd79ccc5d0d5c"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc8b14a208cd7b7b824f9bc8a4a43f035c01f8cef4b56fc53fe5db3befee1dcd"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:31b1a1fcae82f9cec5600d4ff7a862aac7273377872295a2d832eed5354f79ac"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae44ac8ce5dae3f1ff876d2d21ba0e1252ea64439bc08efe83c6b07fd0e24c3d"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4cf5b14f9be4b0f2c3f9d140cc85c0bf639c5ca3b044adbfc7a8e1bb77acc1be"},
{file = "tokenizers-0.14.1rc1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8da4997db4224dcfb2bd4882ca22a7898375166c06e922a917745d71ca6f88f5"},
{file = "tokenizers-0.14.1rc1-cp38-none-win32.whl", hash = "sha256:a79ba6b08b86703c6015eec54561ace0e4ba9c7b86860d0012662674cf3306d5"},
{file = "tokenizers-0.14.1rc1-cp38-none-win_amd64.whl", hash = "sha256:a06f42248e8ce7e95fb65cff3aef0e37f856d44f0fda0cd11939a0f15ded664a"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:8db1f6194b2b0d40a15b1a55dda6f7542b12d35810f5863628376383f634209a"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8a9d4c0ed354b1d1b2966d34878ffac5382649b79d89a8d6193079978dba2719"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e4c070f880c9d26a216ade750f6f05d97b51c6730a2bcae01db7d71a86b5f253"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41770499dccdbdcaf14f837da4cb45666e9b52e664c004debd7a5169ba9b8004"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:167d7a24b61fa8db179711171f8c7d212e99c1b0e889af417c93b9266b498164"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e505496150ebf84904e44642f006cb52d73a1cd019fe512e36d35cf9b888754"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecc7f0a869d53bc7504760f3107d520701e19a75c50601b04a2ee18019525093"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:344453fae70840f743a606e143d142923809d9d6ff9658da6ca2e14bb883fa0f"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:43041bd1f8fe21e564c4f29b18181da3b5ff26cbdfac83e91bad9a4cefd8f185"},
{file = "tokenizers-0.14.1rc1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:532d2e8fae3f9caa846eeb79c3192a14c85aa67fcd6971feb3f7a3f07659d5b8"},
{file = "tokenizers-0.14.1rc1-cp39-none-win32.whl", hash = "sha256:78b1f2a089d622aff3c1e11908a4ccec2a35727ccd3da117a4d14bd9c08cdfe8"},
{file = "tokenizers-0.14.1rc1-cp39-none-win_amd64.whl", hash = "sha256:42ca382659cbb7ce98625fc2c61203f52cefce77fe8aff57eb235a3c33cd80f4"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4c222dd3b7b3d16d364089e0632cb1d19933f00caac87c1693a70fb28e0cebb2"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:45abd6f83a661b9929205d061c3ccecc223a257b71a1d864ef0f0de9b99a13c5"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:321f88229adc7951ef7e09d10be6277061daa10839dc31aa3d7ab00f259a097d"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0192e30369ea1293fd6a8606cf71aaeb564ef6f7e3851d4f743f077ec5f352d8"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e7784a73c6e4a7cbfd6a70800f74a8e0f3032128bd96ef2c3d9c0bfdc1e3c86"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0a5df9e3114fe17807fa087fb285190f8916db3c308deffa6d268799b64e32c7"},
{file = "tokenizers-0.14.1rc1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82e99643722dd0b19893c430761f4051833509cc531dd92b075e4e06dbcabd54"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c5d33210796615dd870852b2d449d36f629e11a9bc4e6a0d221bc21db1e7f69d"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0cf92ad4c45a644a41d4518379867f414de5ee713ea583558cab791c8aafdfcb"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:060682ae214b420f80d4e466c058ba0affb20c876bf95811ef00d318a73fbc94"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1aa554df4094794a5aa58a37399f6fb10c0249b6ff3381d52918f7b2a8ba13a9"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:56bfdc9ac9d37a7d8ef493a4ca7b29b5551e13ad74da2faba314582ba8d5fcf5"},
{file = "tokenizers-0.14.1rc1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:edf54b8ec629d4bf67f37e3b7c0d796d9c2f332a0c9c8cc46dbe97bc363d10cb"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:07679d0dc8205e4ef2463eea35a79e4730fb16f8954b1cbaf73e6cab8ac78ef2"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c16ec6a26cd54026e1d8af04594f6220cb6997ca9fd488db3c73208d5c08f5e"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a2957a8a999226ec4c1d076bfa5db6d9cfff5eaed5fe25fedcf3053ba01efd28"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2efa20546183690edc14c522258b4414ce74209815f952945abbc12ff4c088eb"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df4fdc8ce235cc31b6c733b3fd45a181e11944e9170e9199be98153e11919801"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d78514ac6d2d8c68c3ee171a2eda9704fb726aa4e0739f68278fe27bb2475cc8"},
{file = "tokenizers-0.14.1rc1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d13d8ba58cf7864fe51787a17f4154fc35bb0daa6d8ec676a387ef58eca87738"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:edb543f3aa12ef234c7ca9c68a5993dd7b4783f395fbd4c2b70774b200271589"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:32d0c89c641bd6e54984c2076910cba6325628d954181c86b33530ed06bbc217"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:19e5c75262268318a33dcfbd9d80d5ea9eb279dc72ad55a9725eea0542598932"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01b2fe42a312a9e9057030af113d944f914b17f3e88b3642b9d886900c7055d5"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f13981f46cfae39a8815ba85da48ff60fae073070546941ce4d91adaa959ea8"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:08220150ce9bd064ff70cc2bd9488354727f085d4bb8c5816e6a6b3f07aed2fa"},
{file = "tokenizers-0.14.1rc1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1015224ccb59a42456ea34e5f4d36838e462d27459dc4e29d4a9b98e02307248"},
{file = "tokenizers-0.14.1rc1.tar.gz", hash = "sha256:a481450e3d21026f79008b5f05294db3684127d5efeeef509e861368b314ede6"},
]
[package.dependencies]
huggingface_hub = ">=0.16.4,<0.18"
[package.extras]
dev = ["tokenizers[testing]"]
docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"]
testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"]
[[package]]
name = "tomli"
version = "2.0.1"
@ -3624,6 +4234,84 @@ files = [
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
[[package]]
name = "torch"
version = "2.1.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = true
python-versions = ">=3.8.0"
files = [
{file = "torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bf57f8184b2c317ef81fb33dc233ce4d850cd98ef3f4a38be59c7c1572d175db"},
{file = "torch-2.1.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a04a0296d47f28960f51c18c5489a8c3472f624ec3b5bcc8e2096314df8c3342"},
{file = "torch-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0bd691efea319b14ef239ede16d8a45c246916456fa3ed4f217d8af679433cc6"},
{file = "torch-2.1.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:101c139152959cb20ab370fc192672c50093747906ee4ceace44d8dd703f29af"},
{file = "torch-2.1.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:a6b7438a90a870e4cdeb15301519ae6c043c883fcd224d303c5b118082814767"},
{file = "torch-2.1.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:2224622407ca52611cbc5b628106fde22ed8e679031f5a99ce286629fc696128"},
{file = "torch-2.1.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:8132efb782cd181cc2dcca5e58effbe4217cdb2581206ac71466d535bf778867"},
{file = "torch-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:5c3bfa91ce25ba10116c224c59d5b64cdcce07161321d978bd5a1f15e1ebce72"},
{file = "torch-2.1.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:601b0a2a9d9233fb4b81f7d47dca9680d4f3a78ca3f781078b6ad1ced8a90523"},
{file = "torch-2.1.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:3cd1dedff13884d890f18eea620184fb4cd8fd3c68ce3300498f427ae93aa962"},
{file = "torch-2.1.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fb7bf0cc1a3db484eb5d713942a93172f3bac026fcb377a0cd107093d2eba777"},
{file = "torch-2.1.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:761822761fffaa1c18a62c5deb13abaa780862577d3eadc428f1daa632536905"},
{file = "torch-2.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:458a6d6d8f7d2ccc348ac4d62ea661b39a3592ad15be385bebd0a31ced7e00f4"},
{file = "torch-2.1.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:c8bf7eaf9514465e5d9101e05195183470a6215bb50295c61b52302a04edb690"},
{file = "torch-2.1.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:05661c32ec14bc3a157193d0f19a7b19d8e61eb787b33353cad30202c295e83b"},
{file = "torch-2.1.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:556d8dd3e0c290ed9d4d7de598a213fb9f7c59135b4fee144364a8a887016a55"},
{file = "torch-2.1.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:de7d63c6ecece118684415a3dbd4805af4a4c1ee1490cccf7405d8c240a481b4"},
{file = "torch-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:2419cf49aaf3b2336c7aa7a54a1b949fa295b1ae36f77e2aecb3a74e3a947255"},
{file = "torch-2.1.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6ad491e70dbe4288d17fdbfc7fbfa766d66cbe219bc4871c7a8096f4a37c98df"},
{file = "torch-2.1.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:421739685eba5e0beba42cb649740b15d44b0d565c04e6ed667b41148734a75b"},
]
[package.dependencies]
filelock = "*"
fsspec = "*"
jinja2 = "*"
networkx = "*"
sympy = "*"
typing-extensions = "*"
[package.extras]
opt-einsum = ["opt-einsum (>=3.3)"]
[[package]]
name = "torchvision"
version = "0.16.0"
description = "image and video datasets and models for torch deep learning"
optional = true
python-versions = ">=3.8"
files = [
{file = "torchvision-0.16.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:16c300fdbbe91469f5e9feef8d24c6acabd8849db502a06160dd76ba68e897a0"},
{file = "torchvision-0.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ef5dec6c48b715353781b83749efcdea03835720a71b377684453ee117aab3c7"},
{file = "torchvision-0.16.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:9e3a2012e463f498de21f6598cc7a266b9a8c6fe15788472fdc419233ea6f3f2"},
{file = "torchvision-0.16.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e4327e082b703921ae52caeee4f7839f7e6c73cfc5eedea468ecb5c1487ecdbf"},
{file = "torchvision-0.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:62f01513687cce3480df8928fcc6c09b4aa0433d05ac75e82877acc773f6a568"},
{file = "torchvision-0.16.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:31fdf289bdfb2976f65a14f79f6ddd1ee60113db34622674918e61521c2dc41f"},
{file = "torchvision-0.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2294a6514a31a6fda562288b28cf6db57877237f4b56ff693262f237a7ed4035"},
{file = "torchvision-0.16.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:6a24a1e83e4bc7a31b39ef05d2ca4cd2182e95ff10f525edffe1473f7ce16ca1"},
{file = "torchvision-0.16.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9ed5f21e5a56e466667c6f9f6f93dba2a75e29921108bd70043eaf8e9ba0a7cc"},
{file = "torchvision-0.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:9ee3d4df7d4a84f883f8ad11fb6510549f40f68dd5469eae601d7e02fb4809b2"},
{file = "torchvision-0.16.0-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:0c6f36d00b9ce412e367ad6f42e9054cbc890cd9ddd0d200ed9b3b52dd9c225b"},
{file = "torchvision-0.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:597f60cb03e6f758a00b36b38506f6f38b6c3f1fdfd3921bb9abd60b72d522fd"},
{file = "torchvision-0.16.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eddd91da4603f1dbb340d9aca82344df64605a0897b17014ac8e0b54dd6e5716"},
{file = "torchvision-0.16.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:79875f5247337723ec363762c2716bcfc13b78b3045e4e58847c696f03d9ed4d"},
{file = "torchvision-0.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:550c9793637c5369fbcb4e4b6b0e6d53a4f6cc22389f0563ad60ab90e4f1c8ba"},
{file = "torchvision-0.16.0-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:de7c7302fa2f67a2a151e595a8e7dc3865a445d952e99d5c682ba78f312fedc3"},
{file = "torchvision-0.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f044cffd252fd293b6df46f38d7eeb2fd4fe931e0114c5263735e3b8c9c60a4f"},
{file = "torchvision-0.16.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:8cb501061f6654da494dd975acc1fa301c4b8aacf96bdbcf1553f51a53ebfd1f"},
{file = "torchvision-0.16.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5a47108ae6a8effdf09fe35fd0c4d5414e69ca8d2334e87339de497b7b64b0c9"},
{file = "torchvision-0.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:9b8f06e6a2f80576007b88846f74b680a1ad3b59d2e22b075587b430180e9cfa"},
]
[package.dependencies]
numpy = "*"
pillow = ">=5.3.0,<8.3.dev0 || >=8.4.dev0"
requests = "*"
torch = "2.1.0"
[package.extras]
scipy = ["scipy"]
[[package]]
name = "tornado"
version = "6.3.3"
@ -3679,6 +4367,75 @@ files = [
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"]
[[package]]
name = "transformers"
version = "4.34.0"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
optional = true
python-versions = ">=3.8.0"
files = [
{file = "transformers-4.34.0-py3-none-any.whl", hash = "sha256:3f0187183a7f22c51ecbbc9eac5145df666c5b86bec6feed10e11f0363f3a1f9"},
{file = "transformers-4.34.0.tar.gz", hash = "sha256:cc2ae61bfbfaa45337fd9017326669fc60e4f55125f589d50da47819e3d6f504"},
]
[package.dependencies]
filelock = "*"
huggingface-hub = ">=0.16.4,<1.0"
numpy = ">=1.17"
packaging = ">=20.0"
pyyaml = ">=5.1"
regex = "!=2019.12.17"
requests = "*"
safetensors = ">=0.3.1"
tokenizers = ">=0.14,<0.15"
tqdm = ">=4.27"
[package.extras]
accelerate = ["accelerate (>=0.20.3)"]
agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"]
all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
codecarbon = ["codecarbon (==1.2.0)"]
deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"]
deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"]
dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.15)", "urllib3 (<2.0.0)"]
dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
docs-specific = ["hf-doc-builder"]
fairscale = ["fairscale (>0.3)"]
flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"]
flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
ftfy = ["ftfy"]
integrations = ["optuna", "ray[tune]", "sigopt"]
ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"]
modelcreation = ["cookiecutter (==1.7.3)"]
natten = ["natten (>=0.14.6)"]
onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"]
onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
optuna = ["optuna"]
quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"]
ray = ["ray[tune]"]
retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
sagemaker = ["sagemaker (>=2.31.0)"]
sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"]
sigopt = ["sigopt"]
sklearn = ["scikit-learn"]
speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"]
tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"]
tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"]
tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
timm = ["timm"]
tokenizers = ["tokenizers (>=0.14,<0.15)"]
torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"]
torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
torch-vision = ["Pillow (<10.0.0)", "torchvision"]
torchhub = ["filelock", "huggingface-hub (>=0.16.4,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.15)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"]
video = ["av (==9.2.0)", "decord (==0.6.0)"]
vision = ["Pillow (<10.0.0)"]
[[package]]
name = "typer"
version = "0.9.0"
@ -3793,6 +4550,33 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "vowpal-wabbit-next"
version = "0.6.0"
description = "Experimental python bindings for VowpalWabbit"
optional = true
python-versions = ">=3.7"
files = [
{file = "vowpal-wabbit-next-0.6.0.tar.gz", hash = "sha256:f0381614d99fac6a0f52e995ee0bfc7b681054f397bea7ff08b8a523d5315a54"},
{file = "vowpal_wabbit_next-0.6.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:cfbb831cfe9eb81185aff7cdca437ae17c6d9aca8d74e26c326e3ef4ee8e81e7"},
{file = "vowpal_wabbit_next-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d31829778f9c600f5c121f614516ca1bc9ede5d1bc77b1eb3b59b32d9138db9"},
{file = "vowpal_wabbit_next-0.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:714347606ab302a2f72870b6ae6dce58de4bec1b489f4bd65d80a8e326e1db8a"},
{file = "vowpal_wabbit_next-0.6.0-cp311-cp311-macosx_10_13_universal2.whl", hash = "sha256:3a8482d5c0b9357fdb36b62d659e6b74e93aeab165b910292572a98e91d7a014"},
{file = "vowpal_wabbit_next-0.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e4349099b938102f51fb6fedf035bc1deacb2971cd2a48641ca7d45186efda0"},
{file = "vowpal_wabbit_next-0.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:c8f58cdc49f270b1bed6f0fdd7520c8ba1b328de5cd8a2760c0ec70a630de92e"},
{file = "vowpal_wabbit_next-0.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8b7052ce7212fd1cae8ffd966e240c814f3c1df08fd612437d48f0f23e7694c"},
{file = "vowpal_wabbit_next-0.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d24d9c380d0e9b41151337c7f9e2a33ec5bfd738fdee9f65c1a40e486234aca3"},
{file = "vowpal_wabbit_next-0.6.0-cp38-cp38-macosx_10_13_universal2.whl", hash = "sha256:0d77a8c55249ec9a7f404939ecc6948db0527e522e8a7ae149ec7cd29b3ade04"},
{file = "vowpal_wabbit_next-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa2f52f1267fbc26c7757335f9c76a0f00b112971e04c85b8a9bc9e82300597"},
{file = "vowpal_wabbit_next-0.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:5d04f91200ecae73196d9f5601853d63afce8c1c8a0d310a608e8ddfa3b190cb"},
{file = "vowpal_wabbit_next-0.6.0-cp39-cp39-macosx_10_13_universal2.whl", hash = "sha256:2df4a652729c0db34afd8fb4fc49b0090d6f061e2d49899e5f092fd4c3d23253"},
{file = "vowpal_wabbit_next-0.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c289a260ab759f04903b441701cff66ea74d6c061d966caaba0c65ac12d05528"},
{file = "vowpal_wabbit_next-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:8d022cab07274f227df159a81bccf034def7dd54ad70392ee98743ffa4953072"},
]
[package.dependencies]
numpy = "*"
[[package]]
name = "wasabi"
version = "1.1.2"
@ -3974,9 +4758,9 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"]
[extras]
extended-testing = ["faker", "presidio-analyzer", "presidio-anonymizer"]
extended-testing = ["faker", "presidio-analyzer", "presidio-anonymizer", "sentence-transformers", "vowpal-wabbit-next"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "bb01411c2b2c4d3c52cf6243809cbf7f72692706fb33858d51f8c8ed3dc55663"
content-hash = "32a2a03709f454ae3808ab815a23e5024464946de278eb76281caa305a180522"

@ -14,6 +14,8 @@ langchain = ">=0.0.308"
presidio-anonymizer = {version = "^2.2.33", optional = true}
presidio-analyzer = {version = "^2.2.33", optional = true}
faker = {version = "^19.3.1", optional = true}
vowpal-wabbit-next = {version = "0.6.0", optional = true}
sentence-transformers = {version = "^2", optional = true}
[tool.poetry.group.lint.dependencies]
@ -43,6 +45,8 @@ extended_testing = [
"presidio-anonymizer",
"presidio-analyzer",
"faker",
"vowpal-wabbit-next",
"sentence-transformers",
]
[tool.ruff]

@ -0,0 +1,9 @@
import ctypes
def is_libcublas_available() -> bool:
    """Report whether ``libcublas.so`` can be loaded through ``ctypes``.

    Returns:
        ``True`` when ``ctypes.CDLL("libcublas.so")`` succeeds, ``False`` when
        the load attempt raises ``OSError``.
    """
    try:
        ctypes.CDLL("libcublas.so")
    except OSError:
        # The shared library could not be located/loaded.
        return False
    return True

@ -0,0 +1,459 @@
from typing import Any, Dict
import pytest
from langchain.chat_models import FakeListChatModel
from langchain.prompts.prompt import PromptTemplate
from test_utils import MockEncoder, MockEncoderReturnsList
import langchain_experimental.rl_chain.base as rl_chain
import langchain_experimental.rl_chain.pick_best_chain as pick_best_chain
# Prefix the tests prepend to raw text when building the embedding they expect
# (presumably mirrors what test_utils.MockEncoder produces — confirm there).
encoded_keyword = "[encoded]"
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def setup() -> tuple:
    """Build the fake chat model and placeholder prompt shared by these tests.

    Returns:
        A ``(llm, prompt)`` pair: a ``FakeListChatModel`` whose only canned
        response is ``"hey"`` and a ``PromptTemplate`` without input variables.
    """
    dummy_prompt = PromptTemplate(
        input_variables=[],
        template="""This is a dummy prompt that will be ignored by the fake llm""",
    )
    return FakeListChatModel(responses=["hey"]), dummy_prompt
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_multiple_ToSelectFrom_throws() -> None:
    """Supplying two ``ToSelectFrom`` inputs in one run must raise ``ValueError``."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=embedder
    )
    candidates = ["0", "1", "2"]
    with pytest.raises(ValueError):
        chain.run(
            User=rl_chain.BasedOn("Context"),
            action=rl_chain.ToSelectFrom(candidates),
            another_action=rl_chain.ToSelectFrom(candidates),
        )
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_missing_basedOn_from_throws() -> None:
    """A run with only a ``ToSelectFrom`` input (no ``BasedOn``) must fail."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=embedder
    )
    with pytest.raises(ValueError):
        chain.run(action=rl_chain.ToSelectFrom(["0", "1", "2"]))
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_ToSelectFrom_not_a_list_throws() -> None:
    """Handing ``ToSelectFrom`` a dict instead of a list must raise ``ValueError``."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=embedder
    )
    not_a_list = {"actions": ["0", "1", "2"]}
    # The wrappers are built inside the ``raises`` block on purpose: the error
    # may come from wrapping the value rather than from running the chain.
    with pytest.raises(ValueError):
        chain.run(
            User=rl_chain.BasedOn("Context"),
            action=rl_chain.ToSelectFrom(not_a_list),
        )
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_update_with_delayed_score_with_auto_validator_throws() -> None:
    """A delayed score on an already auto-scored response raises ``RuntimeError``."""
    main_llm, prompt = setup()
    # The scorer's fake LLM answers "3"; the test expects that to become the score.
    scoring_llm = FakeListChatModel(responses=["3"])
    chain = pick_best_chain.PickBest.from_llm(
        llm=main_llm,
        prompt=prompt,
        selection_scorer=rl_chain.AutoSelectionScorer(llm=scoring_llm),
        feature_embedder=pick_best_chain.PickBestFeatureEmbedder(
            auto_embed=False, model=MockEncoder()
        ),
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score == 3.0  # type: ignore
    with pytest.raises(RuntimeError):
        chain.update_with_delayed_score(
            chain_response=result, score=100  # type: ignore
        )
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_update_with_delayed_score_force() -> None:
    """``force_score=True`` lets a delayed score replace the auto-assigned one."""
    main_llm, prompt = setup()
    # The scorer's fake LLM answers "3"; the test expects that to become the score.
    scoring_llm = FakeListChatModel(responses=["3"])
    chain = pick_best_chain.PickBest.from_llm(
        llm=main_llm,
        prompt=prompt,
        selection_scorer=rl_chain.AutoSelectionScorer(llm=scoring_llm),
        feature_embedder=pick_best_chain.PickBestFeatureEmbedder(
            auto_embed=False, model=MockEncoder()
        ),
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    assert result["response"] == "hey"  # type: ignore
    # Keep a handle on the metadata object so the post-update score is read
    # from the very same instance.
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score == 3.0  # type: ignore
    chain.update_with_delayed_score(
        chain_response=result, score=100, force_score=True  # type: ignore
    )
    assert metadata.selected.score == 100.0  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_update_with_delayed_score() -> None:
    """Without a scorer the score starts as ``None`` and can be set afterwards."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm,
        prompt=prompt,
        selection_scorer=None,
        feature_embedder=embedder,
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score is None  # type: ignore
    chain.update_with_delayed_score(chain_response=result, score=100)  # type: ignore
    assert metadata.selected.score == 100.0  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_user_defined_scorer() -> None:
    """A custom ``SelectionScorer`` subclass supplies the recorded score."""
    fake_llm, prompt = setup()

    class CustomSelectionScorer(rl_chain.SelectionScorer):
        """Scorer that ignores its arguments and always answers 200."""

        def score_response(
            self,
            inputs: Dict[str, Any],
            llm_response: str,
            event: pick_best_chain.PickBestEvent,
        ) -> float:
            return 200

    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm,
        prompt=prompt,
        selection_scorer=CustomSelectionScorer(),
        feature_embedder=embedder,
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score == 200.0  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_everything_embedded() -> None:
    """``EmbedAndKeep`` on context and actions yields raw text plus embedding."""
    fake_llm, prompt = setup()
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=feature_embedder, auto_embed=False
    )

    str1, str2, str3 = "0", "1", "2"
    ctx_str_1 = "context1"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str2, encoded_str3 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str2, str3)
    )
    encoded_ctx_str_1 = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str_1))

    expected = f"""shared |User {ctx_str_1 + " " + encoded_ctx_str_1} \n|action {str1 + " " + encoded_str1} \n|action {str2 + " " + encoded_str2} \n|action {str3 + " " + encoded_str3} """  # noqa

    result = chain.run(
        User=rl_chain.EmbedAndKeep(rl_chain.BasedOn(ctx_str_1)),
        action=rl_chain.EmbedAndKeep(rl_chain.ToSelectFrom([str1, str2, str3])),
    )
    event = result["selection_metadata"]  # type: ignore
    assert feature_embedder.format(event) == expected  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_default_auto_embedder_is_off() -> None:
    """When ``from_llm`` gets no ``auto_embed`` flag, features stay raw text."""
    fake_llm, prompt = setup()
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=feature_embedder
    )

    str1, str2, str3 = "0", "1", "2"
    ctx_str_1 = "context1"
    expected = f"""shared |User {ctx_str_1} \n|action {str1} \n|action {str2} \n|action {str3} """  # noqa

    result = chain.run(
        User=pick_best_chain.base.BasedOn(ctx_str_1),
        action=pick_best_chain.base.ToSelectFrom([str1, str2, str3]),
    )
    event = result["selection_metadata"]  # type: ignore
    assert feature_embedder.format(event) == expected  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_default_w_embeddings_off() -> None:
    """With ``auto_embed=False`` everywhere, the formatted example is raw text."""
    fake_llm, prompt = setup()
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=feature_embedder, auto_embed=False
    )

    str1, str2, str3 = "0", "1", "2"
    ctx_str_1 = "context1"
    expected = f"""shared |User {ctx_str_1} \n|action {str1} \n|action {str2} \n|action {str3} """  # noqa

    result = chain.run(
        User=rl_chain.BasedOn(ctx_str_1),
        action=rl_chain.ToSelectFrom([str1, str2, str3]),
    )
    event = result["selection_metadata"]  # type: ignore
    assert feature_embedder.format(event) == expected  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_default_w_embeddings_on() -> None:
    """With ``auto_embed=True`` the output gains ``@``/``#`` and dotprod parts."""
    fake_llm, prompt = setup()
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=True, model=MockEncoderReturnsList()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=feature_embedder, auto_embed=True
    )

    str1, str2 = "0", "1"
    ctx_str_1 = "context1"
    dot_prod = "dotprod 0:5.0"  # dot prod of [1.0, 2.0] and [1.0, 2.0]
    expected = f"""shared |User {ctx_str_1} |@ User={ctx_str_1}\n|action {str1} |# action={str1} |{dot_prod}\n|action {str2} |# action={str2} |{dot_prod}"""  # noqa

    result = chain.run(
        User=rl_chain.BasedOn(ctx_str_1),
        action=rl_chain.ToSelectFrom([str1, str2]),
    )
    event = result["selection_metadata"]  # type: ignore
    assert feature_embedder.format(event) == expected  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_default_embeddings_mixed_w_explicit_user_embeddings() -> None:
    """Auto-embedding combined with inputs the caller wrapped in ``Embed``."""
    fake_llm, prompt = setup()
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=True, model=MockEncoderReturnsList()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=feature_embedder, auto_embed=True
    )

    str1, str2 = "0", "1"
    ctx_str_1, ctx_str_2 = "context1", "context2"
    # Explicitly embedded items are expected to appear as this stringified vector.
    encoded_str2 = rl_chain.stringify_embedding([1.0, 2.0])
    encoded_ctx_str_1 = rl_chain.stringify_embedding([1.0, 2.0])
    dot_prod = "dotprod 0:5.0 1:5.0"  # dot prod of [1.0, 2.0] and [1.0, 2.0]
    expected = f"""shared |User {encoded_ctx_str_1} |@ User={encoded_ctx_str_1} |User2 {ctx_str_2} |@ User2={ctx_str_2}\n|action {str1} |# action={str1} |{dot_prod}\n|action {encoded_str2} |# action={encoded_str2} |{dot_prod}"""  # noqa

    result = chain.run(
        User=rl_chain.BasedOn(rl_chain.Embed(ctx_str_1)),
        User2=rl_chain.BasedOn(ctx_str_2),
        action=rl_chain.ToSelectFrom([str1, rl_chain.Embed(str2)]),
    )
    event = result["selection_metadata"]  # type: ignore
    assert feature_embedder.format(event) == expected  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_default_no_scorer_specified() -> None:
    """Omitting ``selection_scorer`` still produces a score from the chain LLM."""
    _, prompt = setup()
    # One fake model with two canned replies: the first is expected as the
    # chain response, the second as the score.
    shared_llm = FakeListChatModel(responses=["hey", "100"])
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=shared_llm, prompt=prompt, feature_embedder=embedder
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score == 100.0  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_explicitly_no_scorer() -> None:
    """Passing ``selection_scorer=None`` leaves the selection unscored."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm,
        prompt=prompt,
        selection_scorer=None,
        feature_embedder=embedder,
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    # The chain llm answers the prompt; with no scorer there is no score.
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score is None  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_auto_scorer_with_user_defined_llm() -> None:
    """An ``AutoSelectionScorer`` with its own LLM supplies the score."""
    fake_llm, prompt = setup()
    scoring_llm = FakeListChatModel(responses=["300"])
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm,
        prompt=prompt,
        selection_scorer=rl_chain.AutoSelectionScorer(llm=scoring_llm),
        feature_embedder=pick_best_chain.PickBestFeatureEmbedder(
            auto_embed=False, model=MockEncoder()
        ),
    )
    result = chain.run(
        User=rl_chain.BasedOn("Context"),
        action=rl_chain.ToSelectFrom(["0", "1", "2"]),
    )
    # Response is expected from the chain llm, score from the scorer's llm.
    assert result["response"] == "hey"  # type: ignore
    metadata = result["selection_metadata"]  # type: ignore
    assert metadata.selected.score == 300.0  # type: ignore
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_calling_chain_w_reserved_inputs_throws() -> None:
    """Using a reserved ``rl_chain_*`` name as an input key raises ``ValueError``."""
    fake_llm, prompt = setup()
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    chain = pick_best_chain.PickBest.from_llm(
        llm=fake_llm, prompt=prompt, feature_embedder=embedder
    )
    # Same call twice, differing only in which reserved keyword is used.
    for reserved_name in ("rl_chain_selected_based_on", "rl_chain_selected"):
        with pytest.raises(ValueError):
            chain.run(
                User=rl_chain.BasedOn("Context"),
                **{reserved_name: rl_chain.ToSelectFrom(["0", "1", "2"])},
            )
@pytest.mark.requires("vowpal_wabbit_next", "sentence_transformers")
def test_activate_and_deactivate_scorer() -> None:
    """Scoring stops after deactivation and resumes after re-activation."""
    _, prompt = setup()
    chain_llm = FakeListChatModel(responses=["hey1", "hey2", "hey3"])
    scoring_llm = FakeListChatModel(responses=["300", "400"])
    chain = pick_best_chain.PickBest.from_llm(
        llm=chain_llm,
        prompt=prompt,
        selection_scorer=pick_best_chain.base.AutoSelectionScorer(llm=scoring_llm),
        feature_embedder=pick_best_chain.PickBestFeatureEmbedder(
            auto_embed=False, model=MockEncoder()
        ),
    )

    def run_chain() -> Any:
        """Invoke the chain with the same context/actions on every call."""
        return chain.run(
            User=pick_best_chain.base.BasedOn("Context"),
            action=pick_best_chain.base.ToSelectFrom(["0", "1", "2"]),
        )

    # Scorer active: first chain reply, first scorer reply.
    first = run_chain()
    assert first["response"] == "hey1"
    assert first["selection_metadata"].selected.score == 300.0

    # Scorer switched off: the chain still answers, but no score is recorded.
    chain.deactivate_selection_scorer()
    second = run_chain()
    assert second["response"] == "hey2"
    assert second["selection_metadata"].selected.score is None

    # Scorer back on: the scorer's second canned reply is expected.
    chain.activate_selection_scorer()
    third = run_chain()
    assert third["response"] == "hey3"
    assert third["selection_metadata"].selected.score == 400.0

@ -0,0 +1,370 @@
import pytest
from test_utils import MockEncoder
import langchain_experimental.rl_chain.base as rl_chain
import langchain_experimental.rl_chain.pick_best_chain as pick_best_chain
# Prefix the tests prepend to raw text when building the embedding they expect
# (presumably mirrors what test_utils.MockEncoder produces — confirm there).
encoded_keyword = "[encoded]"
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_missing_context_throws() -> None:
    """Formatting an event with an empty ``based_on`` raises ``ValueError``."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action": ["0", "1", "2"]},
        based_on={},
    )
    with pytest.raises(ValueError):
        embedder.format(event)
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_missing_actions_throws() -> None:
    """Formatting an event with an empty ``to_select_from`` raises ``ValueError``."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={},
        based_on={"context": "context"},
    )
    with pytest.raises(ValueError):
        embedder.format(event)
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_no_label_no_emb() -> None:
    """Plain-text event without a selection formats with no label prefix."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": ["0", "1", "2"]},
        based_on={"context": "context"},
    )
    expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_w_label_no_score_no_emb() -> None:
    """A selection lacking a score does not add a label to the output."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    unscored_pick = pick_best_chain.PickBestSelected(index=0, probability=1.0)
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": ["0", "1", "2"]},
        based_on={"context": "context"},
        selected=unscored_pick,
    )
    expected = """shared |context context \n|action1 0 \n|action1 1 \n|action1 2 """
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_w_full_label_no_emb() -> None:
    """A scored selection prefixes the chosen action line with its label."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    scored_pick = pick_best_chain.PickBestSelected(
        index=0, probability=1.0, score=0.0
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": ["0", "1", "2"]},
        based_on={"context": "context"},
        selected=scored_pick,
    )
    expected = (
        """shared |context context \n0:-0.0:1.0 |action1 0 \n|action1 1 \n|action1 2 """
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_w_full_label_w_emb() -> None:
    """``Embed`` on context and actions emits only the embedded features."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1 = "context1"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str2, encoded_str3 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str2, str3)
    )
    encoded_ctx_str_1 = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str_1))

    expected = f"""shared |context {encoded_ctx_str_1} \n0:-0.0:1.0 |action1 {encoded_str1} \n|action1 {encoded_str2} \n|action1 {encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": rl_chain.Embed([str1, str2, str3])},
        based_on={"context": rl_chain.Embed(ctx_str_1)},
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None:
    """``EmbedAndKeep`` emits the raw text followed by its embedding."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1 = "context1"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str2, encoded_str3 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str2, str3)
    )
    encoded_ctx_str_1 = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str_1))

    expected = f"""shared |context {ctx_str_1 + " " + encoded_ctx_str_1} \n0:-0.0:1.0 |action1 {str1 + " " + encoded_str1} \n|action1 {str2 + " " + encoded_str2} \n|action1 {str3 + " " + encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": rl_chain.EmbedAndKeep([str1, str2, str3])},
        based_on={"context": rl_chain.EmbedAndKeep(ctx_str_1)},
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None:
    """Dict-valued actions and several contexts each get their own namespace."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": [{"a": "0", "b": "0"}, "1", "2"]},
        based_on={"context1": "context1", "context2": "context2"},
    )
    expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """  # noqa: E501
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None:
    """Multi-namespace event with an unscored selection has no label prefix."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    unscored_pick = pick_best_chain.PickBestSelected(index=0, probability=1.0)
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": [{"a": "0", "b": "0"}, "1", "2"]},
        based_on={"context1": "context1", "context2": "context2"},
        selected=unscored_pick,
    )
    expected = """shared |context1 context1 |context2 context2 \n|a 0 |b 0 \n|action1 1 \n|action1 2 """  # noqa: E501
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None:
    """Multi-namespace event with a scored selection labels the chosen action."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    scored_pick = pick_best_chain.PickBestSelected(
        index=0, probability=1.0, score=0.0
    )
    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action1": [{"a": "0", "b": "0"}, "1", "2"]},
        based_on={"context1": "context1", "context2": "context2"},
        selected=scored_pick,
    )
    expected = """shared |context1 context1 |context2 context2 \n0:-0.0:1.0 |a 0 |b 0 \n|action1 1 \n|action1 2 """  # noqa: E501
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None:
    """``Embed`` applied to every namespace emits embeddings only."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1, ctx_str_2 = "context1", "context2"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str2, encoded_str3 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str2, str3)
    )
    encoded_ctx_str_1, encoded_ctx_str_2 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (ctx_str_1, ctx_str_2)
    )

    expected = f"""shared |context1 {encoded_ctx_str_1} |context2 {encoded_ctx_str_2} \n0:-0.0:1.0 |a {encoded_str1} |b {encoded_str1} \n|action1 {encoded_str2} \n|action1 {encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={
            "action1": rl_chain.Embed([{"a": str1, "b": str1}, str2, str3])
        },
        based_on={
            "context1": rl_chain.Embed(ctx_str_1),
            "context2": rl_chain.Embed(ctx_str_2),
        },
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_keep() -> (
    None
):
    """``EmbedAndKeep`` on every namespace emits raw text plus embedding."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1, ctx_str_2 = "context1", "context2"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str2, encoded_str3 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str2, str3)
    )
    encoded_ctx_str_1, encoded_ctx_str_2 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (ctx_str_1, ctx_str_2)
    )

    expected = f"""shared |context1 {ctx_str_1 + " " + encoded_ctx_str_1} |context2 {ctx_str_2 + " " + encoded_ctx_str_2} \n0:-0.0:1.0 |a {str1 + " " + encoded_str1} |b {str1 + " " + encoded_str1} \n|action1 {str2 + " " + encoded_str2} \n|action1 {str3 + " " + encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={
            "action1": rl_chain.EmbedAndKeep([{"a": str1, "b": str1}, str2, str3])
        },
        based_on={
            "context1": rl_chain.EmbedAndKeep(ctx_str_1),
            "context2": rl_chain.EmbedAndKeep(ctx_str_2),
        },
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> None:
    """Only the individually ``Embed``-wrapped values are emitted as embeddings."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1, ctx_str_2 = "context1", "context2"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str3, encoded_ctx_str_2 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str3, ctx_str_2)
    )

    expected = f"""shared |context1 {ctx_str_1} |context2 {encoded_ctx_str_2} \n0:-0.0:1.0 |a {str1} |b {encoded_str1} \n|action1 {str2} \n|action1 {encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={
            "action1": [
                {"a": str1, "b": rl_chain.Embed(str1)},
                str2,
                rl_chain.Embed(str3),
            ]
        },
        based_on={"context1": ctx_str_1, "context2": rl_chain.Embed(ctx_str_2)},
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() -> None:
    """Only the ``EmbedAndKeep``-wrapped values gain an embedding after the text."""
    embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1, str2, str3 = "0", "1", "2"
    ctx_str_1, ctx_str_2 = "context1", "context2"
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1, encoded_str3, encoded_ctx_str_2 = (
        rl_chain.stringify_embedding(list(encoded_keyword + text))
        for text in (str1, str3, ctx_str_2)
    )

    expected = f"""shared |context1 {ctx_str_1} |context2 {ctx_str_2 + " " + encoded_ctx_str_2} \n0:-0.0:1.0 |a {str1} |b {str1 + " " + encoded_str1} \n|action1 {str2} \n|action1 {str3 + " " + encoded_str3} """  # noqa: E501

    event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={
            "action1": [
                {"a": str1, "b": rl_chain.EmbedAndKeep(str1)},
                str2,
                rl_chain.EmbedAndKeep(str3),
            ]
        },
        based_on={
            "context1": ctx_str_1,
            "context2": rl_chain.EmbedAndKeep(ctx_str_2),
        },
        selected=pick_best_chain.PickBestSelected(
            index=0, probability=1.0, score=0.0
        ),
    )
    assert embedder.format(event) == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_raw_features_underscored() -> None:
    """Raw multi-word features have their spaces replaced by underscores.

    Checked for three input flavours: plain text, ``Embed`` and
    ``EmbedAndKeep``.
    """
    feature_embedder = pick_best_chain.PickBestFeatureEmbedder(
        auto_embed=False, model=MockEncoder()
    )
    str1 = "this is a long string"
    ctx_str = "this is a long context"
    str1_underscored = str1.replace(" ", "_")
    ctx_str_underscored = ctx_str.replace(" ", "_")
    # Expected embeddings are the stringified characters of keyword + text.
    encoded_str1 = rl_chain.stringify_embedding(list(encoded_keyword + str1))
    encoded_ctx_str = rl_chain.stringify_embedding(list(encoded_keyword + ctx_str))

    # 1) No embeddings: raw text only, underscored.
    expected_no_embed = (
        f"""shared |context {ctx_str_underscored} \n|action {str1_underscored} """
    )
    plain_event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action": [str1]},
        based_on={"context": ctx_str},
    )
    assert feature_embedder.format(plain_event) == expected_no_embed

    # 2) Just embeddings: no raw text in the output.
    expected_embed = f"""shared |context {encoded_ctx_str} \n|action {encoded_str1} """
    embedded_event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action": rl_chain.Embed([str1])},
        based_on={"context": rl_chain.Embed(ctx_str)},
    )
    assert feature_embedder.format(embedded_event) == expected_embed

    # 3) Embeddings and raw features: underscored text followed by embedding.
    expected_embed_and_keep = f"""shared |context {ctx_str_underscored + " " + encoded_ctx_str} \n|action {str1_underscored + " " + encoded_str1} """  # noqa: E501
    kept_event = pick_best_chain.PickBestEvent(
        inputs={},
        to_select_from={"action": rl_chain.EmbedAndKeep([str1])},
        based_on={"context": rl_chain.EmbedAndKeep(ctx_str)},
    )
    assert feature_embedder.format(kept_event) == expected_embed_and_keep

@ -0,0 +1,422 @@
from typing import List, Union
import pytest
from test_utils import MockEncoder
import langchain_experimental.rl_chain.base as base
# Prefix the tests prepend to a raw string before handing its character list to
# base.stringify_embedding when building expected values.
# NOTE(review): presumably mirrors the prefix added by test_utils.MockEncoder.encode
# -- keep the two in sync.
encoded_keyword = "[encoded]"
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_context_str_no_emb() -> None:
    """A bare string is expected back unencoded under the supplied namespace."""
    result = base.embed("test", MockEncoder(), "a_namespace")
    assert result == [{"a_namespace": "test"}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_context_str_w_emb() -> None:
    """Embed yields the encoding alone; EmbedAndKeep yields raw text plus encoding."""
    raw = "test"
    encoding = base.stringify_embedding(list(encoded_keyword + raw))

    embedded = base.embed(base.Embed(raw), MockEncoder(), "a_namespace")
    assert embedded == [{"a_namespace": encoding}]

    kept = base.embed(base.EmbedAndKeep(raw), MockEncoder(), "a_namespace")
    assert kept == [{"a_namespace": raw + " " + encoding}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_context_str_w_nested_emb() -> None:
    """When Embed and EmbedAndKeep are nested, the innermost wrapper decides."""
    # nested embeddings, innermost wins
    raw = "test"
    encoding = base.stringify_embedding(list(encoded_keyword + raw))

    # Innermost wrapper is Embed: only the encoding is expected.
    embed_innermost = base.EmbedAndKeep(base.Embed(raw))
    assert base.embed(embed_innermost, MockEncoder(), "a_namespace") == [
        {"a_namespace": encoding}
    ]

    # Innermost wrapper is EmbedAndKeep: raw text and encoding are expected.
    keep_innermost = base.Embed(base.EmbedAndKeep(raw))
    assert base.embed(keep_innermost, MockEncoder(), "a_namespace") == [
        {"a_namespace": raw + " " + encoding}
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_context_w_namespace_no_emb() -> None:
    """A single-namespace dict of raw text is expected back wrapped in a list."""
    result = base.embed({"test_namespace": "test"}, MockEncoder())
    assert result == [{"test_namespace": "test"}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_context_w_namespace_w_emb() -> None:
    """Wrapping a dict *value* in Embed / EmbedAndKeep encodes that namespace."""
    raw = "test"
    encoding = base.stringify_embedding(list(encoded_keyword + raw))

    embedded = base.embed({"test_namespace": base.Embed(raw)}, MockEncoder())
    assert embedded == [{"test_namespace": encoding}]

    kept = base.embed({"test_namespace": base.EmbedAndKeep(raw)}, MockEncoder())
    assert kept == [{"test_namespace": raw + " " + encoding}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_context_w_namespace_w_emb2() -> None:
    """Wrapping the *whole dict* in Embed / EmbedAndKeep encodes its value."""
    raw = "test"
    encoding = base.stringify_embedding(list(encoded_keyword + raw))

    embedded = base.embed(base.Embed({"test_namespace": raw}), MockEncoder())
    assert embedded == [{"test_namespace": encoding}]

    kept = base.embed(base.EmbedAndKeep({"test_namespace": raw}), MockEncoder())
    assert kept == [{"test_namespace": raw + " " + encoding}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_context_w_namespace_w_some_emb() -> None:
    """Only the wrapped namespace of a two-namespace dict is expected encoded."""
    plain, wrapped = "test1", "test2"
    wrapped_encoding = base.stringify_embedding(list(encoded_keyword + wrapped))

    embedded = base.embed(
        {"test_namespace": plain, "test_namespace2": base.Embed(wrapped)},
        MockEncoder(),
    )
    assert embedded == [
        {"test_namespace": plain, "test_namespace2": wrapped_encoding}
    ]

    kept = base.embed(
        {"test_namespace": plain, "test_namespace2": base.EmbedAndKeep(wrapped)},
        MockEncoder(),
    )
    assert kept == [
        {
            "test_namespace": plain,
            "test_namespace2": wrapped + " " + wrapped_encoding,
        }
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_action_strlist_no_emb() -> None:
    """A list of bare strings is expected as one raw namespace dict per item."""
    to_embed: List[Union[str, base._Embed]] = ["test1", "test2", "test3"]
    expected = [{"a_namespace": item} for item in to_embed]
    assert base.embed(to_embed, MockEncoder(), "a_namespace") == expected
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_action_strlist_w_emb() -> None:
    """Wrapping a whole string list in Embed / EmbedAndKeep encodes every item."""
    texts = ["test1", "test2", "test3"]
    encodings = [
        base.stringify_embedding(list(encoded_keyword + text)) for text in texts
    ]

    # A fresh copy of the list is handed to each wrapper.
    embedded = base.embed(base.Embed(list(texts)), MockEncoder(), "a_namespace")
    assert embedded == [{"a_namespace": encoding} for encoding in encodings]

    kept = base.embed(base.EmbedAndKeep(list(texts)), MockEncoder(), "a_namespace")
    assert kept == [
        {"a_namespace": text + " " + encoding}
        for text, encoding in zip(texts, encodings)
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_simple_action_strlist_w_some_emb() -> None:
    """In a mixed list, only the individually wrapped items are expected encoded."""
    plain, second, third = "test1", "test2", "test3"
    second_encoding = base.stringify_embedding(list(encoded_keyword + second))
    third_encoding = base.stringify_embedding(list(encoded_keyword + third))

    embedded = base.embed(
        [plain, base.Embed(second), base.Embed(third)], MockEncoder(), "a_namespace"
    )
    assert embedded == [
        {"a_namespace": plain},
        {"a_namespace": second_encoding},
        {"a_namespace": third_encoding},
    ]

    kept = base.embed(
        [plain, base.EmbedAndKeep(second), base.EmbedAndKeep(third)],
        MockEncoder(),
        "a_namespace",
    )
    assert kept == [
        {"a_namespace": plain},
        {"a_namespace": second + " " + second_encoding},
        {"a_namespace": third + " " + third_encoding},
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_action_w_namespace_no_emb() -> None:
    """A list of raw single-namespace dicts is expected back unchanged."""
    texts = ["test1", "test2", "test3"]
    result = base.embed(
        [{"test_namespace": text} for text in texts],
        MockEncoder(),
    )
    assert result == [{"test_namespace": text} for text in texts]
@pytest.mark.requires("vowpal_wabbit_next")
def test_action_w_namespace_w_emb() -> None:
    """Each dict value wrapped in Embed / EmbedAndKeep is expected encoded."""
    texts = ["test1", "test2", "test3"]
    encodings = [
        base.stringify_embedding(list(encoded_keyword + text)) for text in texts
    ]

    embedded = base.embed(
        [{"test_namespace": base.Embed(text)} for text in texts],
        MockEncoder(),
    )
    assert embedded == [{"test_namespace": encoding} for encoding in encodings]

    kept = base.embed(
        [{"test_namespace": base.EmbedAndKeep(text)} for text in texts],
        MockEncoder(),
    )
    assert kept == [
        {"test_namespace": text + " " + encoding}
        for text, encoding in zip(texts, encodings)
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_action_w_namespace_w_emb2() -> None:
    """Wrapping a whole list of distinct-namespace dicts encodes every value."""
    namespaces = ["test_namespace1", "test_namespace2", "test_namespace3"]
    texts = ["test1", "test2", "test3"]
    encodings = [
        base.stringify_embedding(list(encoded_keyword + text)) for text in texts
    ]

    embedded = base.embed(
        base.Embed([{ns: text} for ns, text in zip(namespaces, texts)]),
        MockEncoder(),
    )
    assert embedded == [
        {ns: encoding} for ns, encoding in zip(namespaces, encodings)
    ]

    kept = base.embed(
        base.EmbedAndKeep([{ns: text} for ns, text in zip(namespaces, texts)]),
        MockEncoder(),
    )
    assert kept == [
        {ns: text + " " + encoding}
        for ns, text, encoding in zip(namespaces, texts, encodings)
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_action_w_namespace_w_some_emb() -> None:
    """In a list of namespace dicts, only wrapped values are expected encoded."""
    plain, second, third = "test1", "test2", "test3"
    second_encoding = base.stringify_embedding(list(encoded_keyword + second))
    third_encoding = base.stringify_embedding(list(encoded_keyword + third))

    embedded = base.embed(
        [
            {"test_namespace": plain},
            {"test_namespace": base.Embed(second)},
            {"test_namespace": base.Embed(third)},
        ],
        MockEncoder(),
    )
    assert embedded == [
        {"test_namespace": plain},
        {"test_namespace": second_encoding},
        {"test_namespace": third_encoding},
    ]

    kept = base.embed(
        [
            {"test_namespace": plain},
            {"test_namespace": base.EmbedAndKeep(second)},
            {"test_namespace": base.EmbedAndKeep(third)},
        ],
        MockEncoder(),
    )
    assert kept == [
        {"test_namespace": plain},
        {"test_namespace": second + " " + second_encoding},
        {"test_namespace": third + " " + third_encoding},
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_action_w_namespace_w_emb_w_more_than_one_item_in_first_dict() -> None:
    """Two-namespace action dicts: the wrapped namespace is encoded, the other raw."""
    texts = ["test1", "test2", "test3"]
    encodings = [
        base.stringify_embedding(list(encoded_keyword + text)) for text in texts
    ]

    embedded = base.embed(
        [
            {"test_namespace": base.Embed(text), "test_namespace2": text}
            for text in texts
        ],
        MockEncoder(),
    )
    assert embedded == [
        {"test_namespace": encoding, "test_namespace2": text}
        for text, encoding in zip(texts, encodings)
    ]

    kept = base.embed(
        [
            {"test_namespace": base.EmbedAndKeep(text), "test_namespace2": text}
            for text in texts
        ],
        MockEncoder(),
    )
    assert kept == [
        {"test_namespace": text + " " + encoding, "test_namespace2": text}
        for text, encoding in zip(texts, encodings)
    ]
@pytest.mark.requires("vowpal_wabbit_next")
def test_one_namespace_w_list_of_features_no_emb() -> None:
    """A namespace holding a list of raw strings is expected back unchanged."""
    first, second = "test1", "test2"
    result = base.embed({"test_namespace": [first, second]}, MockEncoder())
    assert result == [{"test_namespace": [first, second]}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_one_namespace_w_list_of_features_w_some_emb() -> None:
    """Within a namespace's feature list, only the Embed-wrapped item is encoded."""
    plain, wrapped = "test1", "test2"
    wrapped_encoding = base.stringify_embedding(list(encoded_keyword + wrapped))

    result = base.embed({"test_namespace": [plain, base.Embed(wrapped)]}, MockEncoder())
    assert result == [{"test_namespace": [plain, wrapped_encoding]}]
@pytest.mark.requires("vowpal_wabbit_next")
def test_nested_list_features_throws() -> None:
    """A namespace whose value is a list of lists must raise ValueError."""
    encoder = MockEncoder()
    with pytest.raises(ValueError):
        base.embed({"test_namespace": [[1, 2], [3, 4]]}, encoder)
@pytest.mark.requires("vowpal_wabbit_next")
def test_dict_in_list_throws() -> None:
    """A namespace whose value is a list of dicts must raise ValueError."""
    encoder = MockEncoder()
    with pytest.raises(ValueError):
        base.embed({"test_namespace": [{"a": 1}, {"b": 2}]}, encoder)
@pytest.mark.requires("vowpal_wabbit_next")
def test_nested_dict_throws() -> None:
    """A namespace whose value is a dict containing a dict must raise ValueError."""
    encoder = MockEncoder()
    with pytest.raises(ValueError):
        base.embed({"test_namespace": {"a": {"b": 1}}}, encoder)
@pytest.mark.requires("vowpal_wabbit_next")
def test_list_of_tuples_throws() -> None:
    """A namespace whose value is a list of tuples must raise ValueError."""
    encoder = MockEncoder()
    with pytest.raises(ValueError):
        base.embed({"test_namespace": [("a", 1), ("b", 2)]}, encoder)

@ -0,0 +1,15 @@
from typing import Any, List
class MockEncoder:
    """Test double for an encoder: marks its input instead of embedding it."""

    def encode(self, to_encode: str) -> str:
        """Return ``to_encode`` with the literal ``"[encoded]"`` prepended."""
        marker = "[encoded]"
        # Plain concatenation on purpose: a non-str argument raises TypeError.
        return marker + to_encode
class MockEncoderReturnsList:
    """Test double for an encoder that returns fixed list-valued embeddings."""

    def encode(self, to_encode: Any) -> List:
        """Return a constant embedding shaped after the input.

        Args:
            to_encode: Either a single string or a list of items.

        Returns:
            ``[1.0, 2.0]`` for a string, or a list containing one
            ``[1.0, 2.0]`` per element for a list (empty list in, empty
            list out).

        Raises:
            ValueError: If ``to_encode`` is neither a ``str`` nor a ``list``.
        """
        if isinstance(to_encode, str):
            return [1.0, 2.0]
        # Check against the builtin ``list`` rather than the ``typing.List``
        # alias, which is meant for annotations, not runtime type checks.
        if isinstance(to_encode, list):
            return [[1.0, 2.0] for _ in to_encode]
        raise ValueError("Invalid input type for unit test")

@ -2,6 +2,8 @@ from typing import Iterator, List
import pytest
from . import is_libcublas_available
@pytest.fixture(scope="module", autouse=True)
def check_spacy_model() -> Iterator[None]:
@ -12,6 +14,13 @@ def check_spacy_model() -> Iterator[None]:
yield
@pytest.fixture(scope="module", autouse=True)
def check_libcublas() -> Iterator[None]:
    """Module-scoped autouse fixture that skips when libcublas is reported missing.

    Calls ``pytest.skip`` if ``is_libcublas_available()`` returns a falsy
    value; otherwise yields control to the tests with no teardown work.
    """
    libcublas_present = is_libcublas_available()
    if not libcublas_present:
        pytest.skip(reason="libcublas.so is not available")
    yield
@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
@pytest.mark.parametrize(
"analyzed_fields,should_contain",

@ -3,6 +3,8 @@ from typing import Iterator, List
import pytest
from . import is_libcublas_available
@pytest.fixture(scope="module", autouse=True)
def check_spacy_model() -> Iterator[None]:
@ -13,6 +15,13 @@ def check_spacy_model() -> Iterator[None]:
yield
@pytest.fixture(scope="module", autouse=True)
def check_libcublas() -> Iterator[None]:
    """Module-scoped autouse fixture that skips when libcublas is reported missing.

    Calls ``pytest.skip`` if ``is_libcublas_available()`` returns a falsy
    value; otherwise yields control to the tests with no teardown work.
    """
    libcublas_present = is_libcublas_available()
    if not libcublas_present:
        pytest.skip(reason="libcublas.so is not available")
    yield
@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
@pytest.mark.parametrize(
"analyzed_fields,should_contain",

Loading…
Cancel
Save