From c37fd29fd896695963f04217d7e554139cf5b2f8 Mon Sep 17 00:00:00 2001 From: olgavrou Date: Fri, 18 Aug 2023 02:22:00 -0400 Subject: [PATCH] move tests to correct directory and cleanup slates examples --- .../langchain/chains/rl_chain/__init__.py | 5 - .../chains/rl_chain/requirements.txt | 7 - .../tests/test_slates_text_embedder.py | 124 ------------------ .../chains/rl_chain/tests/test_utils.py | 14 -- libs/langchain/pyproject.toml | 1 + .../rl_chain}/test_pick_best_chain_call.py | 6 +- .../rl_chain}/test_pick_best_text_embedder.py | 6 +- .../rl_chain}/test_rl_chain_base_embedder.py | 6 +- .../unit_tests/chains/rl_chain/test_utils.py | 3 + 9 files changed, 7 insertions(+), 165 deletions(-) delete mode 100644 libs/langchain/langchain/chains/rl_chain/requirements.txt delete mode 100644 libs/langchain/langchain/chains/rl_chain/tests/test_slates_text_embedder.py delete mode 100644 libs/langchain/langchain/chains/rl_chain/tests/test_utils.py rename libs/langchain/{langchain/chains/rl_chain/tests => tests/unit_tests/chains/rl_chain}/test_pick_best_chain_call.py (99%) rename libs/langchain/{langchain/chains/rl_chain/tests => tests/unit_tests/chains/rl_chain}/test_pick_best_text_embedder.py (99%) rename libs/langchain/{langchain/chains/rl_chain/tests => tests/unit_tests/chains/rl_chain}/test_rl_chain_base_embedder.py (99%) create mode 100644 libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py diff --git a/libs/langchain/langchain/chains/rl_chain/__init__.py b/libs/langchain/langchain/chains/rl_chain/__init__.py index 1d9c216cad..35b0ed7b49 100644 --- a/libs/langchain/langchain/chains/rl_chain/__init__.py +++ b/libs/langchain/langchain/chains/rl_chain/__init__.py @@ -1,9 +1,4 @@ from .pick_best_chain import PickBest -from .slates_chain import ( - SlatesPersonalizerChain, - SlatesRandomPolicy, - SlatesFirstChoicePolicy, -) from .rl_chain_base import ( Embed, BasedOn, diff --git a/libs/langchain/langchain/chains/rl_chain/requirements.txt b/libs/langchain/langchain/chains/rl_chain/requirements.txt deleted file mode 100644 index faf213caed..0000000000 --- a/libs/langchain/langchain/chains/rl_chain/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -vowpal-wabbit-next -langchain -openai -sentence_transformers -pandas -numpy -matplotlib diff --git a/libs/langchain/langchain/chains/rl_chain/tests/test_slates_text_embedder.py b/libs/langchain/langchain/chains/rl_chain/tests/test_slates_text_embedder.py deleted file mode 100644 index b5c094bd55..0000000000 --- a/libs/langchain/langchain/chains/rl_chain/tests/test_slates_text_embedder.py +++ /dev/null @@ -1,124 +0,0 @@ -import sys - -sys.path.append("..") - -import rl_chain.slates_chain as slates -from test_utils import MockEncoder - -import pytest - -encoded_keyword = "[encoded]" -encoded_text = "[ e n c o d e d ] " - - -def test_slate_text_creation_no_label_no_emb(): - named_actions = {"prefix": ["0", "1"], "context": ["bla"], "suffix": ["0", "1"]} - expected = """slates shared |\nslates action 0 |Action 0\nslates action 0 |Action 1\nslates action 1 |Action bla\nslates action 2 |Action 0\nslates action 2 |Action 1\nslates slot |\nslates slot |\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder() - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on={} - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected - - -def _str(embedding): - return " ".join([f"{i}:{e}" for i, e in enumerate(embedding)]) - - -def test_slate_text_creation_no_label_w_emb(): - action00 = "0" - action01 = "1" - action10 = "bla" - action20 = "0" - action21 = "1" - encoded_action00 = _str(encoded_keyword + action00) - encoded_action01 = _str(encoded_keyword + action01) - encoded_action10 = _str(encoded_keyword + action10) - encoded_action20 = _str(encoded_keyword + action20) - encoded_action21 = _str(encoded_keyword + action21) - - named_actions = { - "prefix": slates.base.Embed(["0", "1"]), - "context": slates.base.Embed(["bla"]), - "suffix": slates.base.Embed(["0", "1"]), - } - expected = f"""slates shared |\nslates action 0 |Action {encoded_action00}\nslates action 0 |Action {encoded_action01}\nslates action 1 |Action {encoded_action10}\nslates action 2 |Action {encoded_action20}\nslates action 2 |Action {encoded_action21}\nslates slot |\nslates slot |\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder(model=MockEncoder()) - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on={} - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected - - -def test_slate_text_create_no_label_w_embed_and_keep(): - action00 = "0" - action01 = "1" - action10 = "bla" - action20 = "0" - action21 = "1" - encoded_action00 = _str(encoded_keyword + action00) - encoded_action01 = _str(encoded_keyword + action01) - encoded_action10 = _str(encoded_keyword + action10) - encoded_action20 = _str(encoded_keyword + action20) - encoded_action21 = _str(encoded_keyword + action21) - - named_actions = { - "prefix": slates.base.EmbedAndKeep(["0", "1"]), - "context": slates.base.EmbedAndKeep(["bla"]), - "suffix": slates.base.EmbedAndKeep(["0", "1"]), - } - expected = f"""slates shared |\nslates action 0 |Action {action00 + " " + encoded_action00}\nslates action 0 |Action {action01 + " " + encoded_action01}\nslates action 1 |Action {action10 + " " + encoded_action10}\nslates action 2 |Action {action20 + " " + encoded_action20}\nslates action 2 |Action {action21 + " " + encoded_action21}\nslates slot |\nslates slot |\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder(model=MockEncoder()) - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on={} - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected - - -def test_slates_raw_features_underscored(): - action00 = "this is a long action 0" - action01 = "this is a long action 1" - action00_underscored = action00.replace(" ", "_") - action01_underscored = action01.replace(" ", "_") - encoded_action00 = _str(encoded_keyword + action00) - encoded_action01 = _str(encoded_keyword + action01) - - ctx_str = "this is a long context" - ctx_str_underscored = ctx_str.replace(" ", "_") - encoded_ctx_str = encoded_text + " ".join(char for char in ctx_str) - - # No Embeddings - named_actions = {"prefix": [action00, action01]} - context = {"context": ctx_str} - expected_no_embed = f"""slates shared |context {ctx_str_underscored} \nslates action 0 |Action {action00_underscored}\nslates action 0 |Action {action01_underscored}\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder(model=MockEncoder()) - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on=context - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected_no_embed - - # Just embeddings - named_actions = {"prefix": slates.base.Embed([action00, action01])} - context = {"context": slates.base.Embed(ctx_str)} - expected_embed = f"""slates shared |context {encoded_ctx_str} \nslates action 0 |Action {encoded_action00}\nslates action 0 |Action {encoded_action01}\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder(model=MockEncoder()) - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on=context - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected_embed - - # Embeddings and raw features - named_actions = {"prefix": slates.base.EmbedAndKeep([action00, action01])} - context = {"context": slates.base.EmbedAndKeep(ctx_str)} - expected_embed_and_keep = f"""slates shared |context {ctx_str_underscored + " " + encoded_ctx_str} \nslates action 0 |Action {action00_underscored + " " + encoded_action00}\nslates action 0 |Action {action01_underscored + " " + encoded_action01}\nslates slot |""" - feature_embedder = slates.SlatesFeatureEmbedder(model=MockEncoder()) - event = slates.SlatesPersonalizerChain.Event( - inputs={}, to_select_from=named_actions, based_on=context - ) - vw_str_ex = feature_embedder.format(event) - assert vw_str_ex == expected_embed_and_keep diff --git a/libs/langchain/langchain/chains/rl_chain/tests/test_utils.py b/libs/langchain/langchain/chains/rl_chain/tests/test_utils.py deleted file mode 100644 index 8b3773165e..0000000000 --- a/libs/langchain/langchain/chains/rl_chain/tests/test_utils.py +++ /dev/null @@ -1,14 +0,0 @@ -from rl_chain import SelectionScorer -from typing import Dict, Any - - -class MockScorer(SelectionScorer): - def score_response( - self, inputs: Dict[str, Any], llm_response: str, **kwargs - ) -> float: - return float(llm_response) - - -class MockEncoder: - def encode(self, to_encode): - return "[encoded]" + to_encode diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 8c3c8c18df..25087bf228 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -125,6 +125,7 @@ newspaper3k = {version = "^0.2.8", optional = true} amazon-textract-caller = {version = "<2", optional = true} xata = {version = "^1.0.0a7", optional = true} xmltodict = {version = "^0.13.0", optional = true} +vowpal-wabbit-next = "0.6.0" [tool.poetry.group.test.dependencies] diff --git a/libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_chain_call.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py similarity index 99% rename from libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_chain_call.py rename to libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py index 6c8db426d4..3e739f44a5 100644 --- a/libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_chain_call.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_chain_call.py @@ -1,8 +1,4 @@ -import sys - -sys.path.append("..") - -import rl_chain.pick_best_chain as pick_best_chain +import langchain.chains.rl_chain.pick_best_chain as pick_best_chain from test_utils import MockEncoder import pytest from langchain.prompts.prompt import PromptTemplate diff --git a/libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_text_embedder.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py similarity index 99% rename from libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_text_embedder.py rename to libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py index 29d8d9af69..7dc6d7f474 100644 --- a/libs/langchain/langchain/chains/rl_chain/tests/test_pick_best_text_embedder.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_pick_best_text_embedder.py @@ -1,8 +1,4 @@ -import sys - -sys.path.append("..") - -import rl_chain.pick_best_chain as pick_best_chain +import langchain.chains.rl_chain.pick_best_chain as pick_best_chain from test_utils import MockEncoder import pytest diff --git a/libs/langchain/langchain/chains/rl_chain/tests/test_rl_chain_base_embedder.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_rl_chain_base_embedder.py similarity index 99% rename from libs/langchain/langchain/chains/rl_chain/tests/test_rl_chain_base_embedder.py rename to libs/langchain/tests/unit_tests/chains/rl_chain/test_rl_chain_base_embedder.py index 7ae232bbca..4f9ddcc1ac 100644 --- a/libs/langchain/langchain/chains/rl_chain/tests/test_rl_chain_base_embedder.py +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_rl_chain_base_embedder.py @@ -1,8 +1,4 @@ -import sys - -sys.path.append("..") - -import rl_chain.rl_chain_base as base +import langchain.chains.rl_chain.rl_chain_base as base from test_utils import MockEncoder import pytest diff --git a/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py b/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py new file mode 100644 index 0000000000..6d54d20d92 --- /dev/null +++ b/libs/langchain/tests/unit_tests/chains/rl_chain/test_utils.py @@ -0,0 +1,3 @@ +class MockEncoder: + def encode(self, to_encode): + return "[encoded]" + to_encode