added model class

pull/15/head
cassanof 12 months ago
parent dbfc7c6a4f
commit af90f4444d

@@ -1,3 +1,4 @@
from .py_generate import PyGenerator
from .rs_generate import RsGenerator
from .factory import generator_factory
from .factory import generator_factory, model_factory
from .model import ModelBase, GPT4, GPT35

@@ -1,6 +1,8 @@
from .py_generate import PyGenerator
from .rs_generate import RsGenerator
from .generator_types import Generator
from .model import ModelBase, GPT4, GPT35
def generator_factory(lang: str) -> Generator:
if lang == "py" or lang == "python":
@@ -9,3 +11,12 @@ def generator_factory(lang: str) -> Generator:
return RsGenerator()
else:
raise ValueError(f"Invalid language for generator: {lang}")
def model_factory(model_name: str) -> ModelBase:
if model_name == "gpt-4":
return GPT4()
elif model_name == "gpt-3.5-turbo":
return GPT35()
else:
raise ValueError(f"Invalid model name: {model_name}")

@@ -1,16 +1,19 @@
from typing import List, Optional, Union
from abc import abstractmethod, ABC
from generators.model import ModelBase
class Generator:
@abstractmethod
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
...
@abstractmethod
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -24,7 +27,7 @@ class Generator:
def internal_tests(
self,
func_sig: str,
model: str,
model: ModelBase,
committee_size: int = 1,
max_num_tests: int = 5
) -> List[str]:
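For reference, a stub that satisfies the updated interface, handy as a dry-run stand-in in tests; the tail of the func_impl parameter list beyond this hunk (self_reflection, num_comps, temperature) is assumed from the keywords used later in the diff:

from typing import List, Optional, Union
from generators.generator_types import Generator
from generators.model import ModelBase

class DryRunGenerator(Generator):
    """Hypothetical stub: returns canned strings instead of calling a model."""

    def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
        return f"({model.name}) the implementation fails the listed tests"

    def func_impl(
        self,
        func_sig: str,
        model: ModelBase,
        strategy: str,
        prev_func_impl: Optional[str] = None,
        feedback: Optional[str] = None,
        self_reflection: Optional[str] = None,
        num_comps: int = 1,
        temperature: float = 0.0,
    ) -> Union[str, List[str]]:
        body = func_sig + "\n    raise NotImplementedError\n"
        return body if num_comps == 1 else [body] * num_comps

    def internal_tests(
        self,
        func_sig: str,
        model: ModelBase,
        committee_size: int = 1,
        max_num_tests: int = 5,
    ) -> List[str]:
        return []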

@@ -1,4 +1,5 @@
import os
from generators.model import ModelBase
import openai
import random
from tenacity import (
@@ -11,9 +12,10 @@ from typing import Union, List, Optional, Callable
# openai.api_key = os.getenv("OPENAI_API_KEY")
def generic_generate_func_impl(
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl,
feedback,
@@ -34,7 +36,7 @@ def generic_generate_func_impl(
raise ValueError(
f"Invalid arguments: given `strategy=reflexion` but `prev_func_impl`, `feedback`, or `self_reflection` is None")
if model == "gpt-4" or model == "gpt-3.5-turbo":
if model.name == "gpt-4" or model.name == "gpt-3.5-turbo":
if strategy == "reflexion":
message = f"{REFLEXION_FEW_SHOT}\n[previous impl]:\n{prev_func_impl}\n\n[unit test results from previous impl]:\n{feedback}\n\n[reflection on previous impl]:\n{self_reflection}\n\n[improved impl]:\n{func_sig}"
# func_bodies is a really bad name, as it can also be just 1 string
@@ -44,7 +46,7 @@ def generic_generate_func_impl(
print(' ----------------------- USER MESSAGE -----------------------')
print(message, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, REFLEXION_CHAT_INSTRUCTION,
func_bodies = gpt_chat(model.name, REFLEXION_CHAT_INSTRUCTION,
message, num_comps=num_comps, temperature=temperature)
else:
print('----------------------- SYSTEM MESSAGE -----------------------')
@@ -53,17 +55,17 @@ def generic_generate_func_impl(
print(' ----------------------- USER MESSAGE -----------------------')
print(func_sig, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, SIMPLE_CHAT_INSTRUCTION if strategy ==
func_bodies = gpt_chat(model.name, SIMPLE_CHAT_INSTRUCTION if strategy ==
"simple" else REFLEXION_CHAT_INSTRUCTION, func_sig, num_comps=num_comps, temperature=temperature)
else:
if strategy == "reflexion":
prompt = f"{REFLEXION_COMPLETION_INSTRUCTION}\n{prev_func_impl}\n\nunit tests:\n{feedback}\n\nhint:\n{self_reflection}\n\n# improved implementation\n{func_sig}"
func_bodies = gpt_completion(
model, prompt, num_comps=num_comps, temperature=temperature)
model.name, prompt, num_comps=num_comps, temperature=temperature)
else:
prompt = f"{SIMPLE_COMPLETION_INSTRUCTION}\n{func_sig}"
func_bodies = gpt_completion(
model, prompt, num_comps=num_comps, temperature=temperature)
model.name, prompt, num_comps=num_comps, temperature=temperature)
if num_comps == 1:
assert isinstance(func_bodies, str)
@@ -71,7 +73,7 @@ def generic_generate_func_impl(
print(func_sig + fix_body(func_bodies))
print('------------------------------------------')
return func_sig + fix_body(func_bodies)
else:
print('--------------------- GENERATED FUNC BODY ---------------------')
print([func_sig + fix_body(func_body) for func_body in func_bodies])
@@ -81,7 +83,7 @@ def generic_generate_func_impl(
def generic_generate_internal_tests(
func_sig: str,
model: str,
model: ModelBase,
committee_size: int,
max_num_tests: int,
TEST_GENERATION_FEW_SHOT: str,
@@ -95,19 +97,19 @@ def generic_generate_internal_tests(
Generates tests for a function using a refinement technique with the number
of specified committee members.
"""
if model == "gpt-4" or model == "gpt-3.5-turbo":
if model.name == "gpt-4" or model.name == "gpt-3.5-turbo":
if is_react:
message = f'{TEST_GENERATION_FEW_SHOT}\n\n[func signature]:\n{func_sig}\n\n[think]:'
output = gpt_chat(
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
model.name, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
print(f'React test generation output: {output}')
else:
message = f'{TEST_GENERATION_FEW_SHOT}\n\nfunc signature:\n{func_sig}\nunit tests:'
output = gpt_chat(
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
model.name, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
else:
prompt = f'{TEST_GENERATION_COMPLETION_INSTRUCTION}\n\nfunc signature:\n{func_sig}\nunit tests:'
output = gpt_completion(model, prompt, max_tokens=1024)
output = gpt_completion(model.name, prompt, max_tokens=1024)
all_tests = parse_tests(output) # type: ignore
valid_tests = [test for test in all_tests if is_syntax_valid(test)]
@@ -121,7 +123,7 @@ def generic_generate_internal_tests(
def generic_generate_self_reflection(
func: str,
feedback: str,
model: str,
model: ModelBase,
SELF_REFLECTION_CHAT_INSTRUCTION: str,
SELF_REFLECTION_COMPLETION_INSTRUCTION: str,
SELF_REFLECTION_FEW_SHOT: Optional[str] = None
@@ -129,18 +131,18 @@ def generic_generate_self_reflection(
if model == "gpt-4" or model == "gpt-3.5-turbo":
if SELF_REFLECTION_FEW_SHOT is not None:
reflection = gpt_chat(
model,
model.name,
SELF_REFLECTION_CHAT_INSTRUCTION,
f'{SELF_REFLECTION_FEW_SHOT}\n\n[function impl]:\n{func}\n\n[unit test results]:\n{feedback}\n\n[self-reflection]:')
print(f'Self reflection output: {reflection}')
else:
reflection = gpt_chat(
model,
model.name,
SELF_REFLECTION_CHAT_INSTRUCTION,
f'Function implementation:\n{func}\n\nUnit test results:\n{feedback}\n\nSelf-reflection:')
else:
reflection = gpt_completion(
model, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
model.name, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
return reflection # type: ignore
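Routing between the chat and completion code paths still works by matching model.name strings. An equivalent check for the two built-in models, sketched here only as an alternative and not something this diff introduces, could key off the new classes instead:

from generators.model import ModelBase, GPT4, GPT35

def is_chat_model(model: ModelBase) -> bool:
    # GPT4 and GPT35 are served through the chat endpoint; anything else
    # falls back to the completion path, mirroring the name checks above.
    return isinstance(model, (GPT4, GPT35))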
@@ -198,7 +200,6 @@ def gpt_chat(
return [choice.message.content for choice in response.choices] # type: ignore
def sample_n_random(items: List[str], n: int) -> List[str]:
"""Sample min(n, len(items)) random items from a list"""
assert n >= 0

@@ -0,0 +1,16 @@
class ModelBase():
def __init__(self, name):
self.name = name
def __repr__(self):
return f'{self.name}'
class GPT4(ModelBase):
def __init__(self):
self.name = "gpt-4"
class GPT35(ModelBase):
def __init__(self):
self.name = "gpt-3.5-turbo"

@@ -1,3 +1,4 @@
from generators.model import ModelBase
from .generator_types import Generator
from .generator_utils import generic_generate_func_impl, generic_generate_internal_tests, generic_generate_self_reflection
@@ -9,8 +10,10 @@ PY_SIMPLE_COMPLETION_INSTRUCTION = "# Write the body of this function only."
PY_REFLEXION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only.\n\n-----"
PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation.\n\n-----"
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION_V2 = "You are PythonGPT, an AI that only responds with only python code. You will be given a function signature and its docstring by the user. Respond only in code with a correct, efficient implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
# The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response."
# The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION_V2 = "You are PythonGPT, an AI that only responds with only python code. You will be given a function signature and its docstring by the user. Respond only in code with a correct, efficient implementation of the function. Do not include provided the docstring in your response."
PY_REFLEXION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
PY_REFLEXION_CHAT_INSTRUCTION_V2 = "You are PythonGPT. You will be given your previous implementation of a function, a series of unit tests results, and your self-reflection on your previous implementation. Apply the necessary changes below by responding only with the improved body of the function. Do not include the signature in your response. The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature. You will be given a few examples by the user."
PY_REFLEXION_FEW_SHOT_ADD = '''Example 1:
@@ -231,8 +234,9 @@ PY_TEST_GENERATION_COMPLETION_INSTRUCTION = f"""You are PythonGPT, an AI coding
PY_TEST_GENERATION_CHAT_INSTRUCTION = """You are CodexGPT, an AI coding assistant that can write unique, diverse, and intuitive unit tests for functions given the signature and docstring."""
class PyGenerator(Generator):
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
x = generic_generate_self_reflection(
func=func,
feedback=feedback,
@@ -246,7 +250,7 @@ class PyGenerator(Generator):
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -264,7 +268,7 @@ class PyGenerator(Generator):
num_comps=num_comps,
temperature=temperature,
REFLEXION_CHAT_INSTRUCTION=PY_REFLEXION_CHAT_INSTRUCTION,
REFLEXION_FEW_SHOT = PY_REFLEXION_FEW_SHOT_ADD,
REFLEXION_FEW_SHOT=PY_REFLEXION_FEW_SHOT_ADD,
SIMPLE_CHAT_INSTRUCTION=PY_SIMPLE_CHAT_INSTRUCTION,
REFLEXION_COMPLETION_INSTRUCTION=PY_REFLEXION_COMPLETION_INSTRUCTION,
SIMPLE_COMPLETION_INSTRUCTION=PY_SIMPLE_COMPLETION_INSTRUCTION,
@@ -272,8 +276,7 @@ class PyGenerator(Generator):
)
return x
def internal_tests(self, func_sig: str, model: str, committee_size: int = 1, max_num_tests: int = 5) -> List[str]:
def internal_tests(self, func_sig: str, model: ModelBase, committee_size: int = 1, max_num_tests: int = 5) -> List[str]:
def parse_tests(tests: str) -> List[str]:
return [test.strip() for test in tests.splitlines() if "assert" in test]
"""
@@ -310,12 +313,15 @@ def handle_entire_body_indent(func_body: str) -> str:
res = "\n".join([" " + line for line in split])
return res
def fix_turbo_response(func_body: str) -> str:
return fix_markdown(remove_unindented_signatures(func_body))
def fix_markdown(func_body: str) -> str:
return re.sub("`{3}", "", func_body)
def remove_unindented_signatures(code: str) -> str:
regex = r"^def\s+\w+\s*\("
@@ -327,7 +333,7 @@ def remove_unindented_signatures(code: str) -> str:
if re.match(regex, line):
signature_found = True
continue
if signature_found:
after_signature.append(line)
else:

@@ -1,3 +1,4 @@
from generators.model import ModelBase
from .generator_types import Generator
from .generator_utils import gpt_chat, gpt_completion, generic_generate_func_impl, generic_generate_internal_tests, generic_generate_self_reflection
@@ -137,7 +138,7 @@ def parse_tests(tests: str) -> List[str]:
class RsGenerator(Generator):
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
return generic_generate_self_reflection(
func=func,
feedback=feedback,
@@ -150,7 +151,7 @@ class RsGenerator(Generator):
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -178,7 +179,7 @@ class RsGenerator(Generator):
def internal_tests(
self,
func_sig: str,
model: str,
model: ModelBase,
committee_size: int = 1,
max_num_tests: int = 5
) -> List[str]:

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_immediate_refinement(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_immediate_refinement(
) -> None:
exe = executor_factory(language)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)
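Callers of the runner now pass the model name and let the runner build the model itself via model_factory. A call sketch along these lines, where the dataset fields and the log_path and verbose keywords are assumptions since the full signature is not visible in this hunk:

run_immediate_refinement(
    dataset=[{
        "prompt": 'def add(a: int, b: int) -> int:\n    """Add two integers."""\n',  # assumed HumanEval-style fields
        "entry_point": "add",
        "test": "def check(candidate):\n    assert candidate(1, 2) == 3",
    }],
    model_name="gpt-4",   # resolved to a GPT4 instance inside the runner
    language="py",
    max_iters=2,
    pass_at_k=1,
    log_path="./immediate_refinement_log.jsonl",  # assumed parameter name
    verbose=True,
)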

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_immediate_reflexion(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_immediate_reflexion(
) -> None:
exe = executor_factory(language)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)

@@ -108,7 +108,7 @@ pass@k: {args.pass_at_k}
# evaluate with pass@k
run_strategy(
dataset=dataset,
model=args.model,
model_name=args.model,
language=args.language,
max_iters=args.max_iters,
pass_at_k=args.pass_at_k,

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_reflexion(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_reflexion(
) -> None:
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)

@@ -2,17 +2,19 @@ import warnings
from lazzzy.ucs import ucs
from utils import enumerate_resume, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List, Set, Tuple
DEBUG = True
def debug_print(*args):
if DEBUG:
print(*args, flush=True)
class State:
def __init__(self, code: str, feedback: str, reflection: str, state: Tuple[bool]):
self.code = code
@@ -39,7 +41,7 @@ class State:
def run_reflexion_ucs(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -50,6 +52,7 @@ def run_reflexion_ucs(
) -> None:
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
model = model_factory(model_name)
num_items = len(dataset)
num_success = 0
@@ -70,12 +73,14 @@ def run_reflexion_ucs(
assert isinstance(cur_func_impl, str) # num_comps of 1
is_passing, feedback, state = exe.execute(cur_func_impl, tests_i)
debug_print(f"first attempt: \n{cur_func_impl}\n{feedback}\n{state}")
debug_print(
f"first attempt: \n{cur_func_impl}\n{feedback}\n{state}")
# if solved, exit--pass_at_k 1 early
if is_passing:
debug_print("solved at first attempt")
is_solved = exe.evaluate(item["entry_point"], cur_func_impl, item["test"])
is_solved = exe.evaluate(
item["entry_point"], cur_func_impl, item["test"])
num_success += 1 if is_solved else 0
break
@@ -118,21 +123,23 @@ def run_reflexion_ucs(
already_seen.add(new_func)
is_passing, feedback, new_state = exe.execute(new_func, tests_i)
debug_print(f"expanding: \n{new_func}\n{feedback}\n{new_state}")
is_passing, feedback, new_state = exe.execute(
new_func, tests_i)
debug_print(
f"expanding: \n{new_func}\n{feedback}\n{new_state}")
if is_passing:
# return immediately if solved
return set([(State(new_func, feedback, "", new_state), 0)])
new_reflection = gen.self_reflection(new_func, feedback, model)
new_reflection = gen.self_reflection(
new_func, feedback, model)
reflections.append(new_reflection)
num_failing = len([x for x in new_state if not x])
new_states.add(
(State(new_func, feedback, new_reflection, new_state), num_failing))
debug_print(f"returning new states: {new_states}")
return new_states
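Each expanded candidate is queued with a cost equal to the number of unit tests it still fails, computed with the same comprehension used above; a tiny worked example:

new_state = (True, False, True, False, False)        # per-test pass flags as returned by exe.execute
num_failing = len([x for x in new_state if not x])   # -> 3: this node costs 3 in the UCS frontier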
