added model class

pull/15/head
cassanof 12 months ago
parent dbfc7c6a4f
commit af90f4444d

@@ -1,3 +1,4 @@
from .py_generate import PyGenerator
from .rs_generate import RsGenerator
from .factory import generator_factory
from .factory import generator_factory, model_factory
from .model import ModelBase, GPT4, GPT35

@@ -1,6 +1,8 @@
from .py_generate import PyGenerator
from .rs_generate import RsGenerator
from .generator_types import Generator
from .model import ModelBase, GPT4, GPT35
def generator_factory(lang: str) -> Generator:
if lang == "py" or lang == "python":
@@ -9,3 +11,12 @@ def generator_factory(lang: str) -> Generator:
return RsGenerator()
else:
raise ValueError(f"Invalid language for generator: {lang}")
def model_factory(model_name: str) -> ModelBase:
if model_name == "gpt-4":
return GPT4()
elif model_name == "gpt-3.5-turbo":
return GPT35()
else:
raise ValueError(f"Invalid model name: {model_name}")

@@ -1,16 +1,19 @@
from typing import List, Optional, Union
from abc import abstractmethod, ABC
from generators.model import ModelBase
class Generator:
@abstractmethod
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
...
@abstractmethod
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -24,7 +27,7 @@ class Generator:
def internal_tests(
self,
func_sig: str,
model: str,
model: ModelBase,
committee_size: int = 1,
max_num_tests: int = 5
) -> List[str]:
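For reference, a stub that satisfies the updated interface, handy as a dry-run stand-in in tests; the tail of the func_impl parameter list beyond this hunk (self_reflection, num_comps, temperature) is assumed from the keywords used later in the diff:

from typing import List, Optional, Union
from generators.generator_types import Generator
from generators.model import ModelBase

class DryRunGenerator(Generator):
    """Hypothetical stub: returns canned strings instead of calling a model."""

    def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
        return f"({model.name}) the implementation fails the listed tests"

    def func_impl(
        self,
        func_sig: str,
        model: ModelBase,
        strategy: str,
        prev_func_impl: Optional[str] = None,
        feedback: Optional[str] = None,
        self_reflection: Optional[str] = None,
        num_comps: int = 1,
        temperature: float = 0.0,
    ) -> Union[str, List[str]]:
        body = func_sig + "\n    raise NotImplementedError\n"
        return body if num_comps == 1 else [body] * num_comps

    def internal_tests(
        self,
        func_sig: str,
        model: ModelBase,
        committee_size: int = 1,
        max_num_tests: int = 5,
    ) -> List[str]:
        return []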

@@ -1,4 +1,5 @@
import os
from generators.model import ModelBase
import openai
import random
from tenacity import (
@@ -11,9 +12,10 @@ from typing import Union, List, Optional, Callable
# openai.api_key = os.getenv("OPENAI_API_KEY")
def generic_generate_func_impl(
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl,
feedback,
@@ -34,7 +36,7 @@ def generic_generate_func_impl(
raise ValueError(
f"Invalid arguments: given `strategy=reflexion` but `prev_func_impl`, `feedback`, or `self_reflection` is None")
if model == "gpt-4" or model == "gpt-3.5-turbo":
if model.name == "gpt-4" or model.name == "gpt-3.5-turbo":
if strategy == "reflexion":
message = f"{REFLEXION_FEW_SHOT}\n[previous impl]:\n{prev_func_impl}\n\n[unit test results from previous impl]:\n{feedback}\n\n[reflection on previous impl]:\n{self_reflection}\n\n[improved impl]:\n{func_sig}"
# func_bodies is a really bad name, as it can also be just 1 string
@@ -44,7 +46,7 @@ def generic_generate_func_impl(
print(' ----------------------- USER MESSAGE -----------------------')
print(message, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, REFLEXION_CHAT_INSTRUCTION,
func_bodies = gpt_chat(model.name, REFLEXION_CHAT_INSTRUCTION,
message, num_comps=num_comps, temperature=temperature)
else:
print('----------------------- SYSTEM MESSAGE -----------------------')
@@ -53,17 +55,17 @@ def generic_generate_func_impl(
print(' ----------------------- USER MESSAGE -----------------------')
print(func_sig, flush=True)
print('----------------------------------------------')
func_bodies = gpt_chat(model, SIMPLE_CHAT_INSTRUCTION if strategy ==
func_bodies = gpt_chat(model.name, SIMPLE_CHAT_INSTRUCTION if strategy ==
"simple" else REFLEXION_CHAT_INSTRUCTION, func_sig, num_comps=num_comps, temperature=temperature)
else:
if strategy == "reflexion":
prompt = f"{REFLEXION_COMPLETION_INSTRUCTION}\n{prev_func_impl}\n\nunit tests:\n{feedback}\n\nhint:\n{self_reflection}\n\n# improved implementation\n{func_sig}"
func_bodies = gpt_completion(
model, prompt, num_comps=num_comps, temperature=temperature)
model.name, prompt, num_comps=num_comps, temperature=temperature)
else:
prompt = f"{SIMPLE_COMPLETION_INSTRUCTION}\n{func_sig}"
func_bodies = gpt_completion(
model, prompt, num_comps=num_comps, temperature=temperature)
model.name, prompt, num_comps=num_comps, temperature=temperature)
if num_comps == 1:
assert isinstance(func_bodies, str)
@@ -71,7 +73,7 @@ def generic_generate_func_impl(
print(func_sig + fix_body(func_bodies))
print('------------------------------------------')
return func_sig + fix_body(func_bodies)
else:
print('--------------------- GENERATED FUNC BODY ---------------------')
print([func_sig + fix_body(func_body) for func_body in func_bodies])
@@ -81,7 +83,7 @@ def generic_generate_func_impl(
def generic_generate_internal_tests(
func_sig: str,
model: str,
model: ModelBase,
committee_size: int,
max_num_tests: int,
TEST_GENERATION_FEW_SHOT: str,
@@ -95,19 +97,19 @@ def generic_generate_internal_tests(
Generates tests for a function using a refinement technique with the number
of specified committee members.
"""
if model == "gpt-4" or model == "gpt-3.5-turbo":
if model.name == "gpt-4" or model.name == "gpt-3.5-turbo":
if is_react:
message = f'{TEST_GENERATION_FEW_SHOT}\n\n[func signature]:\n{func_sig}\n\n[think]:'
output = gpt_chat(
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
model.name, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
print(f'React test generation output: {output}')
else:
message = f'{TEST_GENERATION_FEW_SHOT}\n\nfunc signature:\n{func_sig}\nunit tests:'
output = gpt_chat(
model, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
model.name, TEST_GENERATION_CHAT_INSTRUCTION, message, max_tokens=1024)
else:
prompt = f'{TEST_GENERATION_COMPLETION_INSTRUCTION}\n\nfunc signature:\n{func_sig}\nunit tests:'
output = gpt_completion(model, prompt, max_tokens=1024)
output = gpt_completion(model.name, prompt, max_tokens=1024)
all_tests = parse_tests(output) # type: ignore
valid_tests = [test for test in all_tests if is_syntax_valid(test)]
@@ -121,7 +123,7 @@ def generic_generate_internal_tests(
def generic_generate_self_reflection(
func: str,
feedback: str,
model: str,
model: ModelBase,
SELF_REFLECTION_CHAT_INSTRUCTION: str,
SELF_REFLECTION_COMPLETION_INSTRUCTION: str,
SELF_REFLECTION_FEW_SHOT: Optional[str] = None
@@ -129,18 +131,18 @@ def generic_generate_self_reflection(
if model == "gpt-4" or model == "gpt-3.5-turbo":
if SELF_REFLECTION_FEW_SHOT is not None:
reflection = gpt_chat(
model,
model.name,
SELF_REFLECTION_CHAT_INSTRUCTION,
f'{SELF_REFLECTION_FEW_SHOT}\n\n[function impl]:\n{func}\n\n[unit test results]:\n{feedback}\n\n[self-reflection]:')
print(f'Self reflection output: {reflection}')
else:
reflection = gpt_chat(
model,
model.name,
SELF_REFLECTION_CHAT_INSTRUCTION,
f'Function implementation:\n{func}\n\nUnit test results:\n{feedback}\n\nSelf-reflection:')
else:
reflection = gpt_completion(
model, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
model.name, f'{SELF_REFLECTION_COMPLETION_INSTRUCTION}\n{func}\n\n{feedback}\n\nExplanation:')
return reflection # type: ignore
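Routing between the chat and completion code paths still works by matching model.name strings. An equivalent check for the two built-in models, sketched here only as an alternative and not something this diff introduces, could key off the new classes instead:

from generators.model import ModelBase, GPT4, GPT35

def is_chat_model(model: ModelBase) -> bool:
    # GPT4 and GPT35 are served through the chat endpoint; anything else
    # falls back to the completion path, mirroring the name checks above.
    return isinstance(model, (GPT4, GPT35))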
@@ -198,7 +200,6 @@ def gpt_chat(
return [choice.message.content for choice in response.choices] # type: ignore
def sample_n_random(items: List[str], n: int) -> List[str]:
"""Sample min(n, len(items)) random items from a list"""
assert n >= 0

@@ -0,0 +1,16 @@
class ModelBase():
def __init__(self, name):
self.name = name
def __repr__(self):
return f'{self.name}'
class GPT4(ModelBase):
def __init__(self):
self.name = "gpt-4"
class GPT35(ModelBase):
def __init__(self):
self.name = "gpt-3.5-turbo"

@@ -1,3 +1,4 @@
from generators.model import ModelBase
from .generator_types import Generator
from .generator_utils import generic_generate_func_impl, generic_generate_internal_tests, generic_generate_self_reflection
@@ -9,8 +10,10 @@ PY_SIMPLE_COMPLETION_INSTRUCTION = "# Write the body of this function only."
PY_REFLEXION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only.\n\n-----"
PY_SELF_REFLECTION_COMPLETION_INSTRUCTION = "You are a Python writing assistant. You will be given a function implementation and a series of unit tests. Your goal is to write a few sentences to explain why your implementation is wrong as indicated by the tests. You will need this as a hint when you try again later. Only provide the few sentence description in your answer, not the implementation.\n\n-----"
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION_V2 = "You are PythonGPT, an AI that only responds with only python code. You will be given a function signature and its docstring by the user. Respond only in code with a correct, efficient implementation of the function. Do not include provided the docstring in your response." # The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
# The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION = "You are PythonGPT, an AI that only responds with python code, NOT ENGLISH. You will be given a function signature and its docstring by the user. Respond only in code with correct implementation of the function. Do not include provided the docstring in your response."
# The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature.
PY_SIMPLE_CHAT_INSTRUCTION_V2 = "You are PythonGPT, an AI that only responds with only python code. You will be given a function signature and its docstring by the user. Respond only in code with a correct, efficient implementation of the function. Do not include provided the docstring in your response."
PY_REFLEXION_CHAT_INSTRUCTION = "You are PythonGPT. You will be given your past function implementation, a series of unit tests, and a hint to change the implementation appropriately. Apply the changes below by writing the body of this function only. You should fill in the following text of the missing function body. For example, the first line of the completion should have 4 spaces for the indendation so that it fits syntactically with the preceding signature."
PY_REFLEXION_CHAT_INSTRUCTION_V2 = "You are PythonGPT. You will be given your previous implementation of a function, a series of unit tests results, and your self-reflection on your previous implementation. Apply the necessary changes below by responding only with the improved body of the function. Do not include the signature in your response. The first line of your response should have 4 spaces of indentation so that it fits syntactically with the user provided signature. You will be given a few examples by the user."
PY_REFLEXION_FEW_SHOT_ADD = '''Example 1:
@@ -231,8 +234,9 @@ PY_TEST_GENERATION_COMPLETION_INSTRUCTION = f"""You are PythonGPT, an AI coding
PY_TEST_GENERATION_CHAT_INSTRUCTION = """You are CodexGPT, an AI coding assistant that can write unique, diverse, and intuitive unit tests for functions given the signature and docstring."""
class PyGenerator(Generator):
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
x = generic_generate_self_reflection(
func=func,
feedback=feedback,
@@ -246,7 +250,7 @@ class PyGenerator(Generator):
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -264,7 +268,7 @@ class PyGenerator(Generator):
num_comps=num_comps,
temperature=temperature,
REFLEXION_CHAT_INSTRUCTION=PY_REFLEXION_CHAT_INSTRUCTION,
REFLEXION_FEW_SHOT = PY_REFLEXION_FEW_SHOT_ADD,
REFLEXION_FEW_SHOT=PY_REFLEXION_FEW_SHOT_ADD,
SIMPLE_CHAT_INSTRUCTION=PY_SIMPLE_CHAT_INSTRUCTION,
REFLEXION_COMPLETION_INSTRUCTION=PY_REFLEXION_COMPLETION_INSTRUCTION,
SIMPLE_COMPLETION_INSTRUCTION=PY_SIMPLE_COMPLETION_INSTRUCTION,
@@ -272,8 +276,7 @@ class PyGenerator(Generator):
)
return x
def internal_tests(self, func_sig: str, model: str, committee_size: int = 1, max_num_tests: int = 5) -> List[str]:
def internal_tests(self, func_sig: str, model: ModelBase, committee_size: int = 1, max_num_tests: int = 5) -> List[str]:
def parse_tests(tests: str) -> List[str]:
return [test.strip() for test in tests.splitlines() if "assert" in test]
"""
@@ -310,12 +313,15 @@ def handle_entire_body_indent(func_body: str) -> str:
res = "\n".join([" " + line for line in split])
return res
def fix_turbo_response(func_body: str) -> str:
return fix_markdown(remove_unindented_signatures(func_body))
def fix_markdown(func_body: str) -> str:
return re.sub("`{3}", "", func_body)
def remove_unindented_signatures(code: str) -> str:
regex = r"^def\s+\w+\s*\("
@@ -327,7 +333,7 @@ def remove_unindented_signatures(code: str) -> str:
if re.match(regex, line):
signature_found = True
continue
if signature_found:
after_signature.append(line)
else:

@@ -1,3 +1,4 @@
from generators.model import ModelBase
from .generator_types import Generator
from .generator_utils import gpt_chat, gpt_completion, generic_generate_func_impl, generic_generate_internal_tests, generic_generate_self_reflection
@@ -137,7 +138,7 @@ def parse_tests(tests: str) -> List[str]:
class RsGenerator(Generator):
def self_reflection(self, func: str, feedback: str, model: str) -> str:
def self_reflection(self, func: str, feedback: str, model: ModelBase) -> str:
return generic_generate_self_reflection(
func=func,
feedback=feedback,
@@ -150,7 +151,7 @@ class RsGenerator(Generator):
def func_impl(
self,
func_sig: str,
model: str,
model: ModelBase,
strategy: str,
prev_func_impl: Optional[str] = None,
feedback: Optional[str] = None,
@@ -178,7 +179,7 @@ class RsGenerator(Generator):
def internal_tests(
self,
func_sig: str,
model: str,
model: ModelBase,
committee_size: int = 1,
max_num_tests: int = 5
) -> List[str]:

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_immediate_refinement(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_immediate_refinement(
) -> None:
exe = executor_factory(language)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)
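Callers of the runner now pass the model name and let the runner build the model itself via model_factory. A call sketch along these lines, where the dataset fields and the log_path and verbose keywords are assumptions since the full signature is not visible in this hunk:

run_immediate_refinement(
    dataset=[{
        "prompt": 'def add(a: int, b: int) -> int:\n    """Add two integers."""\n',  # assumed HumanEval-style fields
        "entry_point": "add",
        "test": "def check(candidate):\n    assert candidate(1, 2) == 3",
    }],
    model_name="gpt-4",   # resolved to a GPT4 instance inside the runner
    language="py",
    max_iters=2,
    pass_at_k=1,
    log_path="./immediate_refinement_log.jsonl",  # assumed parameter name
    verbose=True,
)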

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_immediate_reflexion(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_immediate_reflexion(
) -> None:
exe = executor_factory(language)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)

@@ -108,7 +108,7 @@ pass@k: {args.pass_at_k}
# evaluate with pass@k
run_strategy(
dataset=dataset,
model=args.model,
model_name=args.model,
language=args.language,
max_iters=args.max_iters,
pass_at_k=args.pass_at_k,

@@ -1,13 +1,13 @@
from utils import enumerate_resume, make_printv, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List
def run_reflexion(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -17,6 +17,7 @@ def run_reflexion(
) -> None:
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
model = model_factory(model_name)
print_v = make_printv(verbose)

@@ -2,17 +2,19 @@ import warnings
from lazzzy.ucs import ucs
from utils import enumerate_resume, write_jsonl
from executors import executor_factory
from generators import generator_factory
from generators import generator_factory, model_factory
from typing import List, Set, Tuple
DEBUG = True
def debug_print(*args):
if DEBUG:
print(*args, flush=True)
class State:
def __init__(self, code: str, feedback: str, reflection: str, state: Tuple[bool]):
self.code = code
@@ -39,7 +41,7 @@ class State:
def run_reflexion_ucs(
dataset: List[dict],
model: str,
model_name: str,
language: str,
max_iters: int,
pass_at_k: int,
@@ -50,6 +52,7 @@ def run_reflexion_ucs(
) -> None:
exe = executor_factory(language, is_leet=is_leetcode)
gen = generator_factory(language)
model = model_factory(model_name)
num_items = len(dataset)
num_success = 0
@@ -70,12 +73,14 @@ def run_reflexion_ucs(
assert isinstance(cur_func_impl, str) # num_comps of 1
is_passing, feedback, state = exe.execute(cur_func_impl, tests_i)
debug_print(f"first attempt: \n{cur_func_impl}\n{feedback}\n{state}")
debug_print(
f"first attempt: \n{cur_func_impl}\n{feedback}\n{state}")
# if solved, exit--pass_at_k 1 early
if is_passing:
debug_print("solved at first attempt")
is_solved = exe.evaluate(item["entry_point"], cur_func_impl, item["test"])
is_solved = exe.evaluate(
item["entry_point"], cur_func_impl, item["test"])
num_success += 1 if is_solved else 0
break
@@ -118,21 +123,23 @@ def run_reflexion_ucs(
already_seen.add(new_func)
is_passing, feedback, new_state = exe.execute(new_func, tests_i)
debug_print(f"expanding: \n{new_func}\n{feedback}\n{new_state}")
is_passing, feedback, new_state = exe.execute(
new_func, tests_i)
debug_print(
f"expanding: \n{new_func}\n{feedback}\n{new_state}")
if is_passing:
# return immediately if solved
return set([(State(new_func, feedback, "", new_state), 0)])
new_reflection = gen.self_reflection(new_func, feedback, model)
new_reflection = gen.self_reflection(
new_func, feedback, model)
reflections.append(new_reflection)
num_failing = len([x for x in new_state if not x])
new_states.add(
(State(new_func, feedback, new_reflection, new_state), num_failing))
debug_print(f"returning new states: {new_states}")
return new_states
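Each expanded candidate is queued with a cost equal to the number of unit tests it still fails, computed with the same comprehension used above; a tiny worked example:

new_state = (True, False, True, False, False)        # per-test pass flags as returned by exe.execute
num_failing = len([x for x in new_state if not x])   # -> 3: this node costs 3 in the UCS frontier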
