Fix prompt context so it is preserved on the LLModel instance between calls

pull/981/head
Richard Guo 1 year ago
parent 85964a7635
commit a99cc34efb

@ -125,6 +125,7 @@ class LLModel:
# Initialize every piece of instance state to None.
def __init__(self):
# Passed to llmodel.llmodel_prompt as the model argument; __del__ only acts
# on it when it is not None.
self.model = None
self.model_name = None
# Prompt context kept on the instance: the prompt code paths build a new
# LLModelPromptContext only while this is still None, and otherwise hand the
# stored object to llmodel_prompt again.
self.context = None
def __del__(self):
if self.model is not None:
@ -211,27 +212,29 @@ class LLModel:
sys.stdout = stream_processor
context = LLModelPromptContext(
logits_size=logits_size,
tokens_size=tokens_size,
n_past=n_past,
n_ctx=n_ctx,
n_predict=n_predict,
top_k=top_k,
top_p=top_p,
temp=temp,
n_batch=n_batch,
repeat_penalty=repeat_penalty,
repeat_last_n=repeat_last_n,
context_erase=context_erase
)
if self.context is None:
self.context = LLModelPromptContext(
logits_size=logits_size,
tokens_size=tokens_size,
n_past=n_past,
n_ctx=n_ctx,
n_predict=n_predict,
top_k=top_k,
top_p=top_p,
temp=temp,
n_batch=n_batch,
repeat_penalty=repeat_penalty,
repeat_last_n=repeat_last_n,
context_erase=context_erase
)
llmodel.llmodel_prompt(self.model,
prompt,
PromptCallback(self._prompt_callback),
ResponseCallback(self._response_callback),
RecalculateCallback(self._recalculate_callback),
context)
self.context)
# Revert to old stdout
sys.stdout = old_stdout
@ -262,20 +265,21 @@ class LLModel:
prompt = prompt.encode('utf-8')
prompt = ctypes.c_char_p(prompt)
context = LLModelPromptContext(
logits_size=logits_size,
tokens_size=tokens_size,
n_past=n_past,
n_ctx=n_ctx,
n_predict=n_predict,
top_k=top_k,
top_p=top_p,
temp=temp,
n_batch=n_batch,
repeat_penalty=repeat_penalty,
repeat_last_n=repeat_last_n,
context_erase=context_erase
)
if self.context is None:
self.context = LLModelPromptContext(
logits_size=logits_size,
tokens_size=tokens_size,
n_past=n_past,
n_ctx=n_ctx,
n_predict=n_predict,
top_k=top_k,
top_p=top_p,
temp=temp,
n_batch=n_batch,
repeat_penalty=repeat_penalty,
repeat_last_n=repeat_last_n,
context_erase=context_erase
)
# Put response tokens into an output queue
def _generator_response_callback(token_id, response):
@ -305,7 +309,7 @@ class LLModel:
PromptCallback(self._prompt_callback),
ResponseCallback(_generator_response_callback),
RecalculateCallback(self._recalculate_callback),
context))
self.context))
thread.start()
# Generator

Loading…
Cancel
Save