|
|
@ -1,25 +1,23 @@
|
|
|
|
import ctypes
import os
import platform
import re
import subprocess
import sys
from io import StringIO

import pkg_resources
|
|
|
|
|
|
|
|
|
|
|
|
class DualStreamProcessor:
    """Capture written text into a string while optionally echoing it to a stream.

    Instances are intended to stand in for ``sys.stdout`` so that model
    output can be collected (in ``self.output``) and, when ``stream`` is
    supplied, streamed live at the same time.
    """

    def __init__(self, stream=None):
        # Optional live-echo target (e.g. the real sys.stdout); None disables echoing.
        self.stream = stream
        # Accumulated text written so far, with lone newlines stripped.
        self.output = ""

    def write(self, text):
        """Strip single newlines from ``text``, echo it, and append to ``output``."""
        # Drop any newline NOT followed by another newline; consecutive
        # newlines (blank lines) are preserved.
        cleaned_text = re.sub(r"\n(?!\n)", "", text)
        if self.stream is not None:
            self.stream.write(cleaned_text)
            # Flush immediately so streamed tokens appear without buffering delay.
            self.stream.flush()
        self.output += cleaned_text
|
|
# TODO: provide a config file to make this more robust
# Relative path to the bundled native llmodel build directory. Backslashes are
# doubled — presumably so the path survives a later layer of string escaping
# on Windows; TODO confirm against the code that consumes this constant.
LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
|
|
|
@ -175,7 +173,7 @@ class LLModel:
|
|
|
|
repeat_penalty: float = 1.2,
|
|
|
|
repeat_penalty: float = 1.2,
|
|
|
|
repeat_last_n: int = 10,
|
|
|
|
repeat_last_n: int = 10,
|
|
|
|
context_erase: float = .5,
|
|
|
|
context_erase: float = .5,
|
|
|
|
std_passthrough: bool = False) -> str:
|
|
|
|
streaming: bool = False) -> str:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Generate response from model from a prompt.
|
|
|
|
Generate response from model from a prompt.
|
|
|
|
|
|
|
|
|
|
|
@ -183,12 +181,8 @@ class LLModel:
|
|
|
|
----------
|
|
|
|
----------
|
|
|
|
prompt: str
|
|
|
|
prompt: str
|
|
|
|
Question, task, or conversation for model to respond to
|
|
|
|
Question, task, or conversation for model to respond to
|
|
|
|
add_default_header: bool, optional
|
|
|
|
streaming: bool
|
|
|
|
Whether to add a prompt header (default is True)
|
|
|
|
Stream response to stdout
|
|
|
|
add_default_footer: bool, optional
|
|
|
|
|
|
|
|
Whether to add a prompt footer (default is True)
|
|
|
|
|
|
|
|
verbose: bool, optional
|
|
|
|
|
|
|
|
Whether to print prompt and response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns
|
|
|
|
Returns
|
|
|
|
-------
|
|
|
|
-------
|
|
|
@ -198,13 +192,14 @@ class LLModel:
|
|
|
|
prompt = prompt.encode('utf-8')
|
|
|
|
prompt = prompt.encode('utf-8')
|
|
|
|
prompt = ctypes.c_char_p(prompt)
|
|
|
|
prompt = ctypes.c_char_p(prompt)
|
|
|
|
|
|
|
|
|
|
|
|
# Change stdout to StringIO so we can collect response
|
|
|
|
|
|
|
|
old_stdout = sys.stdout
|
|
|
|
old_stdout = sys.stdout
|
|
|
|
collect_response = StringIO()
|
|
|
|
|
|
|
|
if std_passthrough:
|
|
|
|
stream_processor = DualStreamProcessor()
|
|
|
|
sys.stdout = DualOutput(old_stdout, collect_response)
|
|
|
|
|
|
|
|
else:
|
|
|
|
if streaming:
|
|
|
|
sys.stdout = collect_response
|
|
|
|
stream_processor.stream = sys.stdout
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sys.stdout = stream_processor
|
|
|
|
|
|
|
|
|
|
|
|
context = LLModelPromptContext(
|
|
|
|
context = LLModelPromptContext(
|
|
|
|
logits_size=logits_size,
|
|
|
|
logits_size=logits_size,
|
|
|
@ -228,13 +223,10 @@ class LLModel:
|
|
|
|
RecalculateCallback(self._recalculate_callback),
|
|
|
|
RecalculateCallback(self._recalculate_callback),
|
|
|
|
context)
|
|
|
|
context)
|
|
|
|
|
|
|
|
|
|
|
|
response = collect_response.getvalue()
|
|
|
|
# Revert to old stdout
|
|
|
|
sys.stdout = old_stdout
|
|
|
|
sys.stdout = old_stdout
|
|
|
|
|
|
|
|
|
|
|
|
# Remove the unnecessary new lines from response
|
|
|
|
return stream_processor.output
|
|
|
|
response = re.sub(r"\n(?!\n)", "", response).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Empty prompt callback
|
|
|
|
# Empty prompt callback
|
|
|
|
@staticmethod
|
|
|
|
@staticmethod
|
|
|
|