deploying new version with streaming

pull/629/head
Richard Guo 1 year ago
parent bce2b3025b
commit 057b9f51bc

@@ -170,25 +170,25 @@ workflows:
           branches:
             only:
               - main
-#  build-py-deploy:
-#    jobs:
-#      - build-py-linux:
-#          filters:
-#            branches:
-#              only:
-#      - build-py-macos:
-#          filters:
-#            branches:
-#              only:
-#      - build-py-windows:
-#          filters:
-#            branches:
-#              only:
-#      - store-and-upload-wheels:
-#          filters:
-#            branches:
-#              only:
-#          requires:
-#            - build-py-windows
-#            - build-py-linux
-#            - build-py-macos
+  build-py-deploy:
+    jobs:
+      - build-py-linux:
+          filters:
+            branches:
+              only:
+      - build-py-macos:
+          filters:
+            branches:
+              only:
+      - build-py-windows:
+          filters:
+            branches:
+              only:
+      - store-and-upload-wheels:
+          filters:
+            branches:
+              only:
+          requires:
+            - build-py-windows
+            - build-py-linux
+            - build-py-macos

@@ -155,24 +155,26 @@ class GPT4All():
         print("Model downloaded at: " + download_path)
         return download_path
 
-    def generate(self, prompt: str, **generate_kwargs) -> str:
+    def generate(self, prompt: str, streaming: bool = False, **generate_kwargs) -> str:
         """
         Surfaced method of running generate without accessing model object.
 
         Args:
             prompt: Raw string to be passed to model.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
             Raw string of generated model response.
         """
-        return self.model.generate(prompt, **generate_kwargs)
+        return self.model.generate(prompt, streaming=streaming, **generate_kwargs)
 
     def chat_completion(self,
                         messages: List[Dict],
                         default_prompt_header: bool = True,
                         default_prompt_footer: bool = True,
                         verbose: bool = True,
+                        streaming: bool = True,
                         **generate_kwargs) -> str:
         """
         Format list of message dictionaries into a prompt and call model
@@ -189,6 +191,7 @@ class GPT4All():
                 before user/assistant role messages.
             default_prompt_footer: If True (default), add default footer at end of prompt.
             verbose: If True (default), print full prompt and generated response.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
@@ -206,7 +209,7 @@ class GPT4All():
         if verbose:
             print(full_prompt)
 
-        response = self.model.generate(full_prompt, **generate_kwargs)
+        response = self.model.generate(full_prompt, streaming=streaming, **generate_kwargs)
 
         if verbose:
             print(response)
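For context, a minimal usage sketch of the streaming parameter exposed by this change; the constructor arguments and model name below are assumptions for illustration only, not part of this diff:

from gpt4all import GPT4All

# Hypothetical model name, used purely for illustration.
gpt = GPT4All("ggml-gpt4all-j-v1.3-groovy")

# streaming=True echoes tokens to stdout as they are generated;
# the full response string is still returned at the end.
response = gpt.generate("Name three colors.", streaming=True)

# chat_completion defaults to streaming=True after this change;
# pass streaming=False to collect the response silently.
messages = [{"role": "user", "content": "Name three colors."}]
gpt.chat_completion(messages, verbose=False, streaming=False)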

@ -1,25 +1,23 @@
from io import StringIO
import pkg_resources import pkg_resources
import ctypes import ctypes
import os import os
import platform import platform
import re import re
import subprocess
import sys import sys
class DualOutput: class DualStreamProcessor:
def __init__(self, stdout, string_io): def __init__(self, stream=None):
self.stdout = stdout self.stream = stream
self.string_io = string_io self.output = ""
def write(self, text): def write(self, text):
self.stdout.write(text) cleaned_text = re.sub(r"\n(?!\n)", "", text)
self.string_io.write(text) if self.stream is not None:
self.stream.write(cleaned_text)
self.stream.flush()
self.output += cleaned_text
def flush(self):
# It's a good idea to also define a flush method that flushes both
# outputs, as sys.stdout is expected to have this method.
self.stdout.flush()
self.string_io.flush()
# TODO: provide a config file to make this more robust # TODO: provide a config file to make this more robust
LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\") LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
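A quick sketch of how the new class behaves, assuming DualStreamProcessor as defined above is in scope: single newlines in the incoming text are stripped by the re.sub call, the cleaned text is echoed to the optional stream, and everything is accumulated in .output for the caller.

import sys

processor = DualStreamProcessor(stream=sys.stdout)

# Single newlines are dropped ("Hello\nworld\n" -> "Helloworld");
# a doubled newline keeps one of the pair.
processor.write("Hello\nworld\n")

assert processor.output == "Helloworld"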
@ -175,7 +173,7 @@ class LLModel:
repeat_penalty: float = 1.2, repeat_penalty: float = 1.2,
repeat_last_n: int = 10, repeat_last_n: int = 10,
context_erase: float = .5, context_erase: float = .5,
std_passthrough: bool = False) -> str: streaming: bool = False) -> str:
""" """
Generate response from model from a prompt. Generate response from model from a prompt.
@ -183,12 +181,8 @@ class LLModel:
---------- ----------
prompt: str prompt: str
Question, task, or conversation for model to respond to Question, task, or conversation for model to respond to
add_default_header: bool, optional streaming: bool
Whether to add a prompt header (default is True) Stream response to stdout
add_default_footer: bool, optional
Whether to add a prompt footer (default is True)
verbose: bool, optional
Whether to print prompt and response
Returns Returns
------- -------
@ -198,13 +192,14 @@ class LLModel:
prompt = prompt.encode('utf-8') prompt = prompt.encode('utf-8')
prompt = ctypes.c_char_p(prompt) prompt = ctypes.c_char_p(prompt)
# Change stdout to StringIO so we can collect response
old_stdout = sys.stdout old_stdout = sys.stdout
collect_response = StringIO()
if std_passthrough: stream_processor = DualStreamProcessor()
sys.stdout = DualOutput(old_stdout, collect_response)
else: if streaming:
sys.stdout = collect_response stream_processor.stream = sys.stdout
sys.stdout = stream_processor
context = LLModelPromptContext( context = LLModelPromptContext(
logits_size=logits_size, logits_size=logits_size,
@@ -228,13 +223,10 @@ class LLModel:
                              RecalculateCallback(self._recalculate_callback),
                              context)
 
-        response = collect_response.getvalue()
+        # Revert to old stdout
         sys.stdout = old_stdout
 
-        # Remove the unnecessary new lines from response
-        response = re.sub(r"\n(?!\n)", "", response).strip()
-
-        return response
+        return stream_processor.output
 
     # Empty prompt callback
     @staticmethod
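The stdout swap in generate() boils down to the pattern below. This is a simplified sketch rather than the actual method: the real code drives the llmodel prompt callbacks (which print tokens to sys.stdout) where the print stand-ins are, and it assumes DualStreamProcessor from above is in scope.

import sys

def run_with_capture(streaming: bool = False) -> str:
    old_stdout = sys.stdout

    stream_processor = DualStreamProcessor()
    if streaming:
        # Echo tokens to the real stdout while they are collected.
        stream_processor.stream = sys.stdout
    sys.stdout = stream_processor

    try:
        print("token ", end="")  # stand-in for the llmodel prompt callbacks
        print("stream", end="")
    finally:
        # Revert to old stdout
        sys.stdout = old_stdout

    return stream_processor.output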

@@ -78,6 +78,8 @@ setup(
         'dev': [
             'pytest',
             'twine',
+            'wheel',
+            'setuptools',
             'mkdocs-material',
             'mkautodoc',
             'mkdocstrings[python]',
