diff --git a/.circleci/config.yml b/.circleci/config.yml
index c94adefc..c30ed3cf 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -170,25 +170,25 @@ workflows:
             branches:
               only:
                 - main
-  # build-py-deploy:
-  #   jobs:
-  #     - build-py-linux:
-  #         filters:
-  #           branches:
-  #             only:
-  #     - build-py-macos:
-  #         filters:
-  #           branches:
-  #             only:
-  #     - build-py-windows:
-  #         filters:
-  #           branches:
-  #             only:
-  #     - store-and-upload-wheels:
-  #         filters:
-  #           branches:
-  #             only:
-  #         requires:
-  #           - build-py-windows
-  #           - build-py-linux
-  #           - build-py-macos
+  build-py-deploy:
+    jobs:
+      - build-py-linux:
+          filters:
+            branches:
+              only:
+      - build-py-macos:
+          filters:
+            branches:
+              only:
+      - build-py-windows:
+          filters:
+            branches:
+              only:
+      - store-and-upload-wheels:
+          filters:
+            branches:
+              only:
+          requires:
+            - build-py-windows
+            - build-py-linux
+            - build-py-macos
diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py
index 94f6df9a..bee3f3cd 100644
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -155,24 +155,26 @@ class GPT4All():
             print("Model downloaded at: " + download_path)
         return download_path
 
-    def generate(self, prompt: str, **generate_kwargs) -> str:
+    def generate(self, prompt: str, streaming: bool = False, **generate_kwargs) -> str:
         """
         Surfaced method of running generate without accessing model object.
 
         Args:
             prompt: Raw string to be passed to model.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
             Raw string of generated model response.
         """
-        return self.model.generate(prompt, **generate_kwargs)
+        return self.model.generate(prompt, streaming=streaming, **generate_kwargs)
 
     def chat_completion(self,
                         messages: List[Dict],
                         default_prompt_header: bool = True,
                         default_prompt_footer: bool = True,
                         verbose: bool = True,
+                        streaming: bool = True,
                         **generate_kwargs) -> str:
         """
         Format list of message dictionaries into a prompt and call model
@@ -189,6 +191,7 @@ class GPT4All():
                 before user/assistant role messages.
             default_prompt_footer: If True (default), add default footer at end of prompt.
             verbose: If True (default), print full prompt and generated response.
+            streaming: True if want output streamed to stdout.
             **generate_kwargs: Optional kwargs to pass to prompt context.
 
         Returns:
@@ -206,7 +209,7 @@ class GPT4All():
         if verbose:
             print(full_prompt)
 
-        response = self.model.generate(full_prompt, **generate_kwargs)
+        response = self.model.generate(full_prompt, streaming=streaming, **generate_kwargs)
 
         if verbose:
             print(response)
diff --git a/gpt4all-bindings/python/gpt4all/pyllmodel.py b/gpt4all-bindings/python/gpt4all/pyllmodel.py
index a1f29f4d..f7d32399 100644
--- a/gpt4all-bindings/python/gpt4all/pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/pyllmodel.py
@@ -1,25 +1,23 @@
-from io import StringIO
 import pkg_resources
 import ctypes
 import os
 import platform
 import re
+import subprocess
 import sys
 
-class DualOutput:
-    def __init__(self, stdout, string_io):
-        self.stdout = stdout
-        self.string_io = string_io
+class DualStreamProcessor:
+    def __init__(self, stream=None):
+        self.stream = stream
+        self.output = ""
 
     def write(self, text):
-        self.stdout.write(text)
-        self.string_io.write(text)
+        cleaned_text = re.sub(r"\n(?!\n)", "", text)
+        if self.stream is not None:
+            self.stream.write(cleaned_text)
+            self.stream.flush()
+        self.output += cleaned_text
 
-    def flush(self):
-        # It's a good idea to also define a flush method that flushes both
-        # outputs, as sys.stdout is expected to have this method.
-        self.stdout.flush()
-        self.string_io.flush()
 
 # TODO: provide a config file to make this more robust
 LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
@@ -175,7 +173,7 @@ class LLModel:
                  repeat_penalty: float = 1.2,
                  repeat_last_n: int = 10,
                  context_erase: float = .5,
-                 std_passthrough: bool = False) -> str:
+                 streaming: bool = False) -> str:
         """
         Generate response from model from a prompt.
 
@@ -183,12 +181,8 @@
         Parameters
         ----------
         prompt: str
            Question, task, or conversation for model to respond to
-        add_default_header: bool, optional
-            Whether to add a prompt header (default is True)
-        add_default_footer: bool, optional
-            Whether to add a prompt footer (default is True)
-        verbose: bool, optional
-            Whether to print prompt and response
+        streaming: bool
+            Stream response to stdout
 
         Returns
        -------
@@ -198,13 +192,14 @@
         prompt = prompt.encode('utf-8')
         prompt = ctypes.c_char_p(prompt)
 
-        # Change stdout to StringIO so we can collect response
        old_stdout = sys.stdout
-        collect_response = StringIO()
-        if std_passthrough:
-            sys.stdout = DualOutput(old_stdout, collect_response)
-        else:
-            sys.stdout = collect_response
+
+        stream_processor = DualStreamProcessor()
+
+        if streaming:
+            stream_processor.stream = sys.stdout
+
+        sys.stdout = stream_processor
 
         context = LLModelPromptContext(
             logits_size=logits_size,
@@ -227,14 +222,11 @@ class LLModel:
             ResponseCallback(self._response_callback),
             RecalculateCallback(self._recalculate_callback),
             context)
-
-        response = collect_response.getvalue()
+
+        # Revert to old stdout
         sys.stdout = old_stdout
 
-        # Remove the unnecessary new lines from response
-        response = re.sub(r"\n(?!\n)", "", response).strip()
-
-        return response
+        return stream_processor.output
 
     # Empty prompt callback
     @staticmethod
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index 25433efb..2d8e7614 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -78,6 +78,8 @@ setup(
         'dev': [
             'pytest',
             'twine',
+            'wheel',
+            'setuptools',
             'mkdocs-material',
             'mkautodoc',
             'mkdocstrings[python]',
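
For context on the pyllmodel.py change: the response callback writes generated tokens to stdout (which is why the old code swapped in a `StringIO`), so the prompt call temporarily replaces `sys.stdout` with a `DualStreamProcessor` that accumulates the response and, when `streaming` is enabled, echoes it to the real stdout as it arrives. The snippet below is a minimal, self-contained sketch of that pattern; `fake_model_callback` and the `generate` wrapper are hypothetical stand-ins for illustration, not code from the bindings.

```python
import re
import sys


class DualStreamProcessor:
    """Collects everything written to it; optionally tees it to a real stream."""

    def __init__(self, stream=None):
        self.stream = stream  # the real stdout when streaming, otherwise None
        self.output = ""      # accumulated response text

    def write(self, text):
        # Drop the single newlines emitted per token, but keep paragraph breaks ("\n\n").
        cleaned_text = re.sub(r"\n(?!\n)", "", text)
        if self.stream is not None:
            self.stream.write(cleaned_text)
            self.stream.flush()
        self.output += cleaned_text


def fake_model_callback():
    # Hypothetical stand-in for the llmodel response callback, which writes
    # each generated token to stdout as it is produced.
    for token in ["Hello", ",", " world", "!"]:
        print(token, end="")


def generate(streaming=False):
    old_stdout = sys.stdout
    processor = DualStreamProcessor(sys.stdout if streaming else None)
    sys.stdout = processor        # capture everything the callback prints
    try:
        fake_model_callback()
    finally:
        sys.stdout = old_stdout   # always restore the real stdout
    return processor.output


# Echoes the tokens as they arrive, then prints the collected result.
print(generate(streaming=True))
```

With the patch applied, the same behaviour is surfaced to users as `GPT4All.generate(prompt, streaming=True)` and the new `streaming` keyword on `chat_completion`; in both cases the full response string is still returned after generation finishes.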