python bindings should be quiet by default

* disable llama.cpp logging unless GPT4ALL_VERBOSE_LLAMACPP envvar is
  nonempty
* make the verbose flag for retrieve_model default to False (and allow
  overriding it via the GPT4All constructor)

should be able to run a basic test:

```python
import gpt4all
model = gpt4all.GPT4All('/Users/aaron/Downloads/rift-coder-v0-7b-q4_0.gguf')
print(model.generate('def fib(n):'))
```

and see no non-model output when successful.
This commit is contained in:
Aaron Miller 2023-10-10 11:10:25 -07:00
parent 7b611b49f2
commit afaa291eab
2 changed files with 20 additions and 3 deletions

View File

@ -36,6 +36,17 @@ namespace {
const char *modelType_ = "LLaMA";
}
// Log sink that swallows all llama.cpp output; installed through
// llama_log_set() whenever verbose llama.cpp logging is disabled.
static void null_log_callback(enum ggml_log_level level, const char* text, void* userdata)
{
    // Deliberately ignore every argument -- this callback exists only
    // to silence the library's default stderr logger.
    (void)userdata;
    (void)text;
    (void)level;
}
// Verbose llama.cpp logging is opt-in: returns true only when the
// GPT4ALL_VERBOSE_LLAMACPP environment variable is set and nonempty.
static bool llama_verbose()
{
    const char *setting = getenv("GPT4ALL_VERBOSE_LLAMACPP");
    return setting != nullptr && setting[0] != '\0';
}
struct gpt_params {
    int32_t seed = -1;   // RNG seed
    int32_t n_keep = 0;  // number of tokens to keep from initial prompt
@ -144,7 +155,9 @@ bool LLamaModel::loadModel(const std::string &modelPath)
    d_ptr->params.use_mlock = params.use_mlock;
#endif
#ifdef GGML_USE_METAL
    if (llama_verbose()) {
        std::cerr << "llama.cpp: using Metal" << std::endl;
    }
    // metal always runs the whole model if n_gpu_layers is not 0, at least
    // currently
    d_ptr->params.n_gpu_layers = 1;
@ -390,6 +403,9 @@ DLL_EXPORT bool magic_match(const char * fname) {
}
// Factory entry point used by the model-plugin loader. Before handing
// back a new model instance, mute llama.cpp's default stderr logging
// unless the user opted into verbose output via GPT4ALL_VERBOSE_LLAMACPP.
DLL_EXPORT LLModel *construct() {
    if (!llama_verbose()) {
        llama_log_set(null_log_callback, nullptr);
    }
    return new LLamaModel;
}
}

View File

@ -67,6 +67,7 @@ class GPT4All:
        allow_download: bool = True,
        n_threads: Optional[int] = None,
        device: Optional[str] = "cpu",
        verbose: bool = False,
    ):
        """
        Constructor
@ -91,7 +92,7 @@ class GPT4All:
        self.model_type = model_type
        self.model = pyllmodel.LLModel()
        # Retrieve model and download if allowed
        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
        if device is not None:
            if device != "cpu":
                self.model.init_gpu(model_path=self.config["path"], device=device)
@ -119,7 +120,7 @@ class GPT4All:
        model_name: str,
        model_path: Optional[Union[str, os.PathLike[str]]] = None,
        allow_download: bool = True,
        verbose: bool = False,
    ) -> ConfigType:
        """
        Find model file, and if it doesn't exist, download the model.