mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
llamamodel: use greedy sampling when temp=0 (#2854)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
8ccf1fa2f5
commit
6518b33697
@ -137,7 +137,7 @@ struct gpt_params {
|
||||
bool use_mlock = false; // use mlock to keep model in memory
|
||||
};
|
||||
|
||||
static int llama_sample_top_p_top_k(
|
||||
static llama_token llama_sample_top_p_top_k(
|
||||
llama_context *ctx,
|
||||
const llama_token *last_n_tokens_data,
|
||||
int last_n_tokens_size,
|
||||
@ -157,14 +157,22 @@ static int llama_sample_top_p_top_k(
|
||||
llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};
|
||||
// Sample repeat penalty
|
||||
llama_sample_repetition_penalties(nullptr, &candidates_p, last_n_tokens_data, last_n_tokens_size, repeat_penalty, 0.0f, 0.0f);
|
||||
// Temperature sampling
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
|
||||
llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
|
||||
llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
|
||||
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
|
||||
llama_sample_min_p(ctx, &candidates_p, min_p, 1);
|
||||
llama_sample_temp(ctx, &candidates_p, temp);
|
||||
return llama_sample_token(ctx, &candidates_p);
|
||||
|
||||
llama_token id;
|
||||
if (temp == 0.0) {
|
||||
// greedy sampling, no probs
|
||||
id = llama_sample_token_greedy(ctx, &candidates_p);
|
||||
} else {
|
||||
// temperature sampling
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
|
||||
llama_sample_tail_free(ctx, &candidates_p, 1.0f, 1);
|
||||
llama_sample_typical(ctx, &candidates_p, 1.0f, 1);
|
||||
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
|
||||
llama_sample_min_p(ctx, &candidates_p, min_p, 1);
|
||||
llama_sample_temp(ctx, &candidates_p, temp);
|
||||
id = llama_sample_token(ctx, &candidates_p);
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
const char *get_arch_name(gguf_context *ctx_gguf)
|
||||
|
@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
|
||||
|
||||
### Changed
|
||||
- Search for pip-installed CUDA 11 as well as CUDA 12 ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
|
||||
- Stop shipping CUBINs to reduce wheel size ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
|
||||
|
@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Use greedy sampling when temperature is set to zero ([#2854](https://github.com/nomic-ai/gpt4all/pull/2854))
|
||||
|
||||
## [3.2.1] - 2024-08-13
|
||||
|
||||
### Fixed
|
||||
- Do not initialize Vulkan driver when only using CPU ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
|
||||
- Fix a potential crash on exit when using only CPU on Linux with NVIDIA (does not affect X11) ([#2843](https://github.com/nomic-ai/gpt4all/pull/2843))
|
||||
@ -90,7 +95,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
- Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
|
||||
- Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
|
||||
|
||||
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...HEAD
|
||||
[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.2.1...HEAD
|
||||
[3.2.1]: https://github.com/nomic-ai/gpt4all/compare/v3.2.0...v3.2.1
|
||||
[3.2.0]: https://github.com/nomic-ai/gpt4all/compare/v3.1.1...v3.2.0
|
||||
[3.1.1]: https://github.com/nomic-ai/gpt4all/compare/v3.1.0...v3.1.1
|
||||
[3.1.0]: https://github.com/nomic-ai/gpt4all/compare/v3.0.0...v3.1.0
|
||||
|
Loading…
Reference in New Issue
Block a user