|
|
@@ -31,13 +31,13 @@ class LlamaCpp(LLM):
|
|
|
|
"""Token context window."""
|
|
|
|
"""Token context window."""
|
|
|
|
|
|
|
|
|
|
|
|
n_parts: int = Field(-1, alias="n_parts")
|
|
|
|
n_parts: int = Field(-1, alias="n_parts")
|
|
|
|
"""Number of parts to split the model into.
|
|
|
|
"""Number of parts to split the model into.
|
|
|
|
If -1, the number of parts is automatically determined."""
|
|
|
|
If -1, the number of parts is automatically determined."""
|
|
|
|
|
|
|
|
|
|
|
|
seed: int = Field(-1, alias="seed")
|
|
|
|
seed: int = Field(-1, alias="seed")
|
|
|
|
"""Seed. If -1, a random seed is used."""
|
|
|
|
"""Seed. If -1, a random seed is used."""
|
|
|
|
|
|
|
|
|
|
|
|
f16_kv: bool = Field(False, alias="f16_kv")
|
|
|
|
f16_kv: bool = Field(True, alias="f16_kv")
|
|
|
|
"""Use half-precision for key/value cache."""
|
|
|
|
"""Use half-precision for key/value cache."""
|
|
|
|
|
|
|
|
|
|
|
|
logits_all: bool = Field(False, alias="logits_all")
|
|
|
|
logits_all: bool = Field(False, alias="logits_all")
|
|
|
@@ -50,7 +50,7 @@ class LlamaCpp(LLM):
|
|
|
|
"""Force system to keep model in RAM."""
|
|
|
|
"""Force system to keep model in RAM."""
|
|
|
|
|
|
|
|
|
|
|
|
n_threads: Optional[int] = Field(None, alias="n_threads")
|
|
|
|
n_threads: Optional[int] = Field(None, alias="n_threads")
|
|
|
|
"""Number of threads to use.
|
|
|
|
"""Number of threads to use.
|
|
|
|
If None, the number of threads is automatically determined."""
|
|
|
|
If None, the number of threads is automatically determined."""
|
|
|
|
|
|
|
|
|
|
|
|
n_batch: Optional[int] = Field(8, alias="n_batch")
|
|
|
|
n_batch: Optional[int] = Field(8, alias="n_batch")
|
|
|
|