diff --git a/assets/arena.html b/assets/arena.html
index 6424542..b20553d 100644
--- a/assets/arena.html
+++ b/assets/arena.html
@@ -746,8 +746,8 @@
messages: messages,
stream: true,
};
- const { max_output_token, need_max_tokens } = retrieveModel(this.models, chat.model_id);
- if (!body["max_tokens"] && need_max_tokens) {
+ const { max_output_token, pass_max_tokens } = retrieveModel(this.models, chat.model_id);
+ if (!body["max_tokens"] && pass_max_tokens) {
body["max_tokens"] = max_output_token;
};
return body;
@@ -819,14 +819,14 @@
function retrieveModel(models, id) {
const model = models.find(model => model.id === id);
if (!model) return {};
- const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
- const need_max_tokens = !!model.max_output_tokens;
+ const max_output_token = model.max_output_tokens;
const supports_vision = !!model.supports_vision;
+ const pass_max_tokens = !!model.pass_max_tokens;
return {
id,
max_output_token,
- need_max_tokens,
supports_vision,
+ pass_max_tokens,
}
}
diff --git a/assets/playground.html b/assets/playground.html
index 485d7b6..0036fb4 100644
--- a/assets/playground.html
+++ b/assets/playground.html
@@ -939,8 +939,8 @@
body[body_key || setting_key] = this.settings[setting_key];
}
});
- const { max_output_token, need_max_tokens } = this.currentModel;
- if (!body["max_tokens"] && need_max_tokens) {
+ const { max_output_token, pass_max_tokens } = this.currentModel;
+ if (!body["max_tokens"] && pass_max_tokens) {
body["max_tokens"] = max_output_token;
};
return body;
@@ -1013,14 +1013,14 @@
function retrieveModel(models, id) {
const model = models.find(model => model.id === id);
if (!model) return {};
- const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
- const need_max_tokens = !!model.max_output_tokens;
+ const max_output_token = model.max_output_tokens;
const supports_vision = !!model.supports_vision;
+ const pass_max_tokens = !!model.pass_max_tokens;
return {
id,
max_output_token,
- need_max_tokens,
supports_vision,
+ pass_max_tokens,
}
}
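
Note: both assets/arena.html and assets/playground.html now apply the same rule after the `need_max_tokens` → `pass_max_tokens` rename: an explicit `max_tokens` in the request body always wins, and the model's advertised limit is injected only when the model opts in via `pass_max_tokens`. A minimal Rust sketch of that shared rule (the `ModelInfo` and `apply_max_tokens` names are illustrative, not part of the codebase):

```rust
use serde_json::{json, Value};

struct ModelInfo {
    max_output_tokens: Option<isize>,
    pass_max_tokens: bool,
}

fn apply_max_tokens(mut body: Value, model: &ModelInfo) -> Value {
    // Only inject when the caller hasn't set max_tokens and the model opts in.
    if body["max_tokens"].is_null() && model.pass_max_tokens {
        if let Some(v) = model.max_output_tokens {
            body["max_tokens"] = v.into();
        }
    }
    body
}

fn main() {
    let opt_in = ModelInfo { max_output_tokens: Some(4096), pass_max_tokens: true };
    let opt_out = ModelInfo { max_output_tokens: Some(4096), pass_max_tokens: false };
    let body = json!({ "messages": [], "stream": true });
    assert_eq!(apply_max_tokens(body.clone(), &opt_in)["max_tokens"], 4096);
    assert!(apply_max_tokens(body, &opt_out)["max_tokens"].is_null());
    println!("ok");
}
```
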
diff --git a/config.example.yaml b/config.example.yaml
index a1dc07d..3f4672d 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -33,7 +33,6 @@ clients:
# models:
# - name: xxxx # The model name
# max_input_tokens: 100000
- # max_output_tokens: 4096
# supports_vision: true
# extra_fields: # Set custom parameters, will merge with the body json
# key: value
diff --git a/models.yaml b/models.yaml
index 38fb642..0799308 100644
--- a/models.yaml
+++ b/models.yaml
@@ -8,44 +8,45 @@
models:
- name: gpt-3.5-turbo
max_input_tokens: 16385
- max_output_tokens?: 4096
+ max_output_tokens: 4096
input_price: 0.5
output_price: 1.5
- name: gpt-3.5-turbo-1106
max_input_tokens: 16385
- max_output_tokens?: 4096
+ max_output_tokens: 4096
input_price: 1
output_price: 2
- name: gpt-4-turbo
- max_input_tokens: 128000
- max_output_tokens?: 4096
- input_price: 10
- output_price: 30
- supports_vision: true
- - name: gpt-4-turbo-preview
- max_input_tokens: 128000
- max_output_tokens?: 4096
- input_price: 10
- output_price: 30
- - name: gpt-4-1106-preview
- max_input_tokens: 128000
- max_output_tokens?: 4096
- input_price: 10
- output_price: 30
- - name: gpt-4-vision-preview
max_input_tokens: 128000
max_output_tokens: 4096
input_price: 10
output_price: 30
supports_vision: true
+ - name: gpt-4-turbo-preview
+ max_input_tokens: 128000
+ max_output_tokens: 4096
+ input_price: 10
+ output_price: 30
+ - name: gpt-4-1106-preview
+ max_input_tokens: 128000
+ max_output_tokens: 4096
+ input_price: 10
+ output_price: 30
+ - name: gpt-4-vision-preview
+ max_input_tokens: 128000
+ max_output_tokens: 4096
+ pass_max_tokens: true
+ input_price: 10
+ output_price: 30
+ supports_vision: true
- name: gpt-4
max_input_tokens: 8192
- max_output_tokens?: 4096
+ max_output_tokens: 4096
input_price: 30
output_price: 60
- name: gpt-4-32k
max_input_tokens: 32768
- max_output_tokens?: 4096
+ max_output_tokens: 4096
input_price: 60
output_price: 120
@@ -59,18 +60,18 @@
models:
- name: gemini-1.0-pro-latest
max_input_tokens: 30720
- max_output_tokens?: 2048
+ max_output_tokens: 2048
input_price: 0.5
output_price: 1.5
- name: gemini-1.0-pro-vision-latest
max_input_tokens: 12288
- max_output_tokens?: 4096
+ max_output_tokens: 4096
input_price: 0.5
output_price: 1.5
supports_vision: true
- name: gemini-1.5-pro-latest
max_input_tokens: 1048576
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 7
output_price: 21
supports_vision: true
@@ -85,18 +86,21 @@
- name: claude-3-opus-20240229
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
- name: claude-3-sonnet-20240229
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
- name: claude-3-haiku-20240307
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
@@ -140,12 +144,12 @@
models:
- name: command-r
max_input_tokens: 128000
- max_output_tokens?: 4000
+ max_output_tokens: 4000
input_price: 0.5
output_price: 1.5
- name: command-r-plus
max_input_tokens: 128000
- max_output_tokens?: 4000
+ max_output_tokens: 4000
input_price: 3
output_price: 15
@@ -159,28 +163,28 @@
models:
- name: llama-3-sonar-small-32k-chat
max_input_tokens: 32768
- max_output_tokens?: 32768
+ max_output_tokens: 32768
input_price: 0.2
output_price: 0.2
- name: llama-3-sonar-large-32k-chat
max_input_tokens: 32768
- max_output_tokens?: 32768
+ max_output_tokens: 32768
input_price: 0.6
output_price: 0.6
- name: llama-3-8b-instruct
max_input_tokens: 8192
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 0.2
output_price: 0.2
- name: llama-3-70b-instruct
max_input_tokens: 8192
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 1
output_price: 1
- name: mixtral-8x7b-instruct
max_input_tokens: 16384
- max_output_tokens?: 16384
+ max_output_tokens: 16384
input_price: 0.6
output_price: 0.6
@@ -195,22 +199,22 @@
models:
- name: llama3-8b-8192
max_input_tokens: 8192
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 0.05
output_price: 0.10
- name: llama3-70b-8192
max_input_tokens: 8192
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 0.59
output_price: 0.79
- name: mixtral-8x7b-32768
max_input_tokens: 32768
- max_output_tokens?: 32768
+ max_output_tokens: 32768
input_price: 0.27
output_price: 0.27
- name: gemma-7b-it
max_input_tokens: 8192
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 0.10
output_price: 0.10
@@ -224,18 +228,18 @@
models:
- name: gemini-1.0-pro
max_input_tokens: 24568
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 0.125
output_price: 0.375
- name: gemini-1.0-pro-vision
max_input_tokens: 14336
- max_output_tokens?: 2048
+ max_output_tokens: 2048
input_price: 0.125
output_price: 0.375
supports_vision: true
- name: gemini-1.5-pro-preview-0409
max_input_tokens: 1000000
- max_output_tokens?: 8192
+ max_output_tokens: 8192
input_price: 2.5
output_price: 7.5
supports_vision: true
@@ -250,18 +254,21 @@
- name: claude-3-opus@20240229
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
- name: claude-3-sonnet@20240229
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
- name: claude-3-haiku@20240307
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
@@ -277,44 +284,52 @@
- name: anthropic.claude-3-opus-20240229-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
- name: anthropic.claude-3-sonnet-20240229-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
- name: anthropic.claude-3-haiku-20240307-v1:0
max_input_tokens: 200000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
- name: meta.llama3-8b-instruct-v1:0
max_input_tokens: 8192
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.4
output_price: 0.6
- name: meta.llama3-70b-instruct-v1:0
max_input_tokens: 8192
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 2.65
output_price: 3.5
- name: mistral.mistral-7b-instruct-v0:2
max_input_tokens: 32000
max_output_tokens: 8192
+ pass_max_tokens: true
input_price: 0.15
output_price: 0.2
- name: mistral.mixtral-8x7b-instruct-v0:1
max_input_tokens: 32000
max_output_tokens: 8192
+ pass_max_tokens: true
input_price: 0.45
output_price: 0.7
- name: mistral.mistral-large-2402-v1:0
max_input_tokens: 32000
max_output_tokens: 8192
+ pass_max_tokens: true
input_price: 8
output_price: 2.4
@@ -328,21 +343,27 @@
- name: '@cf/meta/llama-3-8b-instruct'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- name: '@cf/google/gemma-7b-it-lora'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- name: '@cf/qwen/qwen1.5-14b-chat-awq'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- name: '@hf/nexusflow/starling-lm-7b-beta'
max_input_tokens: 4096
max_output_tokens: 4096
+ pass_max_tokens: true
- platform: replicate
# docs:
@@ -354,21 +375,25 @@
- name: meta/meta-llama-3-70b-instruct
max_input_tokens: 8192
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.65
output_price: 2.75
- name: meta/meta-llama-3-8b-instruct
max_input_tokens: 8192
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.05
output_price: 0.25
- name: mistralai/mistral-7b-instruct-v0.2
max_input_tokens: 32000
max_output_tokens: 8192
+ pass_max_tokens: true
input_price: 0.05
output_price: 0.25
- name: mistralai/mixtral-8x7b-instruct-v0.1
max_input_tokens: 32000
max_output_tokens: 8192
+ pass_max_tokens: true
input_price: 0.3
output_price: 1
@@ -382,26 +407,31 @@
- name: ernie-4.0-8k-preview
max_input_tokens: 5120
max_output_tokens: 2048
+ pass_max_tokens: true
input_price: 16.8
output_price: 16.8
- name: ernie-3.5-8k-preview
max_input_tokens: 5120
max_output_tokens: 2048
+ pass_max_tokens: true
input_price: 1.68
output_price: 1.68
- name: ernie-speed-128k
max_input_tokens: 124000
max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.56
output_price: 1.12
- name: ernie-lite-8k
max_input_tokens: 7168
max_output_tokens: 2048
+ pass_max_tokens: true
input_price: 0.42
output_price: 0.84
- name: ernie-tiny-8k
max_input_tokens: 7168
max_output_tokens: 2048
+ pass_max_tokens: true
input_price: 0.14
output_price: 0.14
@@ -414,22 +444,22 @@
models:
- name: qwen-turbo
max_input_tokens: 6000
- max_output_tokens?: 1500
+ max_output_tokens: 1500
input_price: 1.12
output_price: 1.12
- name: qwen-plus
max_input_tokens: 30000
- max_output_tokens?: 2000
+ max_output_tokens: 2000
input_price: 2.8
output_price: 2.8
- name: qwen-max
max_input_tokens: 6000
- max_output_tokens?: 2000
+ max_output_tokens: 2000
input_price: 16.8
output_price: 16.8
- name: qwen-max-longcontext
max_input_tokens: 28000
- max_output_tokens?: 2000
+ max_output_tokens: 2000
- name: qwen-vl-plus
input_price: 1.12
output_price: 1.12
@@ -686,16 +716,22 @@
supports_vision: true
- name: anthropic/claude-3-opus
max_input_tokens: 200000
+ max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 15
output_price: 75
supports_vision: true
- name: anthropic/claude-3-sonnet
max_input_tokens: 200000
+ max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 3
output_price: 15
supports_vision: true
- name: anthropic/claude-3-haiku
max_input_tokens: 200000
+ max_output_tokens: 4096
+ pass_max_tokens: true
input_price: 0.25
output_price: 1.25
supports_vision: true
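
Note: in models.yaml the optional `max_output_tokens?` spelling is gone; every entry now uses plain `max_output_tokens` as display metadata, and only entries whose APIs actually require the parameter (the Claude models, Bedrock, Cloudflare Workers AI, Replicate, ernie, and gpt-4-vision-preview) additionally set `pass_max_tokens: true`. A sketch of how an entry deserializes under the new scheme (assumes serde/serde_yaml and mirrors the `ModelConfig` struct changed in src/client/model.rs below):

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ModelEntry {
    name: String,
    max_input_tokens: Option<usize>,
    max_output_tokens: Option<isize>,
    // Absent in most entries; defaults to false, i.e. "display only".
    #[serde(default)]
    pass_max_tokens: bool,
}

fn main() -> Result<(), serde_yaml::Error> {
    let yaml = "
name: gpt-4-turbo
max_input_tokens: 128000
max_output_tokens: 4096
";
    let entry: ModelEntry = serde_yaml::from_str(yaml)?;
    assert!(!entry.pass_max_tokens); // limit is metadata, never sent
    println!("{entry:?}");
    Ok(())
}
```
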
diff --git a/src/client/bedrock.rs b/src/client/bedrock.rs
index 6abd939..b07152b 100644
--- a/src/client/bedrock.rs
+++ b/src/client/bedrock.rs
@@ -172,7 +172,7 @@ async fn send_message_streaming(
let data: Value = decode_chunk(message.payload()).ok_or_else(|| {
anyhow!("Invalid chunk data: {}", hex_encode(message.payload()))
})?;
- debug!("bedrock chunk: {data}");
+ // debug!("bedrock chunk: {data}");
match model_category {
ModelCategory::Anthropic => {
if let Some(typ) = data["type"].as_str() {
@@ -235,7 +235,7 @@ fn meta_llama_build_body(data: SendData, model: &Model, pt: PromptFormat) -> Result<Value> {
let prompt = generate_prompt(&messages, pt)?;
let mut body = json!({ "prompt": prompt });
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_gen_len"] = v.into();
}
if let Some(v) = temperature {
@@ -258,7 +258,7 @@ fn mistral_build_body(data: SendData, model: &Model) -> Result<Value> {
let prompt = generate_prompt(&messages, MISTRAL_PROMPT_FORMAT)?;
let mut body = json!({ "prompt": prompt });
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/claude.rs b/src/client/claude.rs
index 0a230e9..89742f3 100644
--- a/src/client/claude.rs
+++ b/src/client/claude.rs
@@ -142,7 +142,7 @@ pub fn claude_build_body(data: SendData, model: &Model) -> Result<Value> {
if let Some(v) = system_message {
body["system"] = v.into();
}
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/cloudflare.rs b/src/client/cloudflare.rs
index 9758032..5a4bf8c 100644
--- a/src/client/cloudflare.rs
+++ b/src/client/cloudflare.rs
@@ -88,7 +88,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
"messages": messages,
});
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/cohere.rs b/src/client/cohere.rs
index e0ef6f0..b5d6647 100644
--- a/src/client/cohere.rs
+++ b/src/client/cohere.rs
@@ -135,7 +135,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
body["chat_history"] = messages.into();
}
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/ernie.rs b/src/client/ernie.rs
index d3002ff..28cb857 100644
--- a/src/client/ernie.rs
+++ b/src/client/ernie.rs
@@ -128,7 +128,7 @@ fn build_body(data: SendData, model: &Model) -> Value {
"messages": messages,
});
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_output_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/model.rs b/src/client/model.rs
index b24a546..d0d86e4 100644
--- a/src/client/model.rs
+++ b/src/client/model.rs
@@ -14,11 +14,11 @@ pub struct Model {
pub name: String,
pub max_input_tokens: Option<usize>,
pub max_output_tokens: Option<isize>,
- pub ref_max_output_tokens: Option<isize>,
+ pub pass_max_tokens: bool,
pub input_price: Option<f64>,
pub output_price: Option<f64>,
- pub extra_fields: Option<Map<String, Value>>,
pub capabilities: ModelCapabilities,
+ pub extra_fields: Option<Map<String, Value>>,
}
impl Default for Model {
@@ -32,13 +32,13 @@ impl Model {
Self {
client_name: client_name.into(),
name: name.into(),
- extra_fields: None,
max_input_tokens: None,
max_output_tokens: None,
- ref_max_output_tokens: None,
+ pass_max_tokens: false,
input_price: None,
output_price: None,
capabilities: ModelCapabilities::Text,
+ extra_fields: None,
}
}
@@ -49,8 +49,7 @@ impl Model {
let mut model = Model::new(client_name, &v.name);
model
.set_max_input_tokens(v.max_input_tokens)
- .set_max_output_tokens(v.max_output_tokens)
- .set_ref_max_output_tokens(v.ref_max_output_tokens)
+ .set_max_tokens(v.max_output_tokens, v.pass_max_tokens)
.set_input_price(v.input_price)
.set_output_price(v.output_price)
.set_supports_vision(v.supports_vision)
@@ -97,7 +96,7 @@ impl Model {
pub fn description(&self) -> String {
let max_input_tokens = format_option_value(&self.max_input_tokens);
- let max_output_tokens = format_option_value(&self.show_max_output_tokens());
+ let max_output_tokens = format_option_value(&self.max_output_tokens);
let input_price = format_option_value(&self.input_price);
let output_price = format_option_value(&self.output_price);
let vision = if self.capabilities.contains(ModelCapabilities::Vision) {
@@ -115,8 +114,12 @@ impl Model {
self.capabilities.contains(ModelCapabilities::Vision)
}
- pub fn show_max_output_tokens(&self) -> Option<isize> {
- self.max_output_tokens.or(self.ref_max_output_tokens)
+ pub fn max_tokens_param(&self) -> Option<isize> {
+ if self.pass_max_tokens {
+ self.max_output_tokens
+ } else {
+ None
+ }
}
pub fn set_max_input_tokens(&mut self, max_input_tokens: Option<usize>) -> &mut Self {
@@ -127,19 +130,16 @@ impl Model {
self
}
- pub fn set_max_output_tokens(&mut self, max_output_tokens: Option<isize>) -> &mut Self {
+ pub fn set_max_tokens(
+ &mut self,
+ max_output_tokens: Option<isize>,
+ pass_max_tokens: bool,
+ ) -> &mut Self {
match max_output_tokens {
None | Some(0) => self.max_output_tokens = None,
_ => self.max_output_tokens = max_output_tokens,
}
- self
- }
-
- pub fn set_ref_max_output_tokens(&mut self, ref_max_output_tokens: Option<isize>) -> &mut Self {
- match ref_max_output_tokens {
- None | Some(0) => self.ref_max_output_tokens = None,
- _ => self.ref_max_output_tokens = ref_max_output_tokens,
- }
+ self.pass_max_tokens = pass_max_tokens;
self
}
@@ -237,12 +237,12 @@ pub struct ModelConfig {
pub name: String,
pub max_input_tokens: Option<usize>,
pub max_output_tokens: Option<isize>,
- #[serde(rename = "max_output_tokens?")]
- pub ref_max_output_tokens: Option<isize>,
pub input_price: Option<f64>,
pub output_price: Option<f64>,
#[serde(default)]
pub supports_vision: bool,
+ #[serde(default)]
+ pub pass_max_tokens: bool,
pub extra_fields: Option<Map<String, Value>>,
}
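
Note: this is the core of the refactor. `max_tokens_param()` replaces `show_max_output_tokens()` and answers a different question: not "what limit should be displayed" but "what value, if any, should be sent to the API". A standalone reproduction of the two behaviors the new methods encode (a sketch, not the crate's actual API surface, which hangs off the full `Model` struct):

```rust
#[derive(Default)]
struct Model {
    max_output_tokens: Option<isize>,
    pass_max_tokens: bool,
}

impl Model {
    // Mirrors set_max_tokens() above: zero or missing limits collapse to None.
    fn set_max_tokens(&mut self, max_output_tokens: Option<isize>, pass_max_tokens: bool) -> &mut Self {
        self.max_output_tokens = match max_output_tokens {
            None | Some(0) => None,
            v => v,
        };
        self.pass_max_tokens = pass_max_tokens;
        self
    }

    // Mirrors max_tokens_param(): a limit is only reported when the model
    // opts in to sending it.
    fn max_tokens_param(&self) -> Option<isize> {
        if self.pass_max_tokens {
            self.max_output_tokens
        } else {
            None
        }
    }
}

fn main() {
    let mut m = Model::default();
    m.set_max_tokens(Some(4096), false);
    assert_eq!(m.max_tokens_param(), None); // advertised, never sent
    m.set_max_tokens(Some(4096), true);
    assert_eq!(m.max_tokens_param(), Some(4096)); // explicitly opted in
    m.set_max_tokens(Some(0), true);
    assert_eq!(m.max_tokens_param(), None); // zero is normalized away
    println!("ok");
}
```

`set_max_tokens(value, true)` is also what the `.set max_output_tokens` handler in src/config/mod.rs uses, so a value set interactively is always forwarded.
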
diff --git a/src/client/ollama.rs b/src/client/ollama.rs
index b61417a..6408d2e 100644
--- a/src/client/ollama.rs
+++ b/src/client/ollama.rs
@@ -159,7 +159,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
"options": {},
});
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["options"]["num_predict"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/openai.rs b/src/client/openai.rs
index 08bb94d..0b111db 100644
--- a/src/client/openai.rs
+++ b/src/client/openai.rs
@@ -90,7 +90,7 @@ pub fn openai_build_body(data: SendData, model: &Model) -> Value {
"messages": messages,
});
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/qianwen.rs b/src/client/qianwen.rs
index 3fa17e6..7391a38 100644
--- a/src/client/qianwen.rs
+++ b/src/client/qianwen.rs
@@ -173,7 +173,7 @@ fn build_body(data: SendData, model: &Model, is_vl: bool) -> Result<(Value, bool)> {
parameters["incremental_output"] = true.into();
}
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
parameters["max_tokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/client/replicate.rs b/src/client/replicate.rs
index a20ce71..34cfd94 100644
--- a/src/client/replicate.rs
+++ b/src/client/replicate.rs
@@ -148,7 +148,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
"prompt_template": "{prompt}"
});
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
input["max_tokens"] = v.into();
input["max_new_tokens"] = v.into();
}
diff --git a/src/client/vertexai.rs b/src/client/vertexai.rs
index 9907a7f..4d06934 100644
--- a/src/client/vertexai.rs
+++ b/src/client/vertexai.rs
@@ -201,7 +201,7 @@ pub(crate) fn gemini_build_body(
body["safetySettings"] = safety_settings;
}
- if let Some(v) = model.max_output_tokens {
+ if let Some(v) = model.max_tokens_param() {
body["generationConfig"]["maxOutputTokens"] = v.into();
}
if let Some(v) = temperature {
diff --git a/src/config/mod.rs b/src/config/mod.rs
index f33ee73..5ac44a8 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -422,7 +422,7 @@ impl Config {
(
"max_output_tokens",
self.model
- .max_output_tokens
+ .max_tokens_param()
.map(|v| format!("{v} (current model)"))
.unwrap_or_else(|| "-".into()),
),
@@ -523,7 +523,7 @@ impl Config {
(values, args[0])
} else if args.len() == 2 {
let values = match args[0] {
- "max_output_tokens" => match self.model.show_max_output_tokens() {
+ "max_output_tokens" => match self.model.max_output_tokens {
Some(v) => vec![v.to_string()],
None => vec![],
},
@@ -564,7 +564,7 @@ impl Config {
match key {
"max_output_tokens" => {
let value = parse_value(value)?;
- self.model.set_max_output_tokens(value);
+ self.model.set_max_tokens(value, true);
}
"temperature" => {
let value = parse_value(value)?;
diff --git a/src/serve.rs b/src/serve.rs
index 5f748e8..4413c89 100644
--- a/src/serve.rs
+++ b/src/serve.rs
@@ -93,7 +93,7 @@ impl Server {
"id": id,
"max_input_tokens": model.max_input_tokens,
"max_output_tokens": model.max_output_tokens,
- "max_output_tokens?": model.ref_max_output_tokens,
+ "pass_max_tokens": model.pass_max_tokens,
"input_price": model.input_price,
"output_price": model.output_price,
"supports_vision": model.supports_vision(),
@@ -244,7 +244,7 @@ impl Server {
let mut client = init_client(&config)?;
if max_tokens.is_some() {
- client.model_mut().set_max_output_tokens(max_tokens);
+ client.model_mut().set_max_tokens(max_tokens, true);
}
let abort = create_abort_signal();
let http_client = client.build_client()?;
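
Note: the served model list now advertises `pass_max_tokens` instead of `max_output_tokens?`, which is exactly what the updated `retrieveModel()` in both web UIs reads; and when an API caller supplies `max_tokens`, the server calls `set_max_tokens(max_tokens, true)` so the value is always forwarded. An illustrative `/v1/models` entry after this change (field values are examples, and the `client:model` id format is an assumption):

```rust
use serde_json::json;

fn main() {
    let entry = json!({
        "id": "claude:claude-3-opus-20240229", // assumed id format
        "max_input_tokens": 200000,
        "max_output_tokens": 4096,
        "pass_max_tokens": true, // replaces the old "max_output_tokens?"
        "input_price": 15.0,
        "output_price": 75.0,
        "supports_vision": true,
    });
    println!("{entry}");
}
```
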