Mirror of https://github.com/sigoden/aichat (synced 2024-11-16 06:15:26 +00:00)
refactor: model pass_max_tokens (#493)
parent 1c6c740381
commit 7762cd6bed
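What the refactor does, in one place: the soft "max_output_tokens?" hint (ref_max_output_tokens on the Rust side) is folded into a plain max_output_tokens value plus an explicit pass_max_tokens flag, and every request-body builder now asks max_tokens_param() instead of reading max_output_tokens directly. A minimal sketch of the new semantics, using the names from the diffs below (not the full structs):

    // Sketch only: the real Model carries many more fields.
    struct Model {
        max_output_tokens: Option<isize>, // advertised output limit, kept for display
        pass_max_tokens: bool,            // whether to send that limit to the API
    }

    impl Model {
        // Request builders call this instead of reading max_output_tokens directly.
        fn max_tokens_param(&self) -> Option<isize> {
            if self.pass_max_tokens {
                self.max_output_tokens
            } else {
                None
            }
        }
    }

    fn main() {
        // Display-only limit: reported to the user, never sent in a request.
        let informational = Model { max_output_tokens: Some(8192), pass_max_tokens: false };
        assert_eq!(informational.max_tokens_param(), None);

        // Required limit (e.g. the claude entries in models.yaml): sent as max_tokens.
        let required = Model { max_output_tokens: Some(4096), pass_max_tokens: true };
        assert_eq!(required.max_tokens_param(), Some(4096));
    }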
@@ -746,8 +746,8 @@
       messages: messages,
       stream: true,
     };
-    const { max_output_token, need_max_tokens } = retrieveModel(this.models, chat.model_id);
-    if (!body["max_tokens"] && need_max_tokens) {
+    const { max_output_token, pass_max_tokens } = retrieveModel(this.models, chat.model_id);
+    if (!body["max_tokens"] && pass_max_tokens) {
       body["max_tokens"] = max_output_token;
     };
     return body;
@@ -819,14 +819,14 @@
 function retrieveModel(models, id) {
   const model = models.find(model => model.id === id);
   if (!model) return {};
-  const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
-  const need_max_tokens = !!model.max_output_tokens;
+  const max_output_token = model.max_output_tokens;
   const supports_vision = !!model.supports_vision;
+  const pass_max_tokens = !!model.pass_max_tokens;
   return {
     id,
     max_output_token,
-    need_max_tokens,
     supports_vision,
+    pass_max_tokens,
   }
 }

@@ -939,8 +939,8 @@
         body[body_key || setting_key] = this.settings[setting_key];
       }
     });
-    const { max_output_token, need_max_tokens } = this.currentModel;
-    if (!body["max_tokens"] && need_max_tokens) {
+    const { max_output_token, pass_max_tokens } = this.currentModel;
+    if (!body["max_tokens"] && pass_max_tokens) {
       body["max_tokens"] = max_output_token;
     };
     return body;
@@ -1013,14 +1013,14 @@
 function retrieveModel(models, id) {
   const model = models.find(model => model.id === id);
   if (!model) return {};
-  const max_output_token = model.max_output_tokens || model["max_output_tokens?"] || null;
-  const need_max_tokens = !!model.max_output_tokens;
+  const max_output_token = model.max_output_tokens;
   const supports_vision = !!model.supports_vision;
+  const pass_max_tokens = !!model.pass_max_tokens;
   return {
     id,
     max_output_token,
-    need_max_tokens,
     supports_vision,
+    pass_max_tokens,
   }
 }

@@ -33,7 +33,6 @@ clients:
 #   models:
 #     - name: xxxx # The model name
 #       max_input_tokens: 100000
-#       max_output_tokens: 4096
 #       supports_vision: true
 #       extra_fields: # Set custom parameters, will merge with the body json
 #         key: value
models.yaml (118 changed lines)
@@ -8,44 +8,45 @@
   models:
     - name: gpt-3.5-turbo
       max_input_tokens: 16385
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 0.5
       output_price: 1.5
     - name: gpt-3.5-turbo-1106
       max_input_tokens: 16385
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 1
       output_price: 2
     - name: gpt-4-turbo
       max_input_tokens: 128000
-      max_output_tokens?: 4096
-      input_price: 10
-      output_price: 30
-      supports_vision: true
-    - name: gpt-4-turbo-preview
-      max_input_tokens: 128000
-      max_output_tokens?: 4096
-      input_price: 10
-      output_price: 30
-    - name: gpt-4-1106-preview
-      max_input_tokens: 128000
-      max_output_tokens?: 4096
-      input_price: 10
-      output_price: 30
-    - name: gpt-4-vision-preview
-      max_input_tokens: 128000
       max_output_tokens: 4096
+      input_price: 10
+      output_price: 30
+      supports_vision: true
+    - name: gpt-4-turbo-preview
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      input_price: 10
+      output_price: 30
+    - name: gpt-4-1106-preview
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      input_price: 10
+      output_price: 30
+    - name: gpt-4-vision-preview
+      max_input_tokens: 128000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 10
       output_price: 30
       supports_vision: true
     - name: gpt-4
       max_input_tokens: 8192
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 30
       output_price: 60
     - name: gpt-4-32k
       max_input_tokens: 32768
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 60
       output_price: 120

@@ -59,18 +60,18 @@
   models:
     - name: gemini-1.0-pro-latest
       max_input_tokens: 30720
-      max_output_tokens?: 2048
+      max_output_tokens: 2048
       input_price: 0.5
       output_price: 1.5
     - name: gemini-1.0-pro-vision-latest
       max_input_tokens: 12288
-      max_output_tokens?: 4096
+      max_output_tokens: 4096
       input_price: 0.5
       output_price: 1.5
       supports_vision: true
     - name: gemini-1.5-pro-latest
       max_input_tokens: 1048576
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 7
       output_price: 21
       supports_vision: true
@@ -85,18 +86,21 @@
     - name: claude-3-opus-20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: claude-3-sonnet-20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: claude-3-haiku-20240307
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
@@ -140,12 +144,12 @@
   models:
     - name: command-r
       max_input_tokens: 128000
-      max_output_tokens?: 4000
+      max_output_tokens: 4000
      input_price: 0.5
       output_price: 1.5
     - name: command-r-plus
       max_input_tokens: 128000
-      max_output_tokens?: 4000
+      max_output_tokens: 4000
       input_price: 3
       output_price: 15

@@ -159,28 +163,28 @@
   models:
     - name: llama-3-sonar-small-32k-chat
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-sonar-large-32k-chat
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.6
       output_price: 0.6

     - name: llama-3-8b-instruct
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.2
       output_price: 0.2
     - name: llama-3-70b-instruct
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 1
       output_price: 1
     - name: mixtral-8x7b-instruct
       max_input_tokens: 16384
-      max_output_tokens?: 16384
+      max_output_tokens: 16384
       input_price: 0.6
       output_price: 0.6

@@ -195,22 +199,22 @@
   models:
     - name: llama3-8b-8192
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.05
       output_price: 0.10
     - name: llama3-70b-8192
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.59
       output_price: 0.79
     - name: mixtral-8x7b-32768
       max_input_tokens: 32768
-      max_output_tokens?: 32768
+      max_output_tokens: 32768
       input_price: 0.27
       output_price: 0.27
     - name: gemma-7b-it
       max_input_tokens: 8192
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.10
       output_price: 0.10

@@ -224,18 +228,18 @@
   models:
     - name: gemini-1.0-pro
       max_input_tokens: 24568
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 0.125
       output_price: 0.375
     - name: gemini-1.0-pro-vision
       max_input_tokens: 14336
-      max_output_tokens?: 2048
+      max_output_tokens: 2048
       input_price: 0.125
       output_price: 0.375
       supports_vision: true
     - name: gemini-1.5-pro-preview-0409
       max_input_tokens: 1000000
-      max_output_tokens?: 8192
+      max_output_tokens: 8192
       input_price: 2.5
       output_price: 7.5
       supports_vision: true
@@ -250,18 +254,21 @@
     - name: claude-3-opus@20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: claude-3-sonnet@20240229
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: claude-3-haiku@20240307
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
@@ -277,44 +284,52 @@
     - name: anthropic.claude-3-opus-20240229-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: anthropic.claude-3-sonnet-20240229-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: anthropic.claude-3-haiku-20240307-v1:0
       max_input_tokens: 200000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
     - name: meta.llama3-8b-instruct-v1:0
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.4
       output_price: 0.6
     - name: meta.llama3-70b-instruct-v1:0
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 2.65
       output_price: 3.5
     - name: mistral.mistral-7b-instruct-v0:2
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.15
       output_price: 0.2
     - name: mistral.mixtral-8x7b-instruct-v0:1
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.45
       output_price: 0.7
     - name: mistral.mistral-large-2402-v1:0
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 8
       output_price: 2.4

@@ -328,21 +343,27 @@
     - name: '@cf/meta/llama-3-8b-instruct'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/mistral/mistral-7b-instruct-v0.2-lora'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/google/gemma-7b-it-lora'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@cf/qwen/qwen1.5-14b-chat-awq'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@hf/thebloke/deepseek-coder-6.7b-instruct-awq'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true
     - name: '@hf/nexusflow/starling-lm-7b-beta'
       max_input_tokens: 4096
       max_output_tokens: 4096
+      pass_max_tokens: true

 - platform: replicate
   # docs:
@@ -354,21 +375,25 @@
     - name: meta/meta-llama-3-70b-instruct
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.65
       output_price: 2.75
     - name: meta/meta-llama-3-8b-instruct
       max_input_tokens: 8192
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.05
       output_price: 0.25
     - name: mistralai/mistral-7b-instruct-v0.2
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.05
       output_price: 0.25
     - name: mistralai/mixtral-8x7b-instruct-v0.1
       max_input_tokens: 32000
       max_output_tokens: 8192
+      pass_max_tokens: true
       input_price: 0.3
       output_price: 1

@@ -382,26 +407,31 @@
     - name: ernie-4.0-8k-preview
       max_input_tokens: 5120
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 16.8
       output_price: 16.8
     - name: ernie-3.5-8k-preview
       max_input_tokens: 5120
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 1.68
       output_price: 1.68
     - name: ernie-speed-128k
       max_input_tokens: 124000
       max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.56
       output_price: 1.12
     - name: ernie-lite-8k
       max_input_tokens: 7168
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 0.42
       output_price: 0.84
     - name: ernie-tiny-8k
       max_input_tokens: 7168
       max_output_tokens: 2048
+      pass_max_tokens: true
       input_price: 0.14
       output_price: 0.14

@@ -414,22 +444,22 @@
   models:
     - name: qwen-turbo
      max_input_tokens: 6000
-      max_output_tokens?: 1500
+      max_output_tokens: 1500
       input_price: 1.12
       output_price: 1.12
     - name: qwen-plus
       max_input_tokens: 30000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
       input_price: 2.8
       output_price: 2.8
     - name: qwen-max
       max_input_tokens: 6000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
       input_price: 16.8
       output_price: 16.8
     - name: qwen-max-longcontext
       max_input_tokens: 28000
-      max_output_tokens?: 2000
+      max_output_tokens: 2000
     - name: qwen-vl-plus
       input_price: 1.12
       output_price: 1.12
@@ -686,16 +716,22 @@
       supports_vision: true
     - name: anthropic/claude-3-opus
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 15
       output_price: 75
       supports_vision: true
     - name: anthropic/claude-3-sonnet
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 3
       output_price: 15
       supports_vision: true
     - name: anthropic/claude-3-haiku
       max_input_tokens: 200000
+      max_output_tokens: 4096
+      pass_max_tokens: true
       input_price: 0.25
       output_price: 1.25
       supports_vision: true
@@ -172,7 +172,7 @@ async fn send_message_streaming(
         let data: Value = decode_chunk(message.payload()).ok_or_else(|| {
             anyhow!("Invalid chunk data: {}", hex_encode(message.payload()))
         })?;
-        debug!("bedrock chunk: {data}");
+        // debug!("bedrock chunk: {data}");
         match model_category {
             ModelCategory::Anthropic => {
                 if let Some(typ) = data["type"].as_str() {
@@ -235,7 +235,7 @@ fn meta_llama_build_body(data: SendData, model: &Model, pt: PromptFormat) -> Res
     let prompt = generate_prompt(&messages, pt)?;
     let mut body = json!({ "prompt": prompt });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_gen_len"] = v.into();
     }
     if let Some(v) = temperature {
@@ -258,7 +258,7 @@ fn mistral_build_body(data: SendData, model: &Model) -> Result<Value> {
     let prompt = generate_prompt(&messages, MISTRAL_PROMPT_FORMAT)?;
     let mut body = json!({ "prompt": prompt });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -142,7 +142,7 @@ pub fn claude_build_body(data: SendData, model: &Model) -> Result<Value> {
     if let Some(v) = system_message {
         body["system"] = v.into();
     }
-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
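Anthropic's messages API requires max_tokens on every request, which is why the claude entries in models.yaml above all gain pass_max_tokens: true; with the flag set, max_tokens_param() keeps returning a value here. A hedged sketch of the resulting body (serde_json assumed; the real builder also fills in system, temperature, and streaming fields):

    use serde_json::json;

    fn main() {
        // What model.max_tokens_param() yields for a claude model after this commit.
        let max_tokens_param: Option<i64> = Some(4096);

        let mut body = json!({
            "model": "claude-3-opus-20240229",
            "messages": [{ "role": "user", "content": "hello" }],
        });
        if let Some(v) = max_tokens_param {
            body["max_tokens"] = v.into();
        }
        assert_eq!(body["max_tokens"], json!(4096));
        println!("{body}");
    }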
@@ -88,7 +88,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "messages": messages,
     });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -135,7 +135,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         body["chat_history"] = messages.into();
     }

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -128,7 +128,7 @@ fn build_body(data: SendData, model: &Model) -> Value {
         "messages": messages,
     });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_output_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -14,11 +14,11 @@ pub struct Model {
     pub name: String,
     pub max_input_tokens: Option<usize>,
     pub max_output_tokens: Option<isize>,
-    pub ref_max_output_tokens: Option<isize>,
+    pub pass_max_tokens: bool,
     pub input_price: Option<f64>,
     pub output_price: Option<f64>,
-    pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
     pub capabilities: ModelCapabilities,
+    pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
 }

 impl Default for Model {
@@ -32,13 +32,13 @@ impl Model {
         Self {
             client_name: client_name.into(),
             name: name.into(),
-            extra_fields: None,
             max_input_tokens: None,
             max_output_tokens: None,
-            ref_max_output_tokens: None,
+            pass_max_tokens: false,
             input_price: None,
             output_price: None,
             capabilities: ModelCapabilities::Text,
+            extra_fields: None,
         }
     }

@@ -49,8 +49,7 @@ impl Model {
         let mut model = Model::new(client_name, &v.name);
         model
             .set_max_input_tokens(v.max_input_tokens)
-            .set_max_output_tokens(v.max_output_tokens)
-            .set_ref_max_output_tokens(v.ref_max_output_tokens)
+            .set_max_tokens(v.max_output_tokens, v.pass_max_tokens)
             .set_input_price(v.input_price)
             .set_output_price(v.output_price)
             .set_supports_vision(v.supports_vision)
@@ -97,7 +96,7 @@ impl Model {

     pub fn description(&self) -> String {
         let max_input_tokens = format_option_value(&self.max_input_tokens);
-        let max_output_tokens = format_option_value(&self.show_max_output_tokens());
+        let max_output_tokens = format_option_value(&self.max_output_tokens);
         let input_price = format_option_value(&self.input_price);
         let output_price = format_option_value(&self.output_price);
         let vision = if self.capabilities.contains(ModelCapabilities::Vision) {
@@ -115,8 +114,12 @@ impl Model {
         self.capabilities.contains(ModelCapabilities::Vision)
     }

-    pub fn show_max_output_tokens(&self) -> Option<isize> {
-        self.max_output_tokens.or(self.ref_max_output_tokens)
+    pub fn max_tokens_param(&self) -> Option<isize> {
+        if self.pass_max_tokens {
+            self.max_output_tokens
+        } else {
+            None
+        }
     }

     pub fn set_max_input_tokens(&mut self, max_input_tokens: Option<usize>) -> &mut Self {
@@ -127,19 +130,16 @@ impl Model {
         self
     }

-    pub fn set_max_output_tokens(&mut self, max_output_tokens: Option<isize>) -> &mut Self {
+    pub fn set_max_tokens(
+        &mut self,
+        max_output_tokens: Option<isize>,
+        pass_max_tokens: bool,
+    ) -> &mut Self {
         match max_output_tokens {
             None | Some(0) => self.max_output_tokens = None,
             _ => self.max_output_tokens = max_output_tokens,
         }
-        self
-    }
-
-    pub fn set_ref_max_output_tokens(&mut self, ref_max_output_tokens: Option<isize>) -> &mut Self {
-        match ref_max_output_tokens {
-            None | Some(0) => self.ref_max_output_tokens = None,
-            _ => self.ref_max_output_tokens = ref_max_output_tokens,
-        }
+        self.pass_max_tokens = pass_max_tokens;
         self
     }

@@ -237,12 +237,12 @@ pub struct ModelConfig {
     pub name: String,
     pub max_input_tokens: Option<usize>,
     pub max_output_tokens: Option<isize>,
-    #[serde(rename = "max_output_tokens?")]
-    pub ref_max_output_tokens: Option<isize>,
     pub input_price: Option<f64>,
     pub output_price: Option<f64>,
     #[serde(default)]
     pub supports_vision: bool,
+    #[serde(default)]
+    pub pass_max_tokens: bool,
     pub extra_fields: Option<serde_json::Map<String, serde_json::Value>>,
 }
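For reference, this is how a models.yaml entry maps onto the new ModelConfig fields; a self-contained sketch under the assumption that serde_yaml is the deserializer (only the fields touched by this commit are included):

    use serde::Deserialize;

    #[derive(Debug, Deserialize)]
    struct ModelConfig {
        name: String,
        max_input_tokens: Option<usize>,
        max_output_tokens: Option<isize>,
        #[serde(default)]
        pass_max_tokens: bool, // absent in the YAML means false
    }

    fn main() {
        let yaml = "name: claude-3-opus-20240229\nmax_input_tokens: 200000\nmax_output_tokens: 4096\npass_max_tokens: true\n";
        let cfg: ModelConfig = serde_yaml::from_str(yaml).unwrap();
        assert!(cfg.pass_max_tokens);
        assert_eq!(cfg.max_output_tokens, Some(4096));
        println!("{cfg:?}");
    }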
@@ -159,7 +159,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "options": {},
     });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["options"]["num_predict"] = v.into();
     }
     if let Some(v) = temperature {
@@ -90,7 +90,7 @@ pub fn openai_build_body(data: SendData, model: &Model) -> Value {
         "messages": messages,
     });

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -173,7 +173,7 @@ fn build_body(data: SendData, model: &Model, is_vl: bool) -> Result<(Value, bool
         parameters["incremental_output"] = true.into();
     }

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         parameters["max_tokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -148,7 +148,7 @@ fn build_body(data: SendData, model: &Model) -> Result<Value> {
         "prompt_template": "{prompt}"
     });

-    if let Some(v) = model.max_output_tokens {
-        input["max_tokens"] = v.into();
+    if let Some(v) = model.max_tokens_param() {
+        input["max_new_tokens"] = v.into();
     }

@@ -201,7 +201,7 @@ pub(crate) fn gemini_build_body(
         body["safetySettings"] = safety_settings;
     }

-    if let Some(v) = model.max_output_tokens {
+    if let Some(v) = model.max_tokens_param() {
         body["generationConfig"]["maxOutputTokens"] = v.into();
     }
     if let Some(v) = temperature {
@@ -422,7 +422,7 @@ impl Config {
             (
                 "max_output_tokens",
                 self.model
-                    .max_output_tokens
+                    .max_tokens_param()
                     .map(|v| format!("{v} (current model)"))
                     .unwrap_or_else(|| "-".into()),
             ),
@@ -523,7 +523,7 @@ impl Config {
             (values, args[0])
         } else if args.len() == 2 {
             let values = match args[0] {
-                "max_output_tokens" => match self.model.show_max_output_tokens() {
+                "max_output_tokens" => match self.model.max_output_tokens {
                     Some(v) => vec![v.to_string()],
                     None => vec![],
                 },
@@ -564,7 +564,7 @@ impl Config {
         match key {
             "max_output_tokens" => {
                 let value = parse_value(value)?;
-                self.model.set_max_output_tokens(value);
+                self.model.set_max_tokens(value, true);
             }
             "temperature" => {
                 let value = parse_value(value)?;
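Note the hardcoded true: both this `.set max_output_tokens` handler and the server override below call set_max_tokens(value, true), so a value the user supplies explicitly is always forwarded to the API even when the model's own config leaves pass_max_tokens off. Expressed against the minimal Model sketch at the top of this page (apply_user_override is a made-up helper name, not part of the codebase):

    // An explicit runtime override stores the limit and forces it through.
    fn apply_user_override(model: &mut Model, value: Option<isize>) {
        model.max_output_tokens = value;
        model.pass_max_tokens = true; // always pass a user-provided limit
    }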
@@ -93,7 +93,7 @@ impl Server {
             "id": id,
             "max_input_tokens": model.max_input_tokens,
             "max_output_tokens": model.max_output_tokens,
-            "max_output_tokens?": model.ref_max_output_tokens,
+            "pass_max_tokens": model.pass_max_tokens,
             "input_price": model.input_price,
             "output_price": model.output_price,
             "supports_vision": model.supports_vision(),
@@ -244,7 +244,7 @@ impl Server {

         let mut client = init_client(&config)?;
         if max_tokens.is_some() {
-            client.model_mut().set_max_output_tokens(max_tokens);
+            client.model_mut().set_max_tokens(max_tokens, true);
         }
         let abort = create_abort_signal();
         let http_client = client.build_client()?;