mirror of
https://github.com/sigoden/aichat
synced 2024-11-08 13:10:28 +00:00
refactor: update models.yaml and abandon anyscale (#701)
This commit is contained in:
parent
cee0eb453e
commit
0264ab80ab
@ -80,7 +80,6 @@ test-server() {
|
|||||||
|
|
||||||
OPENAI_COMPATIBLE_PLATFORMS=( \
|
OPENAI_COMPATIBLE_PLATFORMS=( \
|
||||||
openai,gpt-3.5-turbo,https://api.openai.com/v1 \
|
openai,gpt-3.5-turbo,https://api.openai.com/v1 \
|
||||||
anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \
|
|
||||||
deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
|
deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
|
||||||
deepseek,deepseek-chat,https://api.deepseek.com \
|
deepseek,deepseek-chat,https://api.deepseek.com \
|
||||||
fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \
|
fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \
|
||||||
|
@ -260,12 +260,6 @@ clients:
|
|||||||
name: lingyiwanwu
|
name: lingyiwanwu
|
||||||
api_key: xxx # ENV: {client}_API_KEY
|
api_key: xxx # ENV: {client}_API_KEY
|
||||||
|
|
||||||
# See https://docs.endpoints.anyscale.com/
|
|
||||||
- type: openai-compatible
|
|
||||||
name: anyscale
|
|
||||||
api_base: https://api.endpoints.anyscale.com/v1
|
|
||||||
api_key: xxx # ENV: {client}_API_KEY
|
|
||||||
|
|
||||||
# See https://deepinfra.com/docs
|
# See https://deepinfra.com/docs
|
||||||
- type: openai-compatible
|
- type: openai-compatible
|
||||||
name: deepinfra
|
name: deepinfra
|
||||||
|
99
models.yaml
99
models.yaml
@ -239,22 +239,21 @@
|
|||||||
models:
|
models:
|
||||||
- name: llama3-8b-8192
|
- name: llama3-8b-8192
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 0.05
|
input_price: 0
|
||||||
output_price: 0.08
|
output_price: 0
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
- name: llama3-70b-8192
|
- name: llama3-70b-8192
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 0.59
|
input_price: 0
|
||||||
output_price: 0.79
|
output_price: 0
|
||||||
supports_function_calling: true
|
|
||||||
- name: mixtral-8x7b-32768
|
- name: mixtral-8x7b-32768
|
||||||
max_input_tokens: 32768
|
max_input_tokens: 32768
|
||||||
input_price: 0.24
|
input_price: 0
|
||||||
output_price: 0.24
|
output_price: 0
|
||||||
- name: gemma-7b-it
|
- name: gemma2-9b-it
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 0.07
|
input_price: 0
|
||||||
output_price: 0.07
|
output_price: 0
|
||||||
|
|
||||||
- platform: vertexai
|
- platform: vertexai
|
||||||
# docs:
|
# docs:
|
||||||
@ -284,6 +283,13 @@
|
|||||||
input_price: 0.125
|
input_price: 0.125
|
||||||
output_price: 0.375
|
output_price: 0.375
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
|
- name: textembedding-gecko@003
|
||||||
|
type: embedding
|
||||||
|
max_input_tokens: 3072
|
||||||
|
input_price: 0.025
|
||||||
|
output_vector_size: 2048
|
||||||
|
default_chunk_size: 3000
|
||||||
|
max_batch_size: 5
|
||||||
- name: text-embedding-004
|
- name: text-embedding-004
|
||||||
type: embedding
|
type: embedding
|
||||||
max_input_tokens: 3072
|
max_input_tokens: 3072
|
||||||
@ -481,17 +487,17 @@
|
|||||||
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
|
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
|
||||||
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
|
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
|
||||||
models:
|
models:
|
||||||
- name: ernie-4.0-turbo-8k
|
- name: ernie-4.0-turbo-8k-preview
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 4.2
|
input_price: 4.2
|
||||||
output_price: 8.4
|
output_price: 8.4
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
- name: ernie-4.0-8k-0613
|
- name: ernie-4.0-8k-preview
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 16.8
|
input_price: 16.8
|
||||||
output_price: 16.8
|
output_price: 16.8
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
- name: ernie-3.5-8k-0613
|
- name: ernie-3.5-8k-preview
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 1.68
|
input_price: 1.68
|
||||||
output_price: 1.68
|
output_price: 1.68
|
||||||
@ -514,13 +520,6 @@
|
|||||||
output_vector_size: 1024
|
output_vector_size: 1024
|
||||||
default_chunk_size: 1000
|
default_chunk_size: 1000
|
||||||
max_batch_size: 16
|
max_batch_size: 16
|
||||||
- name: tao_8k
|
|
||||||
type: embedding
|
|
||||||
max_input_tokens: 8192
|
|
||||||
input_price: 0.28
|
|
||||||
output_vector_size: 1024
|
|
||||||
default_chunk_size: 2000
|
|
||||||
max_batch_size: 1
|
|
||||||
- name: bce_reranker_base
|
- name: bce_reranker_base
|
||||||
type: reranker
|
type: reranker
|
||||||
max_input_tokens: 1024
|
max_input_tokens: 1024
|
||||||
@ -616,6 +615,11 @@
|
|||||||
input_price: 14
|
input_price: 14
|
||||||
output_price: 14
|
output_price: 14
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
|
- name: glm-4-alltools
|
||||||
|
max_input_tokens: 2048
|
||||||
|
input_price: 14
|
||||||
|
output_price: 14
|
||||||
|
supports_function_calling: true
|
||||||
- name: glm-4-airx
|
- name: glm-4-airx
|
||||||
max_input_tokens: 8092
|
max_input_tokens: 8092
|
||||||
input_price: 1.4
|
input_price: 1.4
|
||||||
@ -678,50 +682,6 @@
|
|||||||
input_price: 0.14
|
input_price: 0.14
|
||||||
output_price: 0.14
|
output_price: 0.14
|
||||||
|
|
||||||
- platform: anyscale
|
|
||||||
# docs:
|
|
||||||
# - https://docs.anyscale.com/endpoints/text-generation/query-a-model
|
|
||||||
# - https://www.anyscale.com/pricing-detail
|
|
||||||
models:
|
|
||||||
- name: meta-llama/Meta-Llama-3-8B-Instruct
|
|
||||||
max_input_tokens: 8192
|
|
||||||
input_price: 0.15
|
|
||||||
output_price: 0.15
|
|
||||||
- name: meta-llama/Meta-Llama-3-70B-Instruct
|
|
||||||
max_input_tokens: 8192
|
|
||||||
input_price: 1.0
|
|
||||||
output_price: 1.0
|
|
||||||
- name: mistralai/Mistral-7B-Instruct-v0.1
|
|
||||||
max_input_tokens: 16384
|
|
||||||
input_price: 0.15
|
|
||||||
output_price: 0.15
|
|
||||||
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
|
|
||||||
max_input_tokens: 32768
|
|
||||||
input_price: 0.50
|
|
||||||
output_price: 0.50
|
|
||||||
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
|
|
||||||
max_input_tokens: 65536
|
|
||||||
input_price: 0.90
|
|
||||||
output_price: 0.90
|
|
||||||
- name: google/gemma-7b-it
|
|
||||||
max_input_tokens: 8192
|
|
||||||
input_price: 0.15
|
|
||||||
output_price: 0.15
|
|
||||||
- name: BAAI/bge-large-en-v1.5
|
|
||||||
type: embedding
|
|
||||||
max_input_tokens: 512
|
|
||||||
input_price: 0.05
|
|
||||||
output_vector_size: 1024
|
|
||||||
default_chunk_size: 1000
|
|
||||||
max_batch_size: 30
|
|
||||||
- name: thenlper/gte-large
|
|
||||||
type: embedding
|
|
||||||
max_input_tokens: 512
|
|
||||||
input_price: 0.05
|
|
||||||
output_vector_size: 1024
|
|
||||||
default_chunk_size: 1000
|
|
||||||
max_batch_size: 30
|
|
||||||
|
|
||||||
- platform: deepinfra
|
- platform: deepinfra
|
||||||
# docs:
|
# docs:
|
||||||
# - https://deepinfra.com/models
|
# - https://deepinfra.com/models
|
||||||
@ -834,7 +794,7 @@
|
|||||||
max_input_tokens: 65536
|
max_input_tokens: 65536
|
||||||
input_price: 0.9
|
input_price: 0.9
|
||||||
output_price: 0.9
|
output_price: 0.9
|
||||||
- name: accounts/fireworks/models/gemma-7b-it
|
- name: accounts/fireworks/models/gemma2-9b-it
|
||||||
max_input_tokens: 8192
|
max_input_tokens: 8192
|
||||||
input_price: 0.2
|
input_price: 0.2
|
||||||
output_price: 0.2
|
output_price: 0.2
|
||||||
@ -935,6 +895,10 @@
|
|||||||
input_price: 0.125
|
input_price: 0.125
|
||||||
output_price: 0.375
|
output_price: 0.375
|
||||||
supports_function_calling: true
|
supports_function_calling: true
|
||||||
|
- name: google/gemma-2-9b-it
|
||||||
|
max_input_tokens: 2800000
|
||||||
|
input_price: 0.2
|
||||||
|
output_price: 0.2
|
||||||
- name: anthropic/claude-3.5-sonnet
|
- name: anthropic/claude-3.5-sonnet
|
||||||
max_input_tokens: 200000
|
max_input_tokens: 200000
|
||||||
max_output_tokens: 4096
|
max_output_tokens: 4096
|
||||||
@ -1021,7 +985,10 @@
|
|||||||
max_input_tokens: 28000
|
max_input_tokens: 28000
|
||||||
input_price: 1
|
input_price: 1
|
||||||
output_price: 1
|
output_price: 1
|
||||||
|
- name: 01-ai/yi-large
|
||||||
|
max_input_tokens: 32768
|
||||||
|
input_price: 3
|
||||||
|
output_price: 3
|
||||||
|
|
||||||
- platform: octoai
|
- platform: octoai
|
||||||
# docs:
|
# docs:
|
||||||
|
@ -51,8 +51,7 @@ register_client!(
|
|||||||
(qianwen, "qianwen", QianwenConfig, QianwenClient),
|
(qianwen, "qianwen", QianwenConfig, QianwenClient),
|
||||||
);
|
);
|
||||||
|
|
||||||
pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [
|
pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [
|
||||||
("anyscale", "https://api.endpoints.anyscale.com/v1"),
|
|
||||||
("deepinfra", "https://api.deepinfra.com/v1/openai"),
|
("deepinfra", "https://api.deepinfra.com/v1/openai"),
|
||||||
("deepseek", "https://api.deepseek.com"),
|
("deepseek", "https://api.deepseek.com"),
|
||||||
("fireworks", "https://api.fireworks.ai/inference/v1"),
|
("fireworks", "https://api.fireworks.ai/inference/v1"),
|
||||||
|
@ -69,14 +69,10 @@ impl VertexAIClient {
|
|||||||
let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
|
let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
|
||||||
let url = format!("{base_url}/google/models/{}:predict", self.model.name());
|
let url = format!("{base_url}/google/models/{}:predict", self.model.name());
|
||||||
|
|
||||||
let task_type = match data.query {
|
|
||||||
true => "RETRIEVAL_DOCUMENT",
|
|
||||||
false => "QUESTION_ANSWERING",
|
|
||||||
};
|
|
||||||
let instances: Vec<_> = data
|
let instances: Vec<_> = data
|
||||||
.texts
|
.texts
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|v| json!({"task_type": task_type, "content": v}))
|
.map(|v| json!({"content": v}))
|
||||||
.collect();
|
.collect();
|
||||||
let body = json!({
|
let body = json!({
|
||||||
"instances": instances,
|
"instances": instances,
|
||||||
|
Loading…
Reference in New Issue
Block a user