refactor: update models.yaml and abandon anyscale (#701)

This commit is contained in:
sigoden 2024-07-11 07:40:24 +08:00 committed by GitHub
parent cee0eb453e
commit 0264ab80ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 35 additions and 80 deletions

View File

@@ -80,7 +80,6 @@ test-server() {
OPENAI_COMPATIBLE_PLATFORMS=( \ OPENAI_COMPATIBLE_PLATFORMS=( \
openai,gpt-3.5-turbo,https://api.openai.com/v1 \ openai,gpt-3.5-turbo,https://api.openai.com/v1 \
anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \
deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \ deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \
deepseek,deepseek-chat,https://api.deepseek.com \ deepseek,deepseek-chat,https://api.deepseek.com \
fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \ fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \

View File

@@ -260,12 +260,6 @@ clients:
name: lingyiwanwu name: lingyiwanwu
api_key: xxx # ENV: {client}_API_KEY api_key: xxx # ENV: {client}_API_KEY
# See https://docs.endpoints.anyscale.com/
- type: openai-compatible
name: anyscale
api_base: https://api.endpoints.anyscale.com/v1
api_key: xxx # ENV: {client}_API_KEY
# See https://deepinfra.com/docs # See https://deepinfra.com/docs
- type: openai-compatible - type: openai-compatible
name: deepinfra name: deepinfra

View File

@@ -239,22 +239,21 @@
models: models:
- name: llama3-8b-8192 - name: llama3-8b-8192
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.05 input_price: 0
output_price: 0.08 output_price: 0
supports_function_calling: true supports_function_calling: true
- name: llama3-70b-8192 - name: llama3-70b-8192
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.59 input_price: 0
output_price: 0.79 output_price: 0
supports_function_calling: true
- name: mixtral-8x7b-32768 - name: mixtral-8x7b-32768
max_input_tokens: 32768 max_input_tokens: 32768
input_price: 0.24 input_price: 0
output_price: 0.24 output_price: 0
- name: gemma-7b-it - name: gemma2-9b-it
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.07 input_price: 0
output_price: 0.07 output_price: 0
- platform: vertexai - platform: vertexai
# docs: # docs:
@@ -284,6 +283,13 @@
input_price: 0.125 input_price: 0.125
output_price: 0.375 output_price: 0.375
supports_function_calling: true supports_function_calling: true
- name: textembedding-gecko@003
type: embedding
max_input_tokens: 3072
input_price: 0.025
output_vector_size: 2048
default_chunk_size: 3000
max_batch_size: 5
- name: text-embedding-004 - name: text-embedding-004
type: embedding type: embedding
max_input_tokens: 3072 max_input_tokens: 3072
@@ -481,17 +487,17 @@
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu
# - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7
models: models:
- name: ernie-4.0-turbo-8k - name: ernie-4.0-turbo-8k-preview
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 4.2 input_price: 4.2
output_price: 8.4 output_price: 8.4
supports_function_calling: true supports_function_calling: true
- name: ernie-4.0-8k-0613 - name: ernie-4.0-8k-preview
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 16.8 input_price: 16.8
output_price: 16.8 output_price: 16.8
supports_function_calling: true supports_function_calling: true
- name: ernie-3.5-8k-0613 - name: ernie-3.5-8k-preview
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 1.68 input_price: 1.68
output_price: 1.68 output_price: 1.68
@@ -514,13 +520,6 @@
output_vector_size: 1024 output_vector_size: 1024
default_chunk_size: 1000 default_chunk_size: 1000
max_batch_size: 16 max_batch_size: 16
- name: tao_8k
type: embedding
max_input_tokens: 8192
input_price: 0.28
output_vector_size: 1024
default_chunk_size: 2000
max_batch_size: 1
- name: bce_reranker_base - name: bce_reranker_base
type: reranker type: reranker
max_input_tokens: 1024 max_input_tokens: 1024
@@ -616,6 +615,11 @@
input_price: 14 input_price: 14
output_price: 14 output_price: 14
supports_function_calling: true supports_function_calling: true
- name: glm-4-alltools
max_input_tokens: 2048
input_price: 14
output_price: 14
supports_function_calling: true
- name: glm-4-airx - name: glm-4-airx
max_input_tokens: 8092 max_input_tokens: 8092
input_price: 1.4 input_price: 1.4
@@ -678,50 +682,6 @@
input_price: 0.14 input_price: 0.14
output_price: 0.14 output_price: 0.14
- platform: anyscale
# docs:
# - https://docs.anyscale.com/endpoints/text-generation/query-a-model
# - https://www.anyscale.com/pricing-detail
models:
- name: meta-llama/Meta-Llama-3-8B-Instruct
max_input_tokens: 8192
input_price: 0.15
output_price: 0.15
- name: meta-llama/Meta-Llama-3-70B-Instruct
max_input_tokens: 8192
input_price: 1.0
output_price: 1.0
- name: mistralai/Mistral-7B-Instruct-v0.1
max_input_tokens: 16384
input_price: 0.15
output_price: 0.15
- name: mistralai/Mixtral-8x7B-Instruct-v0.1
max_input_tokens: 32768
input_price: 0.50
output_price: 0.50
- name: mistralai/Mixtral-8x22B-Instruct-v0.1
max_input_tokens: 65536
input_price: 0.90
output_price: 0.90
- name: google/gemma-7b-it
max_input_tokens: 8192
input_price: 0.15
output_price: 0.15
- name: BAAI/bge-large-en-v1.5
type: embedding
max_input_tokens: 512
input_price: 0.05
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 30
- name: thenlper/gte-large
type: embedding
max_input_tokens: 512
input_price: 0.05
output_vector_size: 1024
default_chunk_size: 1000
max_batch_size: 30
- platform: deepinfra - platform: deepinfra
# docs: # docs:
# - https://deepinfra.com/models # - https://deepinfra.com/models
@@ -834,7 +794,7 @@
max_input_tokens: 65536 max_input_tokens: 65536
input_price: 0.9 input_price: 0.9
output_price: 0.9 output_price: 0.9
- name: accounts/fireworks/models/gemma-7b-it - name: accounts/fireworks/models/gemma2-9b-it
max_input_tokens: 8192 max_input_tokens: 8192
input_price: 0.2 input_price: 0.2
output_price: 0.2 output_price: 0.2
@@ -935,6 +895,10 @@
input_price: 0.125 input_price: 0.125
output_price: 0.375 output_price: 0.375
supports_function_calling: true supports_function_calling: true
- name: google/gemma-2-9b-it
max_input_tokens: 2800000
input_price: 0.2
output_price: 0.2
- name: anthropic/claude-3.5-sonnet - name: anthropic/claude-3.5-sonnet
max_input_tokens: 200000 max_input_tokens: 200000
max_output_tokens: 4096 max_output_tokens: 4096
@@ -1021,7 +985,10 @@
max_input_tokens: 28000 max_input_tokens: 28000
input_price: 1 input_price: 1
output_price: 1 output_price: 1
- name: 01-ai/yi-large
max_input_tokens: 32768
input_price: 3
output_price: 3
- platform: octoai - platform: octoai
# docs: # docs:

View File

@@ -51,8 +51,7 @@ register_client!(
(qianwen, "qianwen", QianwenConfig, QianwenClient), (qianwen, "qianwen", QianwenConfig, QianwenClient),
); );
pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [ pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [
("anyscale", "https://api.endpoints.anyscale.com/v1"),
("deepinfra", "https://api.deepinfra.com/v1/openai"), ("deepinfra", "https://api.deepinfra.com/v1/openai"),
("deepseek", "https://api.deepseek.com"), ("deepseek", "https://api.deepseek.com"),
("fireworks", "https://api.fireworks.ai/inference/v1"), ("fireworks", "https://api.fireworks.ai/inference/v1"),

View File

@@ -69,14 +69,10 @@ impl VertexAIClient {
let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers"); let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers");
let url = format!("{base_url}/google/models/{}:predict", self.model.name()); let url = format!("{base_url}/google/models/{}:predict", self.model.name());
let task_type = match data.query {
true => "RETRIEVAL_DOCUMENT",
false => "QUESTION_ANSWERING",
};
let instances: Vec<_> = data let instances: Vec<_> = data
.texts .texts
.into_iter() .into_iter()
.map(|v| json!({"task_type": task_type, "content": v})) .map(|v| json!({"content": v}))
.collect(); .collect();
let body = json!({ let body = json!({
"instances": instances, "instances": instances,