diff --git a/Argcfile.sh b/Argcfile.sh index 7433e26..5706aa5 100755 --- a/Argcfile.sh +++ b/Argcfile.sh @@ -80,7 +80,6 @@ test-server() { OPENAI_COMPATIBLE_PLATFORMS=( \ openai,gpt-3.5-turbo,https://api.openai.com/v1 \ - anyscale,meta-llama/Meta-Llama-3-8B-Instruct,https://api.endpoints.anyscale.com/v1 \ deepinfra,meta-llama/Meta-Llama-3-8B-Instruct,https://api.deepinfra.com/v1/openai \ deepseek,deepseek-chat,https://api.deepseek.com \ fireworks,accounts/fireworks/models/llama-v3-8b-instruct,https://api.fireworks.ai/inference/v1 \ diff --git a/config.example.yaml b/config.example.yaml index 003b42a..89407a6 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -260,12 +260,6 @@ clients: name: lingyiwanwu api_key: xxx # ENV: {client}_API_KEY - # See https://docs.endpoints.anyscale.com/ - - type: openai-compatible - name: anyscale - api_base: https://api.endpoints.anyscale.com/v1 - api_key: xxx # ENV: {client}_API_KEY - # See https://deepinfra.com/docs - type: openai-compatible name: deepinfra diff --git a/models.yaml b/models.yaml index 3e9ba7b..43fd5bf 100644 --- a/models.yaml +++ b/models.yaml @@ -239,22 +239,21 @@ models: - name: llama3-8b-8192 max_input_tokens: 8192 - input_price: 0.05 - output_price: 0.08 + input_price: 0 + output_price: 0 supports_function_calling: true - name: llama3-70b-8192 max_input_tokens: 8192 - input_price: 0.59 - output_price: 0.79 - supports_function_calling: true + input_price: 0 + output_price: 0 - name: mixtral-8x7b-32768 max_input_tokens: 32768 - input_price: 0.24 - output_price: 0.24 - - name: gemma-7b-it + input_price: 0 + output_price: 0 + - name: gemma2-9b-it max_input_tokens: 8192 - input_price: 0.07 - output_price: 0.07 + input_price: 0 + output_price: 0 - platform: vertexai # docs: @@ -284,6 +283,13 @@ input_price: 0.125 output_price: 0.375 supports_function_calling: true + - name: textembedding-gecko@003 + type: embedding + max_input_tokens: 3072 + input_price: 0.025 + output_vector_size: 2048 + default_chunk_size: 3000 + max_batch_size: 5 - name: text-embedding-004 type: embedding max_input_tokens: 3072 @@ -481,17 +487,17 @@ # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Nlks5zkzu # - https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7 models: - - name: ernie-4.0-turbo-8k + - name: ernie-4.0-turbo-8k-preview max_input_tokens: 8192 input_price: 4.2 output_price: 8.4 supports_function_calling: true - - name: ernie-4.0-8k-0613 + - name: ernie-4.0-8k-preview max_input_tokens: 8192 input_price: 16.8 output_price: 16.8 supports_function_calling: true - - name: ernie-3.5-8k-0613 + - name: ernie-3.5-8k-preview max_input_tokens: 8192 input_price: 1.68 output_price: 1.68 @@ -514,13 +520,6 @@ output_vector_size: 1024 default_chunk_size: 1000 max_batch_size: 16 - - name: tao_8k - type: embedding - max_input_tokens: 8192 - input_price: 0.28 - output_vector_size: 1024 - default_chunk_size: 2000 - max_batch_size: 1 - name: bce_reranker_base type: reranker max_input_tokens: 1024 @@ -616,6 +615,11 @@ input_price: 14 output_price: 14 supports_function_calling: true + - name: glm-4-alltools + max_input_tokens: 2048 + input_price: 14 + output_price: 14 + supports_function_calling: true - name: glm-4-airx max_input_tokens: 8092 input_price: 1.4 @@ -678,50 +682,6 @@ input_price: 0.14 output_price: 0.14 -- platform: anyscale - # docs: - # - https://docs.anyscale.com/endpoints/text-generation/query-a-model - # - https://www.anyscale.com/pricing-detail - models: - - name: meta-llama/Meta-Llama-3-8B-Instruct - max_input_tokens: 8192 - input_price: 0.15 - output_price: 0.15 - - name: meta-llama/Meta-Llama-3-70B-Instruct - max_input_tokens: 8192 - input_price: 1.0 - output_price: 1.0 - - name: mistralai/Mistral-7B-Instruct-v0.1 - max_input_tokens: 16384 - input_price: 0.15 - output_price: 0.15 - - name: mistralai/Mixtral-8x7B-Instruct-v0.1 - max_input_tokens: 32768 - input_price: 0.50 - output_price: 0.50 - - name: mistralai/Mixtral-8x22B-Instruct-v0.1 - max_input_tokens: 65536 - input_price: 0.90 - output_price: 0.90 - - name: google/gemma-7b-it - max_input_tokens: 8192 - input_price: 0.15 - output_price: 0.15 - - name: BAAI/bge-large-en-v1.5 - type: embedding - max_input_tokens: 512 - input_price: 0.05 - output_vector_size: 1024 - default_chunk_size: 1000 - max_batch_size: 30 - - name: thenlper/gte-large - type: embedding - max_input_tokens: 512 - input_price: 0.05 - output_vector_size: 1024 - default_chunk_size: 1000 - max_batch_size: 30 - - platform: deepinfra # docs: # - https://deepinfra.com/models @@ -834,7 +794,7 @@ max_input_tokens: 65536 input_price: 0.9 output_price: 0.9 - - name: accounts/fireworks/models/gemma-7b-it + - name: accounts/fireworks/models/gemma2-9b-it max_input_tokens: 8192 input_price: 0.2 output_price: 0.2 @@ -935,6 +895,10 @@ input_price: 0.125 output_price: 0.375 supports_function_calling: true + - name: google/gemma-2-9b-it + max_input_tokens: 2800000 + input_price: 0.2 + output_price: 0.2 - name: anthropic/claude-3.5-sonnet max_input_tokens: 200000 max_output_tokens: 4096 @@ -1021,7 +985,10 @@ max_input_tokens: 28000 input_price: 1 output_price: 1 - + - name: 01-ai/yi-large + max_input_tokens: 32768 + input_price: 3 + output_price: 3 - platform: octoai # docs: diff --git a/src/client/mod.rs b/src/client/mod.rs index 6878541..b4c0091 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -51,8 +51,7 @@ register_client!( (qianwen, "qianwen", QianwenConfig, QianwenClient), ); -pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 13] = [ - ("anyscale", "https://api.endpoints.anyscale.com/v1"), +pub const OPENAI_COMPATIBLE_PLATFORMS: [(&str, &str); 12] = [ ("deepinfra", "https://api.deepinfra.com/v1/openai"), ("deepseek", "https://api.deepseek.com"), ("fireworks", "https://api.fireworks.ai/inference/v1"), diff --git a/src/client/vertexai.rs b/src/client/vertexai.rs index a4c4050..828c300 100644 --- a/src/client/vertexai.rs +++ b/src/client/vertexai.rs @@ -69,14 +69,10 @@ impl VertexAIClient { let base_url = format!("https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/publishers"); let url = format!("{base_url}/google/models/{}:predict", self.model.name()); - let task_type = match data.query { - true => "RETRIEVAL_DOCUMENT", - false => "QUESTION_ANSWERING", - }; let instances: Vec<_> = data .texts .into_iter() - .map(|v| json!({"task_type": task_type, "content": v})) + .map(|v| json!({"content": v})) .collect(); let body = json!({ "instances": instances,