refactor: rename model.max_concurrent_chunks to model.max_batch_size (#626)

pull/627/head
sigoden 2 weeks ago committed by GitHub
parent f2378e1725
commit 7a089d846e

@@ -90,7 +90,7 @@ clients:
 #       type: embedding             # Embedding model
 #       max_input_tokens: 2048
 #       default_chunk_size: 2000
-#       max_concurrent_chunks: 100
+#       max_batch_size: 100
 #   - name: xxxx
 #       type: rerank                # Rerank model
 #       max_input_tokens: 2048

@@ -33,17 +33,17 @@
       type: embedding
       max_input_tokens: 8191
       default_chunk_size: 3000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: text-embedding-3-small
       type: embedding
       max_input_tokens: 8191
       default_chunk_size: 3000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: text-embedding-ada-002
       type: embedding
       max_input_tokens: 8191
       default_chunk_size: 3000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
 - platform: gemini
   # docs:
@@ -77,7 +77,7 @@
       type: embedding
       max_input_tokens: 2048
       default_chunk_size: 1500
-      max_concurrent_chunks: 5
+      max_batch_size: 5
 - platform: claude
   # docs:
@@ -176,12 +176,12 @@
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 96
+      max_batch_size: 96
     - name: embed-multilingual-v3.0
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 96
+      max_batch_size: 96
     - name: rerank-english-v3.0
       type: rerank
       max_input_tokens: 4096
@@ -307,12 +307,12 @@
       type: embedding
       max_input_tokens: 3072
       default_chunk_size: 2000
-      max_concurrent_chunks: 5
+      max_batch_size: 5
     - name: text-multilingual-embedding-002
       type: embedding
       max_input_tokens: 3072
       default_chunk_size: 2000
-      max_concurrent_chunks: 5
+      max_batch_size: 5
 - platform: vertexai-claude
   # docs:
@@ -460,12 +460,12 @@
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: '@cf/baai/bge-large-en-v1.5'
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
 - platform: replicate
   # docs:
@@ -578,7 +578,7 @@
       type: embedding
       max_input_tokens: 2048
       default_chunk_size: 1500
-      max_concurrent_chunks: 25
+      max_batch_size: 25
 - platform: moonshot
   # docs:
@@ -721,12 +721,12 @@
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 30
+      max_batch_size: 30
     - name: thenlper/gte-large
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 30
+      max_batch_size: 30
 - platform: deepinfra
   # docs:
@@ -773,52 +773,52 @@
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: BAAI/bge-base-en-v1.5
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: BAAI/bge-m3
       type: embedding
       max_input_tokens: 8192
       default_chunk_size: 2000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: intfloat/e5-base-v2
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: intfloat/e5-large-v2
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: intfloat/multilingual-e5-large
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: sentence-transformers/all-MiniLM-L6-v2
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: sentence-transformers/paraphrase-MiniLM-L6-v2
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: thenlper/gte-base
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: thenlper/gte-large
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
 - platform: fireworks
   # docs:
@@ -876,22 +876,22 @@
       type: embedding
       max_input_tokens: 8192
       default_chunk_size: 1500
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: WhereIsAI/UAE-Large-V1
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: thenlper/gte-large
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: thenlper/gte-base
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
 - platform: openrouter
   # docs:
@@ -1072,7 +1072,7 @@
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
 - platform: together
   # docs:
@@ -1108,19 +1108,19 @@
       max_input_tokens: 32768
       input_price: 0.9
       output_price: 0.9
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: WhereIsAI/UAE-Large-V1
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: BAAI/bge-large-en-v1.5
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100
     - name: BAAI/bge-base-en-v1.5
       type: embedding
       max_input_tokens: 512
       default_chunk_size: 1000
-      max_concurrent_chunks: 100
+      max_batch_size: 100

@@ -392,7 +392,7 @@ pub trait Client: Sync + Send {
     async fn embeddings(&self, data: EmbeddingsData) -> Result<Vec<Vec<f32>>> {
         let client = self.build_client()?;
-        self.model().guard_max_concurrent_chunks(&data)?;
+        self.model().guard_max_batch_size(&data)?;
         self.embeddings_inner(&client, data)
             .await
             .context("Failed to fetch embeddings")

@ -175,8 +175,8 @@ impl Model {
self.data.default_chunk_size.unwrap_or(1000) self.data.default_chunk_size.unwrap_or(1000)
} }
pub fn max_concurrent_chunks(&self) -> usize { pub fn max_batch_size(&self) -> usize {
self.data.max_concurrent_chunks.unwrap_or(1) self.data.max_batch_size.unwrap_or(1)
} }
pub fn max_tokens_param(&self) -> Option<isize> { pub fn max_tokens_param(&self) -> Option<isize> {
@@ -234,9 +234,9 @@ impl Model {
         Ok(())
     }

-    pub fn guard_max_concurrent_chunks(&self, data: &EmbeddingsData) -> Result<()> {
-        if data.texts.len() > self.max_concurrent_chunks() {
-            bail!("Exceed max_concurrent_chunks limit");
+    pub fn guard_max_batch_size(&self, data: &EmbeddingsData) -> Result<()> {
+        if data.texts.len() > self.max_batch_size() {
+            bail!("Exceed max_batch_size limit");
         }
         Ok(())
     }
@@ -262,7 +262,7 @@ pub struct ModelData {
     // embedding-only properties
     pub default_chunk_size: Option<usize>,
-    pub max_concurrent_chunks: Option<usize>,
+    pub max_batch_size: Option<usize>,
 }

 impl ModelData {
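
The renamed field keeps the same semantics: an optional per-model limit that defaults to 1 and gates how many texts a single embeddings request may carry. Below is a minimal self-contained sketch of that default-and-guard behavior; the structs and error type are simplified stand-ins for illustration, not the crate's actual ModelData/EmbeddingsData (which use anyhow's bail!).

```rust
// Simplified stand-ins for the renamed field and its guard.
struct ModelData {
    max_batch_size: Option<usize>, // formerly max_concurrent_chunks
}

struct Model {
    data: ModelData,
}

impl Model {
    // Unset means a batch limit of 1, matching the unwrap_or(1) default above.
    fn max_batch_size(&self) -> usize {
        self.data.max_batch_size.unwrap_or(1)
    }

    // Reject a request whose text count exceeds the model's batch limit.
    fn guard_max_batch_size(&self, texts: &[String]) -> Result<(), String> {
        if texts.len() > self.max_batch_size() {
            return Err("Exceed max_batch_size limit".into());
        }
        Ok(())
    }
}

fn main() {
    let model = Model {
        data: ModelData {
            max_batch_size: Some(2),
        },
    };
    let texts = vec!["a".to_string(), "b".to_string(), "c".to_string()];
    // Three texts against a batch limit of two should be rejected.
    assert!(model.guard_max_batch_size(&texts).is_err());
}
```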

@@ -414,13 +414,13 @@ impl Rag {
     ) -> Result<EmbeddingsOutput> {
         let EmbeddingsData { texts, query } = data;
         let mut output = vec![];
-        let chunks = texts.chunks(self.embedding_model.max_concurrent_chunks());
-        let chunks_len = chunks.len();
+        let batch_chunks = texts.chunks(self.embedding_model.max_batch_size());
+        let batch_chunks_len = batch_chunks.len();
         progress(
             &progress_tx,
-            format!("Creating embeddings [1/{chunks_len}]"),
+            format!("Creating embeddings [1/{batch_chunks_len}]"),
         );
-        for (index, texts) in chunks.enumerate() {
+        for (index, texts) in batch_chunks.enumerate() {
             let chunk_data = EmbeddingsData {
                 texts: texts.to_vec(),
                 query,
@@ -433,7 +433,7 @@ impl Rag {
             output.extend(chunk_output);
             progress(
                 &progress_tx,
-                format!("Creating embeddings [{}/{chunks_len}]", index + 1),
+                format!("Creating embeddings [{}/{batch_chunks_len}]", index + 1),
             );
         }
         Ok(output)
