diff --git a/config.example.yaml b/config.example.yaml
index ae9c53f..172a6b2 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -90,7 +90,7 @@ clients:
   #   type: embedding                 # Embedding model
   #   max_input_tokens: 2048
   #   default_chunk_size: 2000
-  #   max_concurrent_chunks: 100
+  #   max_batch_size: 100
   # - name: xxxx
   #   type: rerank                    # Rerank model
   #   max_input_tokens: 2048
diff --git a/models.yaml b/models.yaml
index ccb0c60..be4ecc3 100644
--- a/models.yaml
+++ b/models.yaml
@@ -33,17 +33,17 @@
     type: embedding
     max_input_tokens: 8191
     default_chunk_size: 3000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: text-embedding-3-small
     type: embedding
     max_input_tokens: 8191
     default_chunk_size: 3000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: text-embedding-ada-002
     type: embedding
     max_input_tokens: 8191
     default_chunk_size: 3000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
 
 - platform: gemini
   # docs:
@@ -77,7 +77,7 @@
     type: embedding
     max_input_tokens: 2048
    default_chunk_size: 1500
-    max_concurrent_chunks: 5
+    max_batch_size: 5
 
 - platform: claude
   # docs:
@@ -176,12 +176,12 @@
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 96
+    max_batch_size: 96
   - name: embed-multilingual-v3.0
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 96
+    max_batch_size: 96
   - name: rerank-english-v3.0
     type: rerank
     max_input_tokens: 4096
@@ -307,12 +307,12 @@
     type: embedding
     max_input_tokens: 3072
     default_chunk_size: 2000
-    max_concurrent_chunks: 5
+    max_batch_size: 5
   - name: text-multilingual-embedding-002
     type: embedding
     max_input_tokens: 3072
     default_chunk_size: 2000
-    max_concurrent_chunks: 5
+    max_batch_size: 5
 
 - platform: vertexai-claude
   # docs:
@@ -460,12 +460,12 @@
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: '@cf/baai/bge-large-en-v1.5'
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
 
 - platform: replicate
   # docs:
@@ -578,7 +578,7 @@
     type: embedding
     max_input_tokens: 2048
     default_chunk_size: 1500
-    max_concurrent_chunks: 25
+    max_batch_size: 25
 
 - platform: moonshot
   # docs:
@@ -721,12 +721,12 @@
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 30
+    max_batch_size: 30
   - name: thenlper/gte-large
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 30
+    max_batch_size: 30
 
 - platform: deepinfra
   # docs:
@@ -773,52 +773,52 @@
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: BAAI/bge-base-en-v1.5
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: BAAI/bge-m3
     type: embedding
     max_input_tokens: 8192
     default_chunk_size: 2000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: intfloat/e5-base-v2
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: intfloat/e5-large-v2
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: intfloat/multilingual-e5-large
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: sentence-transformers/all-MiniLM-L6-v2
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
  - name: sentence-transformers/paraphrase-MiniLM-L6-v2
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: thenlper/gte-base
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: thenlper/gte-large
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
 
 - platform: fireworks
   # docs:
@@ -876,22 +876,22 @@
     type: embedding
     max_input_tokens: 8192
     default_chunk_size: 1500
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: WhereIsAI/UAE-Large-V1
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: thenlper/gte-large
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: thenlper/gte-base
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
 
 - platform: openrouter
   # docs:
@@ -1072,7 +1072,7 @@
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
 
 - platform: together
   # docs:
@@ -1108,19 +1108,19 @@
     max_input_tokens: 32768
     input_price: 0.9
     output_price: 0.9
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: WhereIsAI/UAE-Large-V1
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: BAAI/bge-large-en-v1.5
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
+    max_batch_size: 100
   - name: BAAI/bge-base-en-v1.5
     type: embedding
     max_input_tokens: 512
     default_chunk_size: 1000
-    max_concurrent_chunks: 100
\ No newline at end of file
+    max_batch_size: 100
\ No newline at end of file
diff --git a/src/client/common.rs b/src/client/common.rs
index de2fa50..796c632 100644
--- a/src/client/common.rs
+++ b/src/client/common.rs
@@ -392,7 +392,7 @@ pub trait Client: Sync + Send {
 
     async fn embeddings(&self, data: EmbeddingsData) -> Result<Vec<Vec<f32>>> {
         let client = self.build_client()?;
-        self.model().guard_max_concurrent_chunks(&data)?;
+        self.model().guard_max_batch_size(&data)?;
         self.embeddings_inner(&client, data)
             .await
             .context("Failed to fetch embeddings")
diff --git a/src/client/model.rs b/src/client/model.rs
index 0584f82..4c69d78 100644
--- a/src/client/model.rs
+++ b/src/client/model.rs
@@ -175,8 +175,8 @@ impl Model {
         self.data.default_chunk_size.unwrap_or(1000)
     }
 
-    pub fn max_concurrent_chunks(&self) -> usize {
-        self.data.max_concurrent_chunks.unwrap_or(1)
+    pub fn max_batch_size(&self) -> usize {
+        self.data.max_batch_size.unwrap_or(1)
     }
 
     pub fn max_tokens_param(&self) -> Option<isize> {
@@ -234,9 +234,9 @@
         Ok(())
     }
 
-    pub fn guard_max_concurrent_chunks(&self, data: &EmbeddingsData) -> Result<()> {
-        if data.texts.len() > self.max_concurrent_chunks() {
-            bail!("Exceed max_concurrent_chunks limit");
+    pub fn guard_max_batch_size(&self, data: &EmbeddingsData) -> Result<()> {
+        if data.texts.len() > self.max_batch_size() {
+            bail!("Exceed max_batch_size limit");
         }
         Ok(())
     }
@@ -262,7 +262,7 @@ pub struct ModelData {
 
     // embedding-only properties
     pub default_chunk_size: Option<usize>,
-    pub max_concurrent_chunks: Option<usize>,
+    pub max_batch_size: Option<usize>,
 }
 
 impl ModelData {
diff --git a/src/rag/mod.rs b/src/rag/mod.rs
index 628a9e4..116beea 100644
--- a/src/rag/mod.rs
+++ b/src/rag/mod.rs
@@ -414,13 +414,13 @@ impl Rag {
     ) -> Result<EmbeddingsOutput> {
         let EmbeddingsData { texts, query } = data;
         let mut output = vec![];
-        let chunks = texts.chunks(self.embedding_model.max_concurrent_chunks());
-        let chunks_len = chunks.len();
+        let batch_chunks = texts.chunks(self.embedding_model.max_batch_size());
+        let batch_chunks_len = batch_chunks.len();
         progress(
             &progress_tx,
-            format!("Creating embeddings [1/{chunks_len}]"),
+            format!("Creating embeddings [1/{batch_chunks_len}]"),
         );
-        for (index, texts) in chunks.enumerate() {
+        for (index, texts) in batch_chunks.enumerate() {
             let chunk_data = EmbeddingsData {
                 texts: texts.to_vec(),
                 query,
@@ -433,7 +433,7 @@ impl Rag {
             output.extend(chunk_output);
             progress(
                 &progress_tx,
-                format!("Creating embeddings [{}/{chunks_len}]", index + 1),
+                format!("Creating embeddings [{}/{batch_chunks_len}]", index + 1),
             );
         }
         Ok(output)