diff --git a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
index 31ed82ed..700650a5 100644
--- a/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
+++ b/gpt4all-api/gpt4all_api/app/api_v1/routes/completions.py
@@ -117,14 +117,13 @@ async def completions(request: CompletionRequest):
         params["num_return_sequences"] = request.n
 
     header = {"Content-Type": "application/json"}
-    payload = {"parameters": params}
     if isinstance(request.prompt, list):
         tasks = []
         for prompt in request.prompt:
+            payload = {"parameters": params}
             payload["inputs"] = prompt
             task = gpu_infer(payload, header)
             tasks.append(task)
-
         results = await asyncio.gather(*tasks)
 
         choices = []
@@ -147,6 +146,7 @@ async def completions(request: CompletionRequest):
         )
 
     else:
+        payload = {"parameters": params}
        # If streaming, we need to return a StreamingResponse
        payload["inputs"] = request.prompt
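
Moving `payload = {"parameters": params}` into the loop (and the `else` branch) appears to fix a shared-state bug: the dict was previously built once and then mutated on every iteration, so by the time `asyncio.gather` actually ran the queued `gpu_infer` coroutines, they all read the last prompt. A minimal, standalone sketch of that failure mode, using a hypothetical `fake_infer` in place of the real `gpu_infer`:

```python
import asyncio

async def fake_infer(payload):
    # Stand-in for gpu_infer: reads the payload only when the task runs,
    # i.e. after the loop that built all the coroutines has finished.
    await asyncio.sleep(0)
    return payload["inputs"]

async def shared_payload():
    params = {"max_tokens": 16}
    payload = {"parameters": params}       # one dict shared by every task (old behavior)
    tasks = []
    for prompt in ["a", "b", "c"]:
        payload["inputs"] = prompt         # mutates the shared dict
        tasks.append(fake_infer(payload))
    return await asyncio.gather(*tasks)    # -> ['c', 'c', 'c']

async def per_prompt_payload():
    params = {"max_tokens": 16}
    tasks = []
    for prompt in ["a", "b", "c"]:
        payload = {"parameters": params}   # fresh dict per prompt, as in the diff
        payload["inputs"] = prompt
        tasks.append(fake_infer(payload))
    return await asyncio.gather(*tasks)    # -> ['a', 'b', 'c']

if __name__ == "__main__":
    print(asyncio.run(shared_payload()))
    print(asyncio.run(per_prompt_payload()))
```

Running the sketch prints `['c', 'c', 'c']` for the shared dict and `['a', 'b', 'c']` when a fresh dict is built per prompt, mirroring the behavior before and after this change.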