Use the configured Nvidia TensorRT model name for stop_stream() instead of a hardcoded literal (#16997)

Just removing a legacy leftover: the hardcoded "tensorrt_llm" model name passed to stop_stream().
pull/17049/merge
Mikhail Khludnev 5 months ago committed by GitHub
parent 2ef69fe11b
commit 2145636f1d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -377,14 +377,14 @@ class StreamingResponseGenerator(queue.Queue):
def __init__(
    self,
    client: grpcclient.InferenceServerClient,
    llm: TritonTensorRTLLM,
    request_id: str,
    force_batch: bool,
    stop_words: Sequence[str],
) -> None:
    """Build the per-request streaming token queue.

    Args:
        client: Triton gRPC client used to manage the token stream.
        llm: ``TritonTensorRTLLM`` instance the stream belongs to.
        request_id: identifier of the in-flight inference request.
        force_batch: stored as ``self._batch``; presumably controls the
            stream-stop signalling — confirm against ``__next__``.
        stop_words: token strings that mark the end of generation.
    """
    # Set up the underlying queue.Queue before recording our own state.
    super().__init__()
    # Public handles to the request and its owning client/model wrapper.
    self.request_id = request_id
    self.client = client
    self.llm = llm
    # Private flags consumed while iterating over streamed tokens.
    self._stop_words = stop_words
    self._batch = force_batch
@ -397,8 +397,8 @@ class StreamingResponseGenerator(queue.Queue):
"""Return the next retrieved token."""
val = self.get()
if val is None or val in self._stop_words:
self.client.stop_stream(
"tensorrt_llm", self.request_id, signal=not self._batch
self.llm.stop_stream(
self.llm.model_name, self.request_id, signal=not self._batch
)
raise StopIteration()
return val

Loading…
Cancel
Save