fix(openllm): update with newer remote client implementation (#11740)

cc @baskaryan

---------

Signed-off-by: Aaron <29749331+aarnphm@users.noreply.github.com>
pull/11745/head
Aaron Pham 10 months ago committed by GitHub
parent 11cdfe44af
commit 6c61315067
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -217,9 +217,9 @@ class OpenLLM(LLM):
def _identifying_params(self) -> IdentifyingParams:
"""Get the identifying parameters."""
if self._client is not None:
self.llm_kwargs.update(self._client.configuration)
model_name = self._client.model_name
model_id = self._client.model_id
self.llm_kwargs.update(self._client._config())
model_name = self._client._metadata()["model_name"]
model_id = self._client._metadata()["model_id"]
else:
if self._runner is None:
raise ValueError("Runner must be initialized.")
@@ -265,7 +265,9 @@ class OpenLLM(LLM):
self._identifying_params["model_name"], **copied
)
if self._client:
res = self._client.query(prompt, **config.model_dump(flatten=True))
res = self._client.generate(
prompt, **config.model_dump(flatten=True)
).responses[0]
else:
assert self._runner is not None
res = self._runner(prompt, **config.model_dump(flatten=True))
@@ -300,9 +302,10 @@ class OpenLLM(LLM):
self._identifying_params["model_name"], **copied
)
if self._client:
res = await self._client.acall(
"generate", prompt, **config.model_dump(flatten=True)
)
async_client = openllm.client.AsyncHTTPClient(self.server_url)
res = (
await async_client.generate(prompt, **config.model_dump(flatten=True))
).responses[0]
else:
assert self._runner is not None
(

Loading…
Cancel
Save