community[patch] : [Fiddler] ensure dataset is not added if model is present (#19293)

**Description:**
- Minor PR to speed up onboarding by not trying to add a dataset if a
model is already present.
- Replace the batch publish API with the streaming API when only a single
event is published.

**Dependencies:** any dependencies required for this change
**Twitter handle:** behalder

Co-authored-by: Barun Halder <barun@fiddler.ai>
pull/19205/head^2
Barun Amalkumar Halder 3 months ago committed by GitHub
parent 6e090280fd
commit 9246ec6b36
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -131,43 +131,42 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
dataset_info.columns[i].data_type = self.fdl.DataType.CATEGORY
dataset_info.columns[i].possible_values = [SUCCESS, FAILURE]
if self.model not in self.fiddler_client.get_dataset_names(self.project):
print( # noqa: T201
f"adding dataset {self.model} to project {self.project}."
"This only has to be done once."
)
try:
self.fiddler_client.upload_dataset(
project_id=self.project,
dataset_id=self.model,
dataset={"train": self._df},
info=dataset_info,
)
except Exception as e:
if self.model not in self.fiddler_client.get_model_names(self.project):
if self.model not in self.fiddler_client.get_dataset_names(self.project):
print( # noqa: T201
f"Error adding dataset {self.model}: {e}."
"Fiddler integration will not work."
f"adding dataset {self.model} to project {self.project}."
"This only has to be done once."
)
raise e
model_info = self.fdl.ModelInfo.from_dataset_info(
dataset_info=dataset_info,
dataset_id="train",
model_task=self.fdl.ModelTask.LLM,
features=[PROMPT, CONTEXT, RESPONSE],
target=FEEDBACK,
metadata_cols=[
RUN_ID,
TOTAL_TOKENS,
PROMPT_TOKENS,
COMPLETION_TOKENS,
MODEL_NAME,
DURATION,
],
custom_features=self.custom_features,
)
if self.model not in self.fiddler_client.get_model_names(self.project):
try:
self.fiddler_client.upload_dataset(
project_id=self.project,
dataset_id=self.model,
dataset={"train": self._df},
info=dataset_info,
)
except Exception as e:
print( # noqa: T201
f"Error adding dataset {self.model}: {e}."
"Fiddler integration will not work."
)
raise e
model_info = self.fdl.ModelInfo.from_dataset_info(
dataset_info=dataset_info,
dataset_id="train",
model_task=self.fdl.ModelTask.LLM,
features=[PROMPT, CONTEXT, RESPONSE],
target=FEEDBACK,
metadata_cols=[
RUN_ID,
TOTAL_TOKENS,
PROMPT_TOKENS,
COMPLETION_TOKENS,
MODEL_NAME,
DURATION,
],
custom_features=self.custom_features,
)
print( # noqa: T201
f"adding model {self.model} to project {self.project}."
"This only has to be done once." # noqa: T201
@ -286,7 +285,13 @@ class FiddlerCallbackHandler(BaseCallbackHandler):
df[key] = [value] * prompt_count if isinstance(value, int) else value
try:
self.fiddler_client.publish_events_batch(self.project, self.model, df)
if df.shape[0] > 1:
self.fiddler_client.publish_events_batch(self.project, self.model, df)
else:
df_dict = df.to_dict(orient="records")
self.fiddler_client.publish_event(
self.project, self.model, event=df_dict[0]
)
except Exception as e:
print( # noqa: T201
f"Error publishing events to fiddler: {e}. continuing..."

Loading…
Cancel
Save