|
|
|
@ -48,7 +48,48 @@ def import_langkit(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WhyLabsCallbackHandler(BaseCallbackHandler):
|
|
|
|
|
"""WhyLabs CallbackHandler."""
|
|
|
|
|
"""
|
|
|
|
|
Callback Handler for logging to WhyLabs. This callback handler utilizes
|
|
|
|
|
`langkit` to extract features from the prompts & responses when interacting with
|
|
|
|
|
an LLM. These features can be used to guardrail, evaluate, and observe interactions
|
|
|
|
|
over time to detect issues relating to hallucinations, prompt engineering,
|
|
|
|
|
or output validation. LangKit is an LLM monitoring toolkit developed by WhyLabs.
|
|
|
|
|
|
|
|
|
|
Here are some examples of what can be monitored with LangKit:
|
|
|
|
|
* Text Quality
|
|
|
|
|
- readability score
|
|
|
|
|
- complexity and grade scores
|
|
|
|
|
* Text Relevance
|
|
|
|
|
- Similarity scores between prompts and responses
|
|
|
|
|
- Similarity scores against user-defined themes
|
|
|
|
|
- Topic classification
|
|
|
|
|
* Security and Privacy
|
|
|
|
|
- patterns - count of strings matching a user-defined regex pattern group
|
|
|
|
|
- jailbreaks - similarity scores with respect to known jailbreak attempts
|
|
|
|
|
- prompt injection - similarity scores with respect to known prompt attacks
|
|
|
|
|
- refusals - similarity scores with respect to known LLM refusal responses
|
|
|
|
|
* Sentiment and Toxicity
|
|
|
|
|
- sentiment analysis
|
|
|
|
|
- toxicity analysis
|
|
|
|
|
|
|
|
|
|
For more information, see https://docs.whylabs.ai/docs/language-model-monitoring
|
|
|
|
|
or check out the LangKit repo here: https://github.com/whylabs/langkit
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
Args:
|
|
|
|
|
api_key (Optional[str]): WhyLabs API key. Optional because the preferred
|
|
|
|
|
way to specify the API key is with the environment variable
|
|
|
|
|
WHYLABS_API_KEY.
|
|
|
|
|
org_id (Optional[str]): WhyLabs organization id to write profiles to.
|
|
|
|
|
Optional because the preferred way to specify the organization id is
|
|
|
|
|
with the environment variable WHYLABS_DEFAULT_ORG_ID.
|
|
|
|
|
dataset_id (Optional[str]): WhyLabs dataset id to write profiles to.
|
|
|
|
|
Optional because the preferred way to specify the dataset id is
|
|
|
|
|
with the environment variable WHYLABS_DEFAULT_DATASET_ID.
|
|
|
|
|
sentiment (bool): Whether to enable sentiment analysis. Defaults to False.
|
|
|
|
|
toxicity (bool): Whether to enable toxicity analysis. Defaults to False.
|
|
|
|
|
themes (bool): Whether to enable theme analysis. Defaults to False.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self, logger: Logger):
|
|
|
|
|
"""Initiate the rolling logger"""
|
|
|
|
|