Added gpt max logit choices

laurel/helm
Laurel Orr 2 years ago
parent 5d79281c4c
commit e3d0140fe6

@@ -19,6 +19,7 @@ from transformers import (
from manifest.api.models.model import Model
MODEL_REGISTRY = {
    "EleutherAI/gpt-neo-125M": GPTNeoForCausalLM,
    "EleutherAI/gpt-neo-1.3B": GPTNeoForCausalLM,
    "EleutherAI/gpt-neo-2.7B": GPTNeoForCausalLM,
    "EleutherAI/gpt-j-6B": GPTJForCausalLM,
@@ -303,44 +304,105 @@ class HuggingFaceModel(Model):
            the returned gold choice
        """
        max_input_len = self.pipeline.max_length
        if self.is_encdec:
            # Adapted from https://github.com/bigscience-workshop/t-zero
            tokenized_inputs = self.pipeline.tokenizer(
                prompt,
                padding="longest",
                max_length=max_input_len,
                truncation=True,
                add_special_tokens=False,
            )
            # Get max target length
            max_target_len = max(
                [
                    len(self.pipeline.tokenizer(ans_choi)["input_ids"])
                    for ans_choi in gold_choices
                ]
            )
            tokenized_targets = [
                self.pipeline.tokenizer(
                    ans_choi,
                    # padding is on the right here.
                    padding="max_length",
                    max_length=min(max_target_len, max_input_len),
                    truncation=True,
                )
                for ans_choi in gold_choices
            ]
            # Repeat input ids for each choice to form a batch
            features = {
                k: [tokenized_inputs[k] for _ in range(len(gold_choices))]
                for k in tokenized_inputs.keys()
            }
            # Add choice tokens + mask
            features["labels"] = [
                tokenized_targets[k]["input_ids"] for k in range(len(gold_choices))
            ]
            features["labels_attention_mask"] = [
                tokenized_targets[k]["attention_mask"]
                for k in range(len(gold_choices))
            ]
        else:
            tokenized_inputs = self.pipeline.tokenizer(
                prompt,
                max_length=max_input_len,
                truncation=True,
                add_special_tokens=False,
            )
            tokenized_targets = [
                self.pipeline.tokenizer(
                    # Add starting whitespace for gpt
                    ans_choi if ans_choi.startswith((" ", "\n")) else f" {ans_choi}",
                    max_length=max_input_len,
                    truncation=True,
                )
                for ans_choi in gold_choices
            ]
            features = {
                k: []
                for k in list(tokenized_inputs.keys()) + ["labels_attention_mask"]
            }
            max_effective_input_len = 0
            for tokenized_targ in tokenized_targets:
                for k in tokenized_inputs.keys():
                    # Make sure to leave room for the outputs
                    features[k].append(
                        tokenized_inputs[k][
                            : min(
                                len(tokenized_inputs[k]),
                                max_input_len - len(tokenized_targ[k]),
                            )
                        ]
                        + tokenized_targ[k]
                    )
                    max_effective_input_len = max(
                        max_effective_input_len, len(features[k][-1])
                    )
                # Manually add labels_attention_mask
                features["labels_attention_mask"].append(
                    [0]
                    * min(
                        len(tokenized_inputs["input_ids"]),
                        max_input_len - len(tokenized_targ["input_ids"]),
                    )
                    + [1] * len(tokenized_targ["input_ids"])
                )
            # Manually pad to max effective length
            for k in features.keys():
                for i in range(len(features[k])):
                    if k == "input_ids":
                        features[k][i] += [self.pipeline.tokenizer.pad_token_id] * (
                            max_effective_input_len - len(features[k][i])
                        )
                    elif k in ["attention_mask", "labels_attention_mask"]:
                        features[k][i] += [0] * (
                            max_effective_input_len - len(features[k][i])
                        )
                    else:
                        raise ValueError(f"Unknown key {k} for decoder only models")
            features["labels"] = features["input_ids"]
        # Convert to tensors
        tensor_features = {}
        for k in features:
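For decoder-only models there is no separate decoder input, so the new else branch appends each choice's tokens to the prompt, truncating the prompt when needed so the choice always fits, and builds a labels_attention_mask that is 1 only over choice positions. A standalone sketch of that construction, assuming a GPT-2 tokenizer; the variable names here are illustrative:

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
max_input_len = 1024

prompt = "The capital of France is"
# A leading space makes GPT-style BPE tokenize choices as mid-text words.
choices = [" Paris", " London"]

prompt_ids = tokenizer(prompt, add_special_tokens=False)["input_ids"]
for choice in choices:
    choice_ids = tokenizer(choice)["input_ids"]
    # Keep room at the end of the context window for the choice tokens.
    keep = min(len(prompt_ids), max_input_len - len(choice_ids))
    input_ids = prompt_ids[:keep] + choice_ids
    # Only choice positions should count toward this choice's score.
    labels_attention_mask = [0] * keep + [1] * len(choice_ids)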
@@ -356,6 +418,7 @@ class HuggingFaceModel(Model):
        ]
        stacked_logits = torch.vstack(logits)
        # Compute most likely option
        # Adapted from https://github.com/bigscience-workshop/t-zero
        masked_log_probs = tensor_features["labels_attention_mask"].unsqueeze(
            -1
        ) * torch.log_softmax(stacked_logits, dim=-1)
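Once logits for the stacked batch are available, picking the gold choice reduces to masking the log-probabilities so only choice tokens count, gathering each label token's log-probability, and taking the argmax of the per-choice sums. A sketch of that final step in the spirit of the cited t-zero code; the function name and tensor shapes are assumptions:

import torch

def most_likely_choice(
    stacked_logits: torch.Tensor,  # (num_choices, seq_len, vocab)
    labels: torch.Tensor,  # (num_choices, seq_len), long
    labels_attention_mask: torch.Tensor,  # (num_choices, seq_len), 0/1
) -> int:
    # Zero out log probs everywhere except choice-token positions.
    masked_log_probs = labels_attention_mask.unsqueeze(-1) * torch.log_softmax(
        stacked_logits, dim=-1
    )
    # Log prob assigned to each gold label token at its position.
    seq_token_log_probs = torch.gather(
        masked_log_probs, -1, labels.unsqueeze(-1)
    ).squeeze(-1)
    # Sum over the sequence; the highest total log likelihood wins.
    return int(seq_token_log_probs.sum(dim=-1).argmax().item())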
