diff --git a/setup.cfg b/setup.cfg
index cf14434..c8dbc9a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -40,7 +40,7 @@ install_requires =
     transformers>=4.32.0,<5.0.0  # if you change this, please also change version assert in petals/__init__.py
     speedtest-cli==2.1.3
     pydantic>=1.10,<2.0  # 2.0 is incompatible with hivemind yet
-    hivemind @ git+https://github.com/learning-at-home/hivemind
+    hivemind==1.1.10.post2
     tensor_parallel==1.0.23
     humanfriendly
     async-timeout>=4.0.2
diff --git a/src/petals/models/bloom/model.py b/src/petals/models/bloom/model.py
index cf83822..53e4a98 100644
--- a/src/petals/models/bloom/model.py
+++ b/src/petals/models/bloom/model.py
@@ -43,7 +43,7 @@ class DistributedBloomModel(FromPretrainedMixin, PTuneMixin, BloomModel):
         attention_mask: Optional[torch.Tensor] = None,
         head_mask: Optional[torch.LongTensor] = None,
         inputs_embeds: Optional[torch.LongTensor] = None,
-        use_cache: Optional[bool] = None,
+        use_cache: Optional[bool] = None,  # Not used here but needed for HF Transformers compatibility
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -63,7 +63,6 @@ class DistributedBloomModel(FromPretrainedMixin, PTuneMixin, BloomModel):
             attention_mask is None or (attention_mask == 1).all()
         ), f"Custom attention masks are not supported, {attention_mask=}"
         assert head_mask is None, f"Custom head masks are not supported, {head_mask=}"
-        assert use_cache is None or use_cache, f"{use_cache=} is not supported"
         assert not output_attentions, f"{output_attentions=} is not supported"
         assert not output_hidden_states, f"{output_hidden_states=} is not supported"
         assert return_dict is None or return_dict, f"{return_dict=} is not supported"
diff --git a/src/petals/models/llama/model.py b/src/petals/models/llama/model.py
index a9dfcc1..cf7d150 100644
--- a/src/petals/models/llama/model.py
+++ b/src/petals/models/llama/model.py
@@ -43,7 +43,7 @@ class DistributedLlamaModel(FromPretrainedMixin, PTuneMixin, LlamaModel):
         position_ids: Optional[torch.LongTensor] = None,
         past_key_values: Optional[RemotePastKeyValues] = None,
         inputs_embeds: Optional[torch.FloatTensor] = None,
-        use_cache: Optional[bool] = None,
+        use_cache: Optional[bool] = None,  # Not used here but needed for HF Transformers compatibility
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
@@ -65,7 +65,6 @@ class DistributedLlamaModel(FromPretrainedMixin, PTuneMixin, LlamaModel):
         assert (
             position_ids is None or (position_ids[:, 1:] - position_ids[:, :-1] == 1).all()
         ), f"Non-consecutive position_ids are not supported, {position_ids=}"
-        assert use_cache is None or use_cache, f"{use_cache=} is not supported"
         assert not output_attentions, f"{output_attentions=} is not supported"
         assert not output_hidden_states, f"{output_hidden_states=} is not supported"
         assert return_dict is None or return_dict, f"{return_dict=} is not supported"
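
Note: after this patch, DistributedBloomModel and DistributedLlamaModel accept use_cache=False instead of raising an AssertionError, so HF Transformers code paths that disable the cache (e.g. training with gradient checkpointing) pass through unchanged while the flag itself is still ignored. A minimal sketch of the new behavior is below; the checkpoint name is illustrative (not taken from this patch) and assumes a Petals swarm serving that model is reachable:

    from petals import AutoDistributedModelForCausalLM
    from transformers import AutoTokenizer

    model_name = "bigscience/bloom-560m"  # illustrative checkpoint, assumed to be served by a swarm
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name)

    input_ids = tokenizer("Hello, world!", return_tensors="pt")["input_ids"]

    # Previously this call tripped `assert use_cache is None or use_cache` inside
    # DistributedBloomModel.forward; with this patch the flag is accepted (and ignored).
    outputs = model(input_ids=input_ids, use_cache=False)
    print(outputs.logits.shape)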