debug version

measurements
Your Name 10 months ago
parent 7dc1aa5151
commit e9e506711e

@ -436,30 +436,31 @@ class ModuleContainer(threading.Thread):
blocks = {}
try:
module_uid, block_index = module_uids[0], block_indices[0]
block = load_pretrained_block(
converted_model_name_or_path,
block_index,
config=block_config,
torch_dtype=torch_dtype,
revision=revision,
token=token,
cache_dir=cache_dir,
max_disk_space=max_disk_space,
)
block = convert_block(
block,
block_index,
block_config,
tensor_parallel_devices,
device,
quant_type,
adapters=server_info.adapters,
freeze=True,
token=token,
cache_dir=cache_dir,
max_disk_space=max_disk_space,
)
for module_uid, block_index in zip(module_uids, block_indices):
block = load_pretrained_block(
converted_model_name_or_path,
block_index,
config=block_config,
torch_dtype=torch_dtype,
revision=revision,
token=token,
cache_dir=cache_dir,
max_disk_space=max_disk_space,
)
block = convert_block(
block,
block_index,
block_config,
tensor_parallel_devices,
device,
quant_type,
adapters=server_info.adapters,
freeze=True,
token=token,
cache_dir=cache_dir,
max_disk_space=max_disk_space,
)
blocks[module_uid] = TransformerBackend(
module_uid,
block,

@ -64,6 +64,8 @@ def convert_block(
**kwargs,
)
add_adapter_to_block(block, block_index, adapter_name, adapter_config, adapter_state_dict)
else:
print("NO CONVERSION")
return block

Loading…
Cancel
Save