imaginAIry/imaginairy/utils/tile_up.py

92 lines
3.1 KiB
Python

import logging
import math
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import torch
from torch import Tensor
logger = logging.getLogger(__name__)
def tile_process(
img: "Tensor",
scale: int,
model: "torch.nn.Module",
tile_size: int = 512,
tile_pad: int = 50,
) -> "Tensor":
"""
Process an image by tiling it, processing each tile, and then merging them back into one image.
Args:
img (Tensor): The input image tensor.
scale (int): The scale factor for the image.
tile_size (int): The size of each tile.
tile_pad (int): The padding for each tile.
model (torch.nn.Module): The model used for processing the tile.
Returns:
Tensor: The processed output image.
"""
import torch
batch, channel, height, width = img.shape
output_height = height * scale
output_width = width * scale
output_shape = (batch, channel, output_height, output_width)
# Initialize the output tensor
output = img.new_zeros(output_shape)
tiles_x = math.ceil(width / tile_size)
tiles_y = math.ceil(height / tile_size)
logger.debug(f"Tiling with {tiles_x}x{tiles_y} ({tiles_x*tiles_y}) tiles")
for y in range(tiles_y):
for x in range(tiles_x):
# Calculate the input tile coordinates with and without padding
ofs_x, ofs_y = x * tile_size, y * tile_size
input_start_x, input_end_x = ofs_x, min(ofs_x + tile_size, width)
input_start_y, input_end_y = ofs_y, min(ofs_y + tile_size, height)
padded_start_x, padded_end_x = (
max(input_start_x - tile_pad, 0),
min(input_end_x + tile_pad, width),
)
padded_start_y, padded_end_y = (
max(input_start_y - tile_pad, 0),
min(input_end_y + tile_pad, height),
)
# Extract the input tile
input_tile = img[
:, :, padded_start_y:padded_end_y, padded_start_x:padded_end_x
]
# Process the tile
with torch.no_grad():
output_tile = model(input_tile)
# Calculate the output tile coordinates
output_start_x, output_end_x = input_start_x * scale, input_end_x * scale
output_start_y, output_end_y = input_start_y * scale, input_end_y * scale
tile_output_start_x = (input_start_x - padded_start_x) * scale
tile_output_end_x = (
tile_output_start_x + (input_end_x - input_start_x) * scale
)
tile_output_start_y = (input_start_y - padded_start_y) * scale
tile_output_end_y = (
tile_output_start_y + (input_end_y - input_start_y) * scale
)
# Place the processed tile in the output image
output[:, :, output_start_y:output_end_y, output_start_x:output_end_x] = (
output_tile[
:,
:,
tile_output_start_y:tile_output_end_y,
tile_output_start_x:tile_output_end_x,
]
)
return output