build: vendorize facexlib
Had too many unused sub-dependencies. Also monkeypatch the download mechanism to use our standard download function. (pull/444/head)
parent
4521d518ac
commit
5bbb09f69e
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2020 Xintao Wang
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
@ -0,0 +1,7 @@
|
|||||||
|
# flake8: noqa
|
||||||
|
from .alignment import *
|
||||||
|
from .detection import *
|
||||||
|
from .recognition import *
|
||||||
|
from .tracking import *
|
||||||
|
from .utils import *
|
||||||
|
from .visualization import *
|
@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
## Landmarks
|
||||||
|
|
||||||
|
- 5 landmarks
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="../../assets/landmarks_5.jpg" height="300">
|
||||||
|
</p>
|
||||||
|
|
||||||
|
- 68 landmarks
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="../../assets/landmarks_68.png" height="400">
|
||||||
|
</p>
|
||||||
|
|
||||||
|
- 98 landmarks
|
||||||
|
|
||||||
|
<p align="center">
|
||||||
|
<img src="../../assets/landmarks_98.png" height="500">
|
||||||
|
</p>
|
@ -0,0 +1,22 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .awing_arch import FAN
|
||||||
|
from .convert_98_to_68_landmarks import landmark_98_to_68
|
||||||
|
|
||||||
|
# Names exported by a star-import of this package.
__all__ = ['FAN', 'landmark_98_to_68']
|
||||||
|
|
||||||
|
|
||||||
|
def init_alignment_model(model_name, half=False, device='cuda', model_rootpath=None):
    """Build a face-alignment network and load its pretrained weights.

    Args:
        model_name (str): Architecture to build. Only ``'awing_fan'`` is
            supported.
        half (bool): Accepted for interface compatibility; not used by this
            function.
        device (str | torch.device): Device the model is moved to.
        model_rootpath (str | None): Forwarded to ``load_file_from_url`` as
            ``save_dir``.

    Returns:
        The ``FAN`` model in eval mode, moved to ``device``.

    Raises:
        NotImplementedError: If ``model_name`` is not ``'awing_fan'``.
    """
    if model_name != 'awing_fan':
        raise NotImplementedError(f'{model_name} is not implemented.')

    model = FAN(num_modules=4, num_landmarks=98, device=device)
    model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/alignment_WFLW_4HG.pth'

    model_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    # Deserialize onto the CPU so that loading does not depend on the device
    # the checkpoint was saved from (e.g. a CUDA checkpoint on a CPU-only
    # host). The model is moved to ``device`` afterwards.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'], strict=True)
    model.eval()
    model = model.to(device)
    return model
|
@ -0,0 +1,378 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_points(heatmaps):
    """Convert heatmaps to sub-pixel (x, y) landmark positions.

    For every heatmap the arg-max cell is located, nudged by a quarter pixel
    towards the larger of its horizontal neighbours and the larger of its
    vertical neighbours, and finally shifted by half a pixel.

    Neighbour look-ups are clamped per heatmap to the valid flat index range
    ``[0, H * W - 1]``, so a peak on the border of one heatmap neither raises
    nor affects the other heatmaps, and any heatmap size is supported.

    Args:
        heatmaps (numpy.ndarray): Array of shape (B, N, H, W).

    Returns:
        numpy.ndarray: float64 array of shape (B, N, 2) with (x, y) per
        heatmap, in heatmap pixel units.
    """
    B, N, H, W = heatmaps.shape
    HW = H * W
    BN_range = np.arange(B * N)

    # One flattened row per (batch, landmark) pair.
    heatline = heatmaps.reshape(B * N, HW)
    inr = np.argmax(heatline, axis=1)

    # Flat index -> (column, row). ``np.float`` was removed from NumPy, so the
    # concrete float64 dtype is named explicitly.
    preds = np.stack((inr % W, inr // W), axis=1).astype(np.float64)

    last = HW - 1
    x_up = heatline[BN_range, np.minimum(inr + 1, last)]
    x_down = heatline[BN_range, np.maximum(inr - 1, 0)]
    y_up = heatline[BN_range, np.minimum(inr + W, last)]
    y_down = heatline[BN_range, np.maximum(inr - W, 0)]

    # Only the sign of each neighbour difference is used: +/- 0.25 px.
    think_diff = 0.25 * np.sign(np.stack((x_up - x_down, y_up - y_down), axis=1))

    preds += think_diff
    preds += .5
    return preds.reshape(B, N, 2)
|
||||||
|
|
||||||
|
|
||||||
|
class AddCoordsTh(nn.Module):
    """Append coordinate channels to a feature map.

    Two channels holding the position along each spatial axis, rescaled to
    [-1, 1], are concatenated to the input. Optionally a radius channel
    (``with_r``) and two boundary-masked coordinate channels
    (``with_boundary`` together with a heatmap) are appended as well.
    """

    def __init__(self, x_dim=64, y_dim=64, with_r=False, with_boundary=False):
        super().__init__()
        self.x_dim = x_dim
        self.y_dim = y_dim
        self.with_r = with_r
        self.with_boundary = with_boundary

    def forward(self, input_tensor, heatmap=None):
        """Return ``input_tensor`` with the extra channels concatenated on dim 1.

        Args:
            input_tensor: Tensor of shape (batch, c, x_dim, y_dim).
            heatmap: Optional tensor whose last channel is used as the
                boundary map when ``with_boundary`` is set.
        """
        device = input_tensor.device
        n_batch = input_tensor.shape[0]

        # Outer product of a ones column with the x positions: (1, y_dim, x_dim).
        ones_col = torch.ones([1, self.y_dim], dtype=torch.int32, device=device).unsqueeze(-1)
        x_steps = torch.arange(self.x_dim, dtype=torch.int32, device=device).unsqueeze(0).unsqueeze(1)
        xx_channel = torch.matmul(ones_col.float(), x_steps.float()).unsqueeze(-1)

        # Outer product of the y positions with a ones row: (1, y_dim, x_dim).
        ones_row = torch.ones([1, self.x_dim], dtype=torch.int32, device=device).unsqueeze(1)
        y_steps = torch.arange(self.y_dim, dtype=torch.int32, device=device).unsqueeze(0).unsqueeze(-1)
        yy_channel = torch.matmul(y_steps.float(), ones_row.float()).unsqueeze(-1)

        # Reorder to (1, 1, x_dim, y_dim) and rescale to [-1, 1].
        xx_channel = xx_channel.permute(0, 3, 2, 1) / (self.x_dim - 1)
        yy_channel = yy_channel.permute(0, 3, 2, 1) / (self.y_dim - 1)
        xx_channel = (xx_channel * 2 - 1).repeat(n_batch, 1, 1, 1)
        yy_channel = (yy_channel * 2 - 1).repeat(n_batch, 1, 1, 1)

        use_boundary = self.with_boundary and heatmap is not None
        if use_boundary:
            # Keep coordinates only where the clamped boundary map exceeds 0.05.
            boundary_channel = torch.clamp(heatmap[:, -1:, :, :], 0.0, 1.0)
            zero_tensor = torch.zeros_like(xx_channel)
            on_boundary = boundary_channel > 0.05
            xx_boundary_channel = torch.where(on_boundary, xx_channel, zero_tensor).to(device)
            yy_boundary_channel = torch.where(on_boundary, yy_channel, zero_tensor).to(device)

        pieces = [input_tensor, xx_channel, yy_channel]
        if self.with_r:
            # Distance from the centre, normalised by its maximum.
            rr = torch.sqrt(torch.pow(xx_channel, 2) + torch.pow(yy_channel, 2))
            pieces.append(rr / torch.max(rr))
        if use_boundary:
            pieces.extend((xx_boundary_channel, yy_boundary_channel))
        return torch.cat(pieces, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
class CoordConvTh(nn.Module):
    """CoordConv layer: ``AddCoordsTh`` followed by an ``nn.Conv2d``."""

    def __init__(self, x_dim, y_dim, with_r, with_boundary, in_channels, first_one=False, *args, **kwargs):
        """Create the coordinate augmenter and a convolution sized for its output.

        ``in_channels`` is the channel count of the raw input. The channels
        added by ``AddCoordsTh`` are accounted for here. Remaining positional
        and keyword arguments are passed on to ``nn.Conv2d``.
        """
        super().__init__()
        self.addcoords = AddCoordsTh(x_dim=x_dim, y_dim=y_dim, with_r=with_r, with_boundary=with_boundary)

        extra_channels = 2  # the two coordinate channels
        if with_r:
            extra_channels += 1  # radius channel
        if with_boundary and not first_one:
            extra_channels += 2  # boundary-masked coordinate channels
        self.conv = nn.Conv2d(in_channels=in_channels + extra_channels, *args, **kwargs)

    def forward(self, input_tensor, heatmap=None):
        """Return ``(conv_output, last_two_channels_of_the_augmented_input)``."""
        augmented = self.addcoords(input_tensor, heatmap)
        last_channel = augmented[:, -2:, :, :]
        return self.conv(augmented), last_channel
|
||||||
|
|
||||||
|
|
||||||
|
def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False, dilation=1):
    """Return a 3x3 ``nn.Conv2d``; ``strd`` is the stride (padding defaults to 1, no bias)."""
    conv_options = {
        'kernel_size': 3,
        'stride': strd,
        'padding': padding,
        'bias': bias,
        'dilation': dilation,
    }
    return nn.Conv2d(in_planes, out_planes, **conv_options)
|
||||||
|
|
||||||
|
|
||||||
|
class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions without batch normalisation."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        """Create the two convolutions; ``downsample`` (if given) reshapes the skip path."""
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + skip)``."""
        out = self.conv2(self.relu(self.conv1(x)))

        # The skip path is evaluated after the main path.
        residual = x
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        return self.relu(out)
|
||||||
|
|
||||||
|
|
||||||
|
class ConvBlock(nn.Module):
    """Residual block of three BN-ReLU-conv stages whose outputs are concatenated.

    The stages produce ``out_planes / 2``, ``out_planes / 4`` and
    ``out_planes / 4`` channels. When ``in_planes != out_planes`` the skip
    path goes through BN-ReLU-1x1 conv to match the channel count.
    """

    def __init__(self, in_planes, out_planes):
        super().__init__()
        half = int(out_planes / 2)
        quarter = int(out_planes / 4)

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, half)
        self.bn2 = nn.BatchNorm2d(half)
        self.conv2 = conv3x3(half, quarter, padding=1, dilation=1)
        self.bn3 = nn.BatchNorm2d(quarter)
        self.conv3 = conv3x3(quarter, quarter, padding=1, dilation=1)

        self.downsample = None
        if in_planes != out_planes:
            self.downsample = nn.Sequential(
                nn.BatchNorm2d(in_planes),
                nn.ReLU(True),
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, bias=False),
            )

    def forward(self, x):
        """Return the concatenated stage outputs plus the (possibly projected) input."""
        out1 = self.conv1(F.relu(self.bn1(x), True))
        out2 = self.conv2(F.relu(self.bn2(out1), True))
        out3 = self.conv3(F.relu(self.bn3(out2), True))

        merged = torch.cat((out1, out2, out3), 1)

        residual = x
        if self.downsample is not None:
            residual = self.downsample(residual)

        merged += residual
        return merged
|
||||||
|
|
||||||
|
|
||||||
|
class HourGlass(nn.Module):
    """Recursive hourglass module preceded by a ``CoordConvTh`` layer.

    ``num_modules`` and ``num_features`` are stored on the instance only. The
    layer widths are fixed at 256 channels and the coordinate grid at 64x64.
    ``depth`` is the number of recursion levels.
    """

    def __init__(self, num_modules, depth, num_features, first_one=False):
        super().__init__()
        self.num_modules = num_modules
        self.depth = depth
        self.features = num_features
        self.coordconv = CoordConvTh(
            x_dim=64,
            y_dim=64,
            with_r=True,
            with_boundary=True,
            in_channels=256,
            first_one=first_one,
            out_channels=256,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self._generate_network(self.depth)

    def _generate_network(self, level):
        """Register the ``ConvBlock`` modules for ``level`` and all levels below it."""
        self.add_module(f'b1_{level}', ConvBlock(256, 256))
        self.add_module(f'b2_{level}', ConvBlock(256, 256))

        if level > 1:
            self._generate_network(level - 1)
        else:
            # Innermost level gets an extra block instead of a recursion.
            self.add_module(f'b2_plus_{level}', ConvBlock(256, 256))

        self.add_module(f'b3_{level}', ConvBlock(256, 256))

    def _forward(self, level, inp):
        """Run one hourglass level: a full-resolution branch plus a pooled branch."""
        blocks = self._modules

        # Upper branch keeps the input resolution.
        upper = blocks[f'b1_{level}'](inp)

        # Lower branch works at half resolution.
        lower = blocks[f'b2_{level}'](F.avg_pool2d(inp, 2, stride=2))
        if level > 1:
            lower = self._forward(level - 1, lower)
        else:
            lower = blocks[f'b2_plus_{level}'](lower)
        lower = blocks[f'b3_{level}'](lower)

        # Bring the lower branch back to the upper branch's resolution.
        return upper + F.interpolate(lower, scale_factor=2, mode='nearest')

    def forward(self, x, heatmap):
        """Return ``(hourglass_output, last_channel)`` where ``last_channel`` comes from the CoordConv layer."""
        x, last_channel = self.coordconv(x, heatmap)
        return self._forward(self.depth, x), last_channel
|
||||||
|
|
||||||
|
|
||||||
|
class FAN(nn.Module):
    """Stacked-hourglass landmark network.

    Args:
        num_modules (int): Number of hourglass stages stacked.
        end_relu (bool): If True, apply ReLU to each stage's heatmap output.
        gray_scale (bool): Stored on the instance. The stem takes 3 input
            channels for either value.
        num_landmarks (int): Each stage predicts ``num_landmarks + 1``
            heatmap channels.
        device: Device that ``get_landmarks`` moves its input to.
    """

    def __init__(self, num_modules=1, end_relu=False, gray_scale=False, num_landmarks=68, device='cuda'):
        super().__init__()
        self.device = device
        self.num_modules = num_modules
        self.gray_scale = gray_scale
        self.end_relu = end_relu
        self.num_landmarks = num_landmarks

        # Base part. ``gray_scale`` does not alter the stem, so it is built once.
        self.conv1 = CoordConvTh(
            x_dim=256,
            y_dim=256,
            with_r=True,
            with_boundary=False,
            in_channels=3,
            out_channels=64,
            kernel_size=7,
            stride=2,
            padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = ConvBlock(64, 128)
        self.conv3 = ConvBlock(128, 128)
        self.conv4 = ConvBlock(128, 256)

        # Stacking part. Module names are kept verbatim because they are the
        # keys of the state dict that pretrained weights are loaded into.
        for hg_module in range(self.num_modules):
            first_one = hg_module == 0
            self.add_module('m' + str(hg_module), HourGlass(1, 4, 256, first_one))
            self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256))
            self.add_module('conv_last' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
            self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256))
            self.add_module('l' + str(hg_module), nn.Conv2d(256, num_landmarks + 1, kernel_size=1, stride=1, padding=0))

            # Every stage but the last feeds its features and heatmaps forward.
            if hg_module < self.num_modules - 1:
                self.add_module('bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
                self.add_module('al' + str(hg_module),
                                nn.Conv2d(num_landmarks + 1, 256, kernel_size=1, stride=1, padding=0))

    def forward(self, x):
        """Run the network.

        Returns:
            tuple: ``(outputs, boundary_channels)``, two lists with one entry
            per hourglass stage.
        """
        x, _ = self.conv1(x)
        x = F.relu(self.bn1(x), True)
        x = F.avg_pool2d(self.conv2(x), 2, stride=2)
        x = self.conv3(x)
        x = self.conv4(x)

        previous = x

        outputs = []
        boundary_channels = []
        tmp_out = None  # heatmaps of the previous stage; None for the first stage
        for i in range(self.num_modules):
            hg, boundary_channel = self._modules['m' + str(i)](previous, tmp_out)

            ll = self._modules['top_m_' + str(i)](hg)
            ll = F.relu(self._modules['bn_end' + str(i)](self._modules['conv_last' + str(i)](ll)), True)

            # Predict heatmaps
            tmp_out = self._modules['l' + str(i)](ll)
            if self.end_relu:
                tmp_out = F.relu(tmp_out)  # HACK: Added relu
            outputs.append(tmp_out)
            boundary_channels.append(boundary_channel)

            if i < self.num_modules - 1:
                ll = self._modules['bl' + str(i)](ll)
                tmp_out_ = self._modules['al' + str(i)](tmp_out)
                previous = previous + ll + tmp_out_

        return outputs, boundary_channels

    def get_landmarks(self, img):
        """Predict landmark positions for a single image.

        Args:
            img (numpy.ndarray): Image of shape (H, W, 3). Its channel order
                is reversed and its values divided by 255 before inference
                (presumably a uint8 BGR image -- confirm with callers).

        Returns:
            numpy.ndarray: Array of shape (num_points, 2) with (x, y) in the
            coordinate frame of the input image.
        """
        H, W, _ = img.shape
        # Heatmaps are 64x64 for the 256x256 network input, so these factors
        # map heatmap coordinates back to the original image size.
        offset = W / 64, H / 64, 0, 0

        img = cv2.resize(img, (256, 256))
        inp = img[..., ::-1]
        inp = torch.from_numpy(np.ascontiguousarray(inp.transpose((2, 0, 1)))).float()
        inp = inp.to(self.device)
        inp.div_(255.0).unsqueeze_(0)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs, _ = self.forward(inp)
        # Last stage, dropping the final channel; only landmark heatmaps remain.
        out = outputs[-1][:, :-1, :, :]
        heatmaps = out.detach().cpu().numpy()

        pred = calculate_points(heatmaps).reshape(-1, 2)

        pred *= offset[:2]
        pred += offset[-2:]

        return pred
|
@ -0,0 +1,82 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def load_txt_file(file_path):
    """Load the lines of a text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        tuple: ``(content, num_lines)`` where ``content`` is the list of lines
        with surrounding whitespace stripped and ``num_lines`` is its length.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file_path, 'r') as cfile:
        content = [line.strip() for line in cfile]
    return content, len(content)
|
||||||
|
|
||||||
|
|
||||||
|
def anno_parser(anno_path, num_pts, line_offset=0):
    """Read landmark coordinates from a comma-separated annotation file.

    Args:
        anno_path: Path of the annotation file (suffix .txt).
        num_pts: Number of landmarks to read.
        line_offset: Index of the line holding the first point, default: 0.

    Returns:
        pts: float32 array of shape (num_pts, 2) holding (x, y). Entries that
        could not be converted to float keep whatever was written before the
        failure (zeros initially), and a message is printed.
    """
    lines, _ = load_txt_file(anno_path)
    pts = np.zeros((num_pts, 2), dtype='float32')

    for point_index in range(num_pts):
        try:
            fields = lines[point_index + line_offset].split(',')
            pts[point_index, 0] = float(fields[0])
            pts[point_index, 1] = float(fields[1])
        except ValueError:
            print(f'Error in loading points in {anno_path}')
    return pts
|
||||||
|
|
||||||
|
|
||||||
|
def landmark_98_to_68(landmark_98):
    """Convert 98 landmark positions to the 68-landmark layout.

    Args:
        landmark_98(numpy array): Coordinates of 98 landmarks, (98, 2)
    Returns:
        landmark_68(numpy array): Coordinates of 68 landmarks, (68, 2), float32
    """
    landmark_68 = np.zeros((68, 2), dtype='float32')

    # Rows copied unchanged, as parallel (destination, source) index lists.
    copy_dst = [
        *range(0, 17),   # cheek: every second point of 0..32
        *range(27, 36),  # nose
        *range(48, 68),  # mouth
        17, 26,          # outer ends of left / right eyebrow
        *range(36, 42),  # left eye
        *range(42, 48),  # right eye
    ]
    copy_src = [
        *range(0, 33, 2),
        *range(51, 60),
        *range(76, 96),
        33, 46,
        60, 61, 63, 64, 65, 67,
        68, 69, 71, 72, 73, 75,
    ]
    landmark_68[copy_dst, :] = landmark_98[copy_src, :]

    # Remaining eyebrow points are the mean of two of the 98 landmarks.
    mean_dst = [18, 19, 20, 21, 22, 23, 24, 25]
    mean_first = [34, 35, 36, 37, 42, 43, 44, 45]
    mean_second = [41, 40, 39, 38, 50, 49, 48, 47]
    landmark_68[mean_dst, :] = (landmark_98[mean_first, :] + landmark_98[mean_second, :]) / 2

    return landmark_68
|
@ -0,0 +1,20 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .hyperiqa_net import HyperIQA
|
||||||
|
|
||||||
|
|
||||||
|
def init_assessment_model(model_name, half=False, device='cuda', model_rootpath=None):
    """Build a face-quality assessment network and load its pretrained weights.

    Args:
        model_name (str): Architecture to build. Only ``'hypernet'`` is
            supported.
        half (bool): Accepted for interface compatibility; not used by this
            function.
        device (str | torch.device): Device the model is moved to.
        model_rootpath (str | None): Forwarded to ``load_file_from_url`` as
            ``save_dir``.

    Returns:
        The ``HyperIQA`` model in eval mode, moved to ``device``.

    Raises:
        NotImplementedError: If ``model_name`` is not ``'hypernet'``.
    """
    if model_name != 'hypernet':
        raise NotImplementedError(f'{model_name} is not implemented.')

    model = HyperIQA(16, 112, 224, 112, 56, 28, 14, 7)
    model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/assessment_hyperIQA.pth'

    # Fetch the pretrained hypernet weights and load them without remapping
    # storages to another device.
    weights_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    state = torch.load(weights_path, map_location=lambda storage, loc: storage)
    model.hypernet.load_state_dict(state)

    model = model.eval()
    return model.to(device)
|
@ -0,0 +1,298 @@
|
|||||||
|
import torch as torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from torch.nn import functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class HyperIQA(nn.Module):
    """Hyper network and target network combined into a single module.

    All constructor arguments are forwarded to ``HyperNet``.
    """

    def __init__(self, *args):
        super().__init__()
        self.hypernet = HyperNet(*args)

    def forward(self, img):
        """Predict a quality score for ``img``.

        The hyper network produces the weights and input vector of a
        ``TargetNet`` that is built on the fly and then evaluated.
        """
        generated = self.hypernet(img)

        # Assemble the per-image target network and freeze its parameters.
        scorer = TargetNet(generated)
        for weight in scorer.parameters():
            weight.requires_grad = False

        return scorer(generated['target_in_vec'])
|
||||||
|
|
||||||
|
|
||||||
|
class HyperNet(nn.Module):
    """Hyper network that generates the weights and biases of the target network.

    Args:
        lda_out_channels: local distortion aware module output size.
        hyper_in_channels: input feature channels for hyper network.
        target_in_size: input vector size for target network.
        target_fc(i)_size: fully connected layer size of target network.
        feature_size: input feature map width/height for hyper network.
    Note:
        For size match, 'target_fc(i)_size * target_fc(i+1)_size' must be
        divisible by 'feature_size ^ 2'.
    """

    def __init__(self, lda_out_channels, hyper_in_channels, target_in_size, target_fc1_size, target_fc2_size,
                 target_fc3_size, target_fc4_size, feature_size):
        super().__init__()

        self.hyperInChn = hyper_in_channels
        self.target_in_size = target_in_size
        self.f1 = target_fc1_size
        self.f2 = target_fc2_size
        self.f3 = target_fc3_size
        self.f4 = target_fc4_size
        self.feature_size = feature_size

        self.res = resnet50_backbone(lda_out_channels, target_in_size)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # 1x1 convolutions reducing the 2048 backbone channels to hyperInChn.
        self.conv1 = nn.Sequential(
            nn.Conv2d(2048, 1024, 1, padding=(0, 0)),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 512, 1, padding=(0, 0)),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, self.hyperInChn, 1, padding=(0, 0)),
            nn.ReLU(inplace=True),
        )

        # Convolutions generate the target fc weights, linear layers the biases.
        area = feature_size**2
        self.fc1w_conv = nn.Conv2d(self.hyperInChn, int(self.target_in_size * self.f1 / area), 3, padding=(1, 1))
        self.fc1b_fc = nn.Linear(self.hyperInChn, self.f1)

        self.fc2w_conv = nn.Conv2d(self.hyperInChn, int(self.f1 * self.f2 / area), 3, padding=(1, 1))
        self.fc2b_fc = nn.Linear(self.hyperInChn, self.f2)

        self.fc3w_conv = nn.Conv2d(self.hyperInChn, int(self.f2 * self.f3 / area), 3, padding=(1, 1))
        self.fc3b_fc = nn.Linear(self.hyperInChn, self.f3)

        self.fc4w_conv = nn.Conv2d(self.hyperInChn, int(self.f3 * self.f4 / area), 3, padding=(1, 1))
        self.fc4b_fc = nn.Linear(self.hyperInChn, self.f4)

        self.fc5w_fc = nn.Linear(self.hyperInChn, self.f4)
        self.fc5b_fc = nn.Linear(self.hyperInChn, 1)

    def forward(self, img):
        """Return a dict with the target-network input vector and its generated weights/biases."""
        side = self.feature_size
        backbone_out = self.res(img)

        # Input vector for the target net.
        target_in_vec = backbone_out['target_in_vec'].view(-1, self.target_in_size, 1, 1)

        # Input features for the hyper net, and their globally pooled form
        # that feeds every bias (and the last weight) generator.
        hyper_in_feat = self.conv1(backbone_out['hyper_in_feat']).view(-1, self.hyperInChn, side, side)
        pooled = self.pool(hyper_in_feat).squeeze()

        return {
            'target_in_vec': target_in_vec,
            'target_fc1w': self.fc1w_conv(hyper_in_feat).view(-1, self.f1, self.target_in_size, 1, 1),
            'target_fc1b': self.fc1b_fc(pooled).view(-1, self.f1),
            'target_fc2w': self.fc2w_conv(hyper_in_feat).view(-1, self.f2, self.f1, 1, 1),
            'target_fc2b': self.fc2b_fc(pooled).view(-1, self.f2),
            'target_fc3w': self.fc3w_conv(hyper_in_feat).view(-1, self.f3, self.f2, 1, 1),
            'target_fc3b': self.fc3b_fc(pooled).view(-1, self.f3),
            'target_fc4w': self.fc4w_conv(hyper_in_feat).view(-1, self.f4, self.f3, 1, 1),
            'target_fc4b': self.fc4b_fc(pooled).view(-1, self.f4),
            'target_fc5w': self.fc5w_fc(pooled).view(-1, 1, self.f4, 1, 1),
            'target_fc5b': self.fc5b_fc(pooled).view(-1, 1),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(nn.Module):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convolutions, each followed by batch norm.

    The output has ``planes * 4`` channels. ``stride`` is applied by the 3x3
    convolution. ``downsample`` (if given) transforms the skip path.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * 4
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + skip(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The skip path is evaluated after the main path.
        residual = x
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        return self.relu(out)
|
||||||
|
|
||||||
|
|
||||||
|
class ResNetBackbone(nn.Module):
    """ResNet feature extractor with four local-distortion-aware (LDA) heads.

    Args:
        lda_out_channels: Output size of each of the first three LDA heads.
        in_chn: Total length of the concatenated LDA vector. The fourth head
            outputs ``in_chn - 3 * lda_out_channels`` values.
        block: Residual block class; must expose ``expansion``.
        layers: Number of blocks in each of the four stages.
        num_classes: Accepted but not used.

    The hard-coded input sizes of the LDA linear layers fix the spatial size
    of the feature maps (presumably a 224x224 input -- confirm with callers).
    """

    def __init__(self, lda_out_channels, in_chn, block, layers, num_classes=1000):
        super().__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Local distortion aware module: one pool + fc head per stage.
        self.lda1_pool = nn.Sequential(
            nn.Conv2d(256, 16, kernel_size=1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(7, stride=7),
        )
        self.lda1_fc = nn.Linear(16 * 64, lda_out_channels)

        self.lda2_pool = nn.Sequential(
            nn.Conv2d(512, 32, kernel_size=1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(7, stride=7),
        )
        self.lda2_fc = nn.Linear(32 * 16, lda_out_channels)

        self.lda3_pool = nn.Sequential(
            nn.Conv2d(1024, 64, kernel_size=1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(7, stride=7),
        )
        self.lda3_fc = nn.Linear(64 * 4, lda_out_channels)

        self.lda4_pool = nn.AvgPool2d(7, stride=7)
        self.lda4_fc = nn.Linear(2048, in_chn - lda_out_channels * 3)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion

        # A projection on the skip path is needed whenever the first block
        # changes the resolution or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return ``{'hyper_in_feat': final feature map, 'target_in_vec': concatenated LDA vector}``."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        # Each LDA head is evaluated on the output of its stage before the
        # next stage runs.
        x = self.layer1(x)
        lda_1 = self.lda1_fc(self.lda1_pool(x).view(x.size(0), -1))
        x = self.layer2(x)
        lda_2 = self.lda2_fc(self.lda2_pool(x).view(x.size(0), -1))
        x = self.layer3(x)
        lda_3 = self.lda3_fc(self.lda3_pool(x).view(x.size(0), -1))
        x = self.layer4(x)
        lda_4 = self.lda4_fc(self.lda4_pool(x).view(x.size(0), -1))

        return {
            'hyper_in_feat': x,
            'target_in_vec': torch.cat((lda_1, lda_2, lda_3, lda_4), 1),
        }
|
||||||
|
|
||||||
|
|
||||||
|
def resnet50_backbone(lda_out_channels, in_chn, **kwargs):
    """Constructs a ResNet-50 model_hyper.

    Builds a ``ResNetBackbone`` from ``Bottleneck`` units with the stage
    depths ``[3, 4, 6, 3]``; extra keyword arguments are passed through
    to ``ResNetBackbone`` unchanged.
    """
    stage_depths = [3, 4, 6, 3]
    return ResNetBackbone(lda_out_channels, in_chn, Bottleneck, stage_depths, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class TargetNet(nn.Module):
    """
    Target network for quality prediction.

    The layers are built from ``paras``, a mapping that provides a weight
    and a bias under the keys ``'target_fc1w'``/``'target_fc1b'`` through
    ``'target_fc5w'``/``'target_fc5b'``.
    """

    def __init__(self, paras):
        super().__init__()

        # Stages 1-3: one target FC followed by a sigmoid.
        self.l1 = nn.Sequential(TargetFC(paras['target_fc1w'], paras['target_fc1b']), nn.Sigmoid())
        self.l2 = nn.Sequential(TargetFC(paras['target_fc2w'], paras['target_fc2b']), nn.Sigmoid())
        self.l3 = nn.Sequential(TargetFC(paras['target_fc3w'], paras['target_fc3b']), nn.Sigmoid())

        # Stage 4: FC + sigmoid, then a final FC with no activation.
        self.l4 = nn.Sequential(
            TargetFC(paras['target_fc4w'], paras['target_fc4b']),
            nn.Sigmoid(),
            TargetFC(paras['target_fc5w'], paras['target_fc5b']),
        )

    def forward(self, x):
        """Apply the four stages in order and squeeze the result."""
        hidden = x
        for stage in (self.l1, self.l2, self.l3):
            hidden = stage(hidden)
        return self.l4(hidden).squeeze()
|
||||||
|
|
||||||
|
|
||||||
|
class TargetFC(nn.Module):
    """
    Fully connected operations for the target net.

    Note:
        Weights & biases are different for different images in a batch,
        so a grouped convolution (one group per image) is used to apply each
        image's own weights & biases in a single call.
    """

    def __init__(self, weight, bias):
        super().__init__()
        self.weight = weight
        self.bias = bias

    def forward(self, input_):
        """Apply the per-sample weights to ``input_`` via a grouped ``conv2d``."""
        weight, bias = self.weight, self.bias
        groups = weight.shape[0]
        out_per_group = weight.shape[1]
        height, width = input_.shape[2], input_.shape[3]

        # Fold the batch axis into the channel axis so each sample becomes
        # one convolution group.
        folded_input = input_.view(-1, input_.shape[0] * input_.shape[1], height, width)
        folded_weight = weight.view(groups * out_per_group, weight.shape[2], weight.shape[3], weight.shape[4])
        folded_bias = bias.view(bias.shape[0] * bias.shape[1])

        result = F.conv2d(input=folded_input, weight=folded_weight, bias=folded_bias, groups=groups)

        # Unfold back to one row of outputs per sample.
        return result.view(input_.shape[0], out_per_group, height, width)
|
@ -0,0 +1,31 @@
|
|||||||
|
import torch
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .retinaface import RetinaFace
|
||||||
|
|
||||||
|
|
||||||
|
def init_detection_model(model_name, half=False, device='cuda', model_rootpath=None):
    """Build a RetinaFace detector and load its pretrained weights.

    Args:
        model_name: ``'retinaface_resnet50'`` or ``'retinaface_mobile0.25'``.
        half: forwarded to ``RetinaFace`` as its ``half`` argument.
        device: device the model is created on and finally moved to.
        model_rootpath: forwarded to ``load_file_from_url`` as ``save_dir``.

    Returns:
        The ``RetinaFace`` model with weights loaded, in eval mode, on
        ``device``.

    Raises:
        NotImplementedError: if ``model_name`` is not one of the names above.
    """
    if model_name == 'retinaface_resnet50':
        model = RetinaFace(network_name='resnet50', half=half, device=device)
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth'
    elif model_name == 'retinaface_mobile0.25':
        model = RetinaFace(network_name='mobile0.25', half=half, device=device)
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth'
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    model_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)

    # TODO: clean pretrained model
    # NOTE(review): torch.load unpickles the checkpoint; only load files from
    # a trusted source.
    load_net = torch.load(model_path, map_location=lambda storage, loc: storage)

    # remove unnecessary 'module.'
    # Iterate over a snapshot of the keys (the dict is edited inside the loop)
    # instead of deep-copying every weight tensor just to iterate.
    prefix = 'module.'
    for key in list(load_net):
        if key.startswith(prefix):
            load_net[key[len(prefix):]] = load_net.pop(key)

    model.load_state_dict(load_net, strict=True)
    model.eval()
    model = model.to(device)
    return model
|
@ -0,0 +1,219 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from .matlab_cp2tform import get_similarity_transform_for_cv2
|
||||||
|
|
||||||
|
# reference facial points, a list of coordinates (x,y)
REFERENCE_FACIAL_POINTS = [
    [30.29459953, 51.69630051],
    [65.53179932, 51.50139999],
    [48.02519989, 71.73660278],
    [33.54930115, 92.3655014],
    [62.72990036, 92.20410156],
]

# default crop size that the reference points above refer to
DEFAULT_CROP_SIZE = (96, 112)
|
||||||
|
|
||||||
|
|
||||||
|
class FaceWarpException(Exception):
    """Error raised by the face-warping helpers.

    ``str()`` of the exception is the original message prefixed with the
    path of this file.
    """

    def __str__(self):
        # ``super()`` must be called: the bare ``super.__str__(self)`` resolved
        # to ``object.__str__`` and returned ``repr(self)`` instead of the
        # message.
        return 'In File {}:{}'.format(__file__, super().__str__())
|
||||||
|
|
||||||
|
|
||||||
|
def get_reference_facial_points(output_size=None, inner_padding_factor=0.0, outer_padding=(0, 0), default_square=False):
    """
    Function:
    ----------
        get reference 5 key points according to crop settings:
        0. Set default crop_size:
            if default_square:
                crop_size = (112, 112)
            else:
                crop_size = (96, 112)
        1. Pad the crop_size by inner_padding_factor in each side;
        2. Resize crop_size into (output_size - outer_padding*2),
            pad into output_size with outer_padding;
        3. Output reference_5point;
    Parameters:
    ----------
        @output_size: (w, h) or None
            size of aligned face image
        @inner_padding_factor: scalar, must satisfy 0 <= factor <= 1.0
            fraction of the crop size added as padding on each side
        @outer_padding: (w_pad, h_pad)
            padding added around the resized inner region
        @default_square: True or False
            if True:
                default crop_size = (112, 112)
            else:
                default crop_size = (96, 112);
        !!! make sure, if output_size is not None:
                (output_size - outer_padding)
                = some_scale * (default crop_size * (1.0 +
                inner_padding_factor))
    Returns:
    ----------
        @reference_5point: 5x2 np.array
            each row is a pair of transformed coordinates (x, y)
    Raises:
    ----------
        FaceWarpException: if the size/padding arguments are inconsistent
    """

    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # Requested size already equals the (possibly squared) default crop.
    if (output_size and output_size[0] == tmp_crop_size[0] and output_size[1] == tmp_crop_size[1]):
        return tmp_5pts

    if (inner_padding_factor == 0 and outer_padding == (0, 0)):
        if output_size is None:
            return tmp_5pts
        else:
            raise FaceWarpException('No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) and output_size is None):
        # Cast the padded crop size as a whole. The cast used to be applied to
        # the scalar factor only, which raised AttributeError for plain Python
        # numbers and truncated the factor to 1 for NumPy scalars.
        output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
        # NOTE(review): outer_padding is added once here but subtracted twice
        # below (size_bf_outer_pad); looks like this should be
        # outer_padding * 2 -- confirm before changing.
        output_size += np.array(outer_padding)
    if not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according inner_padding_factor
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    # The resize must keep the aspect ratio of the padded inner region.
    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)

    return reference_5point
|
||||||
|
|
||||||
|
|
||||||
|
def get_affine_transform_matrix(src_pts, dst_pts):
    """
    Function:
    ----------
        get affine transform matrix 'tfm' from src_pts to dst_pts
    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points matrix, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points matrix, each row is a pair of coordinates (x, y)
    Returns:
    ----------
        @tfm: 2x3 np.array
            transform matrix from src_pts to dst_pts; the identity transform
            is returned when the least-squares system has rank below 2
    """

    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    # Homogeneous coordinates: append a column of ones to both point sets.
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    # rcond is passed explicitly: omitting it triggers a FutureWarning on
    # NumPy 1.14-1.x and makes the cutoff depend on the NumPy version.
    # -1 (machine precision) matches the other lstsq call in this package.
    A, _, rank, _ = np.linalg.lstsq(src_pts_, dst_pts_, rcond=-1)

    if rank == 3:
        return np.float32([[A[0, 0], A[1, 0], A[2, 0]], [A[0, 1], A[1, 1], A[2, 1]]])
    if rank == 2:
        # Rank-deficient system: keep the linear part, drop the translation.
        return np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])

    return np.float32([[1, 0, 0], [0, 1, 0]])
|
||||||
|
|
||||||
|
|
||||||
|
def _points_as_kx2(points, arg_name):
    """Convert ``points`` to a float32 Kx2 array, transposing a 2xK input."""
    pts = np.float32(points)
    shape = pts.shape
    if max(shape) < 3 or min(shape) != 2:
        raise FaceWarpException('{}.shape must be (K,2) or (2,K) and K>2'.format(arg_name))
    return pts.T if shape[0] == 2 else pts


def warp_and_crop_face(src_img, facial_pts, reference_pts=None, crop_size=(96, 112), align_type='similarity'):
    """
    Function:
    ----------
        warp src_img so that facial_pts are mapped onto reference_pts and
        crop the result to crop_size
    Parameters:
    ----------
        @src_img: np.array
            input image (passed to cv2.warpAffine)
        @facial_pts: could be
            1) a list of K coordinates (x,y)
        or
            2) Kx2 or 2xK np.array
                each row or col is a pair of coordinates (x, y)
        @reference_pts: could be
            1) a list of K coordinates (x,y)
        or
            2) Kx2 or 2xK np.array
                each row or col is a pair of coordinates (x, y)
        or
            3) None
                if None, use default reference facial points
        @crop_size: (w, h)
            output face image size
        @align_type: transform type, could be one of
            1) 'similarity': use similarity transform
            2) 'cv2_affine': use the first 3 points to do affine transform,
                    by calling cv2.getAffineTransform()
            3) 'affine': use all points to do affine transform
            any other value is treated as 'similarity'
    Returns:
    ----------
        @face_img: output face image with size (w, h) = @crop_size
    Raises:
    ----------
        FaceWarpException: if the point sets have invalid or different shapes
    """

    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            # No padding is requested here; get_reference_facial_points
            # validates crop_size against the default crop size.
            reference_pts = get_reference_facial_points(crop_size, 0, (0, 0), False)

    ref_pts = _points_as_kx2(reference_pts, 'reference_pts')
    src_pts = _points_as_kx2(facial_pts, 'facial_pts')

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException('facial_pts and reference_pts must have the same shape')

    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    else:
        tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img
|
@ -0,0 +1,317 @@
|
|||||||
|
import numpy as np
|
||||||
|
from numpy.linalg import inv, lstsq
|
||||||
|
from numpy.linalg import matrix_rank as rank
|
||||||
|
from numpy.linalg import norm
|
||||||
|
|
||||||
|
|
||||||
|
class MatlabCp2tormException(Exception):
    """Error type of this module.

    ``str()`` of the exception is the original message prefixed with the
    path of this file.
    """

    def __str__(self):
        # ``super()`` must be called: the bare ``super.__str__(self)`` resolved
        # to ``object.__str__`` and returned ``repr(self)`` instead of the
        # message.
        return 'In File {}:{}'.format(__file__, super().__str__())
|
||||||
|
|
||||||
|
|
||||||
|
def tformfwd(trans, uv):
    """
    Function:
    ----------
        apply affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of transformed coordinates (x, y)
    """
    # Append a column of ones, multiply, then drop the last column again.
    ones_column = np.ones((uv.shape[0], 1))
    homogeneous = np.hstack((uv, ones_column))
    return np.dot(homogeneous, trans)[:, 0:-1]
|
||||||
|
|
||||||
|
|
||||||
|
def tforminv(trans, uv):
    """
    Function:
    ----------
        apply the inverse of affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of inverse-transformed coordinates (x, y)
    """
    # Invert the matrix once and reuse the forward mapping.
    return tformfwd(inv(trans), uv)
|
||||||
|
|
||||||
|
|
||||||
|
def findNonreflectiveSimilarity(uv, xy, options=None):
    """
    Function:
    ----------
        Find the non-reflective similarity transform 'T' such that
            [x, y, 1] = [u, v, 1] * T
        by solving a linear least-squares problem.

    Parameters:
    ----------
        @uv: Kx2 np.array
            source points, each row is a pair of coordinates (u, v)
        @xy: Kx2 np.array
            destination points, each row is a pair of coordinates (x, y)
        @options: dict or None
            'K' is the minimum number of unique points required (the system
            must have rank >= 2 * K); defaults to {'K': 2}

    Returns:
    ----------
        @T: 3x3 np.array
            transform matrix from uv to xy
        @Tinv: 3x3 np.array
            inverse of T, transform matrix from xy to uv

    Raises:
    ----------
        Exception('cp2tform:twoUniquePointsReq') if the system is
        rank-deficient
    """
    # Honor caller-supplied options; the argument used to be overwritten
    # unconditionally.
    K = 2 if options is None else options.get('K', 2)

    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    ones = np.ones((M, 1))
    zeros = np.zeros((M, 1))

    X = np.vstack((
        np.hstack((x, y, ones, zeros)),
        np.hstack((y, -x, zeros, ones)),
    ))

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))

    # We know that X * r = U
    if rank(X) < 2 * K:
        raise Exception('cp2tform:twoUniquePointsReq')
    r, _, _, _ = lstsq(X, U, rcond=-1)
    r = np.squeeze(r)

    sc, ss, tx, ty = r[0], r[1], r[2], r[3]

    Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
    T = inv(Tinv)
    # Overwrite the last column with its exact value [0, 0, 1].
    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv
|
||||||
|
|
||||||
|
|
||||||
|
def findSimilarity(uv, xy, options=None):
    """
    Function:
    ----------
        Find the similarity transform (reflection allowed) from uv to xy.
        Both a non-reflective fit and a reflective fit are computed and the
        one with the smaller residual norm is returned.

    Parameters:
    ----------
        @uv: Kx2 np.array
            source points, each row is a pair of coordinates (u, v)
        @xy: Kx2 np.array
            destination points, each row is a pair of coordinates (x, y);
            not modified
        @options: dict or None
            forwarded to findNonreflectiveSimilarity; defaults to {'K': 2}

    Returns:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy
        @trans_inv: 3x3 np.array
            inverse of trans, transform matrix from xy to uv
    """
    if options is None:
        options = {'K': 2}

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis
    # Work on a copy: reflecting xy in place corrupted the caller's array and
    # made both residuals below compare against the reflected points.
    xyR = xy.copy()
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv

    trans2_inv = inv(trans2)
    return trans2, trans2_inv
|
||||||
|
|
||||||
|
|
||||||
|
def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'trans':
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy
        @trans_inv: 3x3 np.array
            inverse of trans, transform matrix from xy to uv
    """
    # Pick the solver first, then run it once.
    solver = findSimilarity if reflective else findNonreflectiveSimilarity
    trans, trans_inv = solver(src_pts, dst_pts)
    return trans, trans_inv
|
||||||
|
|
||||||
|
|
||||||
|
def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
        Convert Transform Matrix 'trans' into 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    # Keep the first two columns and transpose them into 2x3 layout.
    first_two_columns = trans[:, 0:2]
    return first_two_columns.T
|
||||||
|
|
||||||
|
|
||||||
|
def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    # Only the forward matrix is needed; the inverse is discarded.
    forward, _ = get_similarity_transform(src_pts, dst_pts, reflective)
    return cvt_tform_mat_for_cv2(forward)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Demo on the data below. Reference session for the same data in Matlab:
    #
    #   u = [0, 6, -2]
    #   v = [0, 3, 5]
    #   x = [-1, 0, 4]
    #   y = [-1, -10, 4]
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim=cp2tform(uv,xy,'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m=tformfwd(tform_sim, u,v)
    #   xy_m =
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m=tforminv(tform_sim, x,y)
    #   uv_m =
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314

    def _show(label, value):
        # Print a caption line followed by the value on its own line.
        print(label)
        print(value)

    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array([u, v]).T
    xy = np.array([x, y]).T

    _show('\n--->uv:', uv)
    _show('\n--->xy:', xy)

    trans, trans_inv = get_similarity_transform(uv, xy)

    _show('\n--->trans matrix:', trans)
    _show('\n--->trans_inv matrix:', trans_inv)

    print('\n---> apply transform to uv')
    uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
    _show('\nxy_m = uv_augmented * trans', np.dot(uv_aug, trans))
    _show('\nxy_m = tformfwd(trans, uv)', tformfwd(trans, uv))

    print('\n---> apply inverse transform to xy')
    xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
    _show('\nuv_m = xy_augmented * trans_inv', np.dot(xy_aug, trans_inv))
    _show('\nuv_m = tformfwd(trans_inv, xy)', tformfwd(trans_inv, xy))
    _show('\nuv_m = tforminv(trans, xy)', tforminv(trans, xy))
|
@ -0,0 +1,366 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from PIL import Image
|
||||||
|
from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.detection.align_trans import get_reference_facial_points, warp_and_crop_face
|
||||||
|
from imaginairy.vendored.facexlib.detection.retinaface_net import FPN, SSH, MobileNetV1, make_bbox_head, make_class_head, make_landmark_head
|
||||||
|
from imaginairy.vendored.facexlib.detection.retinaface_utils import (PriorBox, batched_decode, batched_decode_landm, decode, decode_landm,
|
||||||
|
py_cpu_nms)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_config(network_name):
    """Return a fresh RetinaFace configuration dict for ``network_name``.

    Args:
        network_name: ``'mobile0.25'`` or ``'resnet50'``.

    Returns:
        A newly built dict on every call.

    Raises:
        NotImplementedError: for any other ``network_name``.
    """

    def _shared_settings():
        # Entries whose values are the same for both backbones.
        return {
            'min_sizes': [[16, 32], [64, 128], [256, 512]],
            'steps': [8, 16, 32],
            'variance': [0.1, 0.2],
            'clip': False,
            'loc_weight': 2.0,
            'gpu_train': True,
        }

    if network_name == 'mobile0.25':
        return {
            'name': 'mobilenet0.25',
            **_shared_settings(),
            'batch_size': 32,
            'ngpu': 1,
            'epoch': 250,
            'decay1': 190,
            'decay2': 220,
            'image_size': 640,
            'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
            'in_channel': 32,
            'out_channel': 64,
        }

    if network_name == 'resnet50':
        return {
            'name': 'Resnet50',
            **_shared_settings(),
            'batch_size': 24,
            'ngpu': 4,
            'epoch': 100,
            'decay1': 70,
            'decay2': 90,
            'image_size': 840,
            'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
            'in_channel': 256,
            'out_channel': 256,
        }

    raise NotImplementedError(f'network_name={network_name}')
|
||||||
|
|
||||||
|
|
||||||
|
class RetinaFace(nn.Module):
|
||||||
|
|
||||||
|
def __init__(self, network_name='resnet50', half=False, phase='test', device=None):
    """Build the detector network for ``network_name``.

    Args:
        network_name: name passed to ``generate_config``.
        half: when true, the module is converted with ``self.half()`` and
            inputs are cast to half precision before inference.
        phase: ``'train'`` makes ``forward`` return raw class scores; any
            other value applies a softmax.
        device: target device; when ``None``, CUDA is used if available,
            otherwise the CPU.
    """
    super().__init__()

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.device = device
    self.half_inference = half

    cfg = generate_config(network_name)
    self.cfg = cfg
    self.backbone = cfg['name']
    self.model_name = f'retinaface_{network_name}'
    self.phase = phase

    self.target_size, self.max_size = 1600, 2150
    self.resize, self.scale, self.scale1 = 1., None, None
    # Per-channel values subtracted from the input image in detect_faces.
    self.mean_tensor = torch.tensor([[[[104.]], [[117.]], [[123.]]]], device=self.device)
    self.reference = get_reference_facial_points(default_square=True)

    # Build network.
    backbone = None
    if cfg['name'] == 'mobilenet0.25':
        backbone = MobileNetV1()
        self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
    elif cfg['name'] == 'Resnet50':
        import torchvision.models as models
        backbone = models.resnet50(pretrained=False)
        self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])

    base_channels = cfg['in_channel']
    in_channels_list = [base_channels * factor for factor in (2, 4, 8)]
    out_channels = cfg['out_channel']

    self.fpn = FPN(in_channels_list, out_channels)
    self.ssh1 = SSH(out_channels, out_channels)
    self.ssh2 = SSH(out_channels, out_channels)
    self.ssh3 = SSH(out_channels, out_channels)

    self.ClassHead = make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
    self.BboxHead = make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
    self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

    self.to(self.device)
    self.eval()
    if self.half_inference:
        self.half()
|
||||||
|
|
||||||
|
def forward(self, inputs):
    """Run body, FPN, SSH and the three prediction heads.

    Returns:
        Tuple ``(bbox_regressions, classifications, ldm_regressions)``.
        Each is the per-level head output concatenated along dim 1. Unless
        ``self.phase == 'train'``, a softmax over the last dim is applied
        to the classifications.
    """
    stage_maps = self.body(inputs)
    if self.backbone in ('mobilenet0.25', 'Resnet50'):
        stage_maps = list(stage_maps.values())

    # FPN
    pyramid = self.fpn(stage_maps)

    # SSH
    features = [self.ssh1(pyramid[0]), self.ssh2(pyramid[1]), self.ssh3(pyramid[2])]

    def _apply_heads(heads):
        # One head per pyramid level; join the level outputs along dim 1.
        return torch.cat([heads[level](feature) for level, feature in enumerate(features)], dim=1)

    bbox_regressions = _apply_heads(self.BboxHead)
    classifications = _apply_heads(self.ClassHead)
    ldm_regressions = _apply_heads(self.LandmarkHead)

    if self.phase != 'train':
        classifications = F.softmax(classifications, dim=-1)
    return (bbox_regressions, classifications, ldm_regressions)
|
||||||
|
|
||||||
|
def __detect_faces(self, inputs):
    """Run the network on ``inputs`` and build the matching prior boxes.

    Side effects:
        Stores the scale tensors ``self.scale`` (4 entries) and
        ``self.scale1`` (10 entries), built from the input width/height.

    Returns:
        Tuple ``(loc, conf, landmarks, priors)``.
    """
    # get scale
    height, width = inputs.shape[2:]
    width_height = [width, height]
    self.scale = torch.tensor(width_height * 2, dtype=torch.float32, device=self.device)
    self.scale1 = torch.tensor(width_height * 5, dtype=torch.float32, device=self.device)

    # forward
    inputs = inputs.to(self.device)
    if self.half_inference:
        inputs = inputs.half()
    loc, conf, landmarks = self(inputs)

    # get priorbox
    priors = PriorBox(self.cfg, image_size=inputs.shape[2:]).forward().to(self.device)

    return loc, conf, landmarks, priors
|
||||||
|
|
||||||
|
# single image detection
|
||||||
|
def transform(self, image, use_origin_size):
    """Convert one image into a float32 tensor with a leading batch axis.

    Args:
        image: ``PIL.Image`` (converted RGB -> BGR) or an array-like with
            an ``astype`` method.
        use_origin_size: when true, no resizing is applied.

    Returns:
        Tuple ``(tensor, resize)`` where ``resize`` is the scale factor
        that was applied (``1`` when ``use_origin_size`` is true).
    """
    # convert to opencv format
    pixels = image
    if isinstance(pixels, Image.Image):
        pixels = cv2.cvtColor(np.asarray(pixels), cv2.COLOR_RGB2BGR)
    pixels = pixels.astype(np.float32)

    # testing scale
    short_side = np.min(pixels.shape[0:2])
    long_side = np.max(pixels.shape[0:2])
    scaled = float(self.target_size) / float(short_side)

    # prevent bigger axis from being more than max_size
    if np.round(scaled * long_side) > self.max_size:
        scaled = float(self.max_size) / float(long_side)
    resize = 1 if use_origin_size else scaled

    # resize
    if resize != 1:
        pixels = cv2.resize(pixels, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)

    # convert to torch.tensor format: channel axis first, then a batch axis
    # (the mean is subtracted later, in detect_faces)
    channels_first = pixels.transpose(2, 0, 1)
    tensor = torch.from_numpy(channels_first).unsqueeze(0)

    return tensor, resize
|
||||||
|
|
||||||
|
def detect_faces(
    self,
    image,
    conf_threshold=0.8,
    nms_threshold=0.4,
    use_origin_size=True,
):
    """Detect faces in a single image.

    Args:
        image: input accepted by ``self.transform``.
        conf_threshold: detections with a score not above this are dropped.
        nms_threshold: threshold handed to ``py_cpu_nms``.
        use_origin_size: forwarded to ``self.transform``.

    Returns:
        2-D ``np.ndarray`` with one row per kept detection: the box
        columns, the score, then the landmark columns.

    Side effects:
        Updates ``self.resize`` (and, via ``__detect_faces``, the scale
        tensors).
    """
    tensor, self.resize = self.transform(image, use_origin_size)
    tensor = tensor.to(self.device)
    if self.half_inference:
        tensor = tensor.half()
    tensor = tensor - self.mean_tensor

    loc, conf, landmarks, priors = self.__detect_faces(tensor)
    variance = self.cfg['variance']

    boxes = decode(loc.data.squeeze(0), priors.data, variance)
    boxes = (boxes * self.scale / self.resize).cpu().numpy()

    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landmarks = decode_landm(landmarks.squeeze(0), priors, variance)
    landmarks = (landmarks * self.scale1 / self.resize).cpu().numpy()

    # ignore low scores, then sort the survivors by descending score
    selected = np.where(scores > conf_threshold)[0]
    ranked = selected[scores[selected].argsort()[::-1]]
    boxes, landmarks, scores = boxes[ranked], landmarks[ranked], scores[ranked]

    # do NMS
    bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(bounding_boxes, nms_threshold)

    return np.concatenate((bounding_boxes[keep, :], landmarks[keep]), axis=1)
|
||||||
|
|
||||||
|
def __align_multi(self, image, boxes, landmarks, limit=None):
    """Warp every detected face to a 112x112 crop.

    Args:
        image: Source image; converted with ``np.array`` for each face.
        boxes: Per-face rows, concatenated column-wise with ``landmarks``
            in the first return value.
        landmarks: Per-face rows whose first ten values are read as five
            consecutive (2j, 2j + 1) pairs.
        limit: When truthy, only the first ``limit`` faces are processed.

    Returns:
        ``(detections, faces)`` where ``detections`` is ``boxes`` and
        ``landmarks`` joined along axis 1 and ``faces`` is the list of
        warped crops. ``([], [])`` when ``boxes`` is empty.
    """
    if len(boxes) < 1:
        return [], []

    if limit:
        boxes, landmarks = boxes[:limit], landmarks[:limit]

    faces = [
        warp_and_crop_face(
            np.array(image),
            [[points[k], points[k + 1]] for k in range(0, 10, 2)],
            self.reference,
            crop_size=(112, 112),
        )
        for points in landmarks
    ]

    return np.concatenate((boxes, landmarks), axis=1), faces
|
||||||
|
|
||||||
|
def align_multi(self, img, conf_threshold=0.8, limit=None):
    """Detect faces in ``img`` and return them together with aligned crops.

    Args:
        img: Image passed to ``detect_faces`` and to the alignment step.
        conf_threshold: Confidence threshold forwarded to ``detect_faces``.
        limit: Optional cap on the number of faces that get aligned.

    Returns:
        Whatever ``__align_multi`` returns for the detected faces.
    """
    detections = self.detect_faces(img, conf_threshold=conf_threshold)
    # the first five columns go in as boxes, the remainder as landmarks
    return self.__align_multi(img, detections[:, 0:5], detections[:, 5:], limit)
|
||||||
|
|
||||||
|
# batched detection
|
||||||
|
def batched_transform(self, frames, use_origin_size):
    """Convert a batch of frames to an NCHW tensor, optionally resized.

    Arguments:
        frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],
            type=np.float32, BGR format).
        use_origin_size: whether to use origin size.

    Returns:
        (frames, resize): ``frames`` as a torch.Tensor laid out as
        [n, c, h, w] and the scale factor that was applied (1 when
        ``use_origin_size`` is true).
    """
    from_PIL = isinstance(frames[0], Image.Image)

    # convert to opencv format
    if from_PIL:
        frames = [cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) for frame in frames]
        frames = np.asarray(frames, dtype=np.float32)

    # testing scale
    im_size_min = np.min(frames[0].shape[0:2])
    im_size_max = np.max(frames[0].shape[0:2])
    resize = float(self.target_size) / float(im_size_min)

    # prevent bigger axis from being more than max_size
    if np.round(resize * im_size_max) > self.max_size:
        resize = float(self.max_size) / float(im_size_max)
    resize = 1 if use_origin_size else resize

    if from_PIL:
        if resize != 1:
            # Re-stack into one ndarray: a plain list has no .transpose(),
            # which made the original resize path fail.
            frames = np.stack([
                cv2.resize(frame, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
                for frame in frames
            ])
        frames = torch.from_numpy(frames.transpose((0, 3, 1, 2)))
    else:
        # Move channels first BEFORE interpolating: F.interpolate treats the
        # trailing two axes of a 4-D tensor as the spatial ones, so resizing
        # the NHWC tensor would scale (w, c) instead of (h, w).
        frames = frames.permute(0, 3, 1, 2)
        if resize != 1:
            frames = F.interpolate(frames, scale_factor=resize)
        frames = frames.contiguous()

    return frames, resize
|
||||||
|
|
||||||
|
def batched_detect_faces(self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True):
    """Detect faces in a batch of frames.

    Arguments:
        frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],
            BGR format) -- the inputs ``batched_transform`` accepts.
        conf_threshold: confidence threshold.
        nms_threshold: nms threshold.
        use_origin_size: whether to use origin size.
    Returns:
        final_bounding_boxes: list with one np.array per frame; each row
            holds the decoded box columns followed by the score
            (type=np.float32).
        final_landmarks: list with one np.array per frame holding the
            decoded landmark columns of the kept rows (type=np.float32).
        A frame with no detection above ``conf_threshold`` contributes an
        empty 1-D float32 array to both lists.
    """
    frames, self.resize = self.batched_transform(frames, use_origin_size)
    frames = frames.to(self.device)
    if self.half_inference:
        # same cast detect_faces applies before subtracting the mean
        frames = frames.half()
    frames = frames - self.mean_tensor

    b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)

    final_bounding_boxes, final_landmarks = [], []

    # decode
    priors = priors.unsqueeze(0)
    b_loc = batched_decode(b_loc, priors, self.cfg['variance']) * self.scale / self.resize
    b_landmarks = batched_decode_landm(b_landmarks, priors, self.cfg['variance']) * self.scale1 / self.resize
    b_conf = b_conf[:, :, 1]

    # index for selection
    b_indice = b_conf > conf_threshold

    # concat
    b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()

    for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):

        # ignore low scores
        pred, landm = pred[inds, :], landm[inds, :]
        if pred.shape[0] == 0:
            final_bounding_boxes.append(np.array([], dtype=np.float32))
            final_landmarks.append(np.array([], dtype=np.float32))
            continue

        # to CPU; .float() is a no-op for float32 and keeps the documented
        # dtype when half inference is enabled
        bounding_boxes = pred.cpu().numpy()
        landm = landm.float().cpu().numpy()

        # NMS
        keep = py_cpu_nms(bounding_boxes, nms_threshold)

        # append
        final_bounding_boxes.append(bounding_boxes[keep, :])
        final_landmarks.append(landm[keep])

    return final_bounding_boxes, final_landmarks
|
@ -0,0 +1,196 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn(inp, oup, stride=1, leaky=0):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm -> LeakyReLU stack.

    Args:
        inp: Input channel count.
        oup: Output channel count.
        stride: Convolution stride.
        leaky: Negative slope of the in-place LeakyReLU.
    """
    conv = nn.Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = nn.LeakyReLU(negative_slope=leaky, inplace=True)
    return nn.Sequential(conv, norm, act)
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn_no_relu(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm stack without activation.

    Args:
        inp: Input channel count.
        oup: Output channel count.
        stride: Convolution stride.
    """
    conv = nn.Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = nn.BatchNorm2d(oup)
    return nn.Sequential(conv, norm)
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn1X1(inp, oup, stride, leaky=0):
    """Return a 1x1 conv (no padding, no bias) -> BatchNorm -> LeakyReLU stack.

    Args:
        inp: Input channel count.
        oup: Output channel count.
        stride: Convolution stride.
        leaky: Negative slope of the in-place LeakyReLU.
    """
    pointwise = nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    act = nn.LeakyReLU(negative_slope=leaky, inplace=True)
    return nn.Sequential(pointwise, norm, act)
|
||||||
|
|
||||||
|
|
||||||
|
def conv_dw(inp, oup, stride, leaky=0.1):
    """Return a depthwise 3x3 block followed by a pointwise 1x1 block.

    Each half is conv (no bias) -> BatchNorm -> in-place LeakyReLU. The
    depthwise conv uses ``groups=inp`` and the given ``stride``; the
    pointwise conv maps ``inp`` to ``oup`` channels with stride 1.

    Args:
        inp: Input channel count.
        oup: Output channel count.
        stride: Stride of the depthwise convolution.
        leaky: Negative slope shared by both LeakyReLU layers.
    """
    depthwise = [
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    pointwise = [
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    ]
    return nn.Sequential(*depthwise, *pointwise)
|
||||||
|
|
||||||
|
|
||||||
|
class SSH(nn.Module):
    """Three parallel conv branches whose outputs are concatenated.

    The branches are named 3x3 / 5x5 / 7x7 in the attribute names and are
    all built from stacked 3x3 convolutions. They contribute
    ``out_channel // 2``, ``out_channel // 4`` and ``out_channel // 4``
    channels respectively.
    """

    def __init__(self, in_channel, out_channel):
        """Build the branches; ``out_channel`` must be divisible by 4."""
        super().__init__()
        assert out_channel % 4 == 0
        # narrow heads use a leaky activation, wider ones a plain ReLU slope
        leaky = 0.1 if out_channel <= 64 else 0
        half = out_channel // 2
        quarter = out_channel // 4

        self.conv3X3 = conv_bn_no_relu(in_channel, half, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, quarter, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(quarter, quarter, stride=1)

        self.conv7X7_2 = conv_bn(quarter, quarter, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(quarter, quarter, stride=1)

    def forward(self, input):
        """Run the three branches on ``input`` and return ReLU(concat)."""
        branch3 = self.conv3X3(input)

        # the 5x5 and 7x7 branches share their first conv stage
        shared = self.conv5X5_1(input)
        branch5 = self.conv5X5_2(shared)
        branch7 = self.conv7x7_3(self.conv7X7_2(shared))

        return F.relu(torch.cat([branch3, branch5, branch7], dim=1))
|
||||||
|
|
||||||
|
|
||||||
|
class FPN(nn.Module):
    """Top-down feature merge over three input feature maps.

    Each input is projected with a 1x1 conv block; deeper levels are
    upsampled (nearest) to the next shallower level, added, and smoothed
    with a 3x3 conv block.
    """

    def __init__(self, in_channels_list, out_channels):
        """Create the lateral 1x1 blocks and the two 3x3 merge blocks.

        Args:
            in_channels_list: Channel counts of the three input maps.
            out_channels: Channel count of every output map.
        """
        super().__init__()
        leaky = 0.1 if out_channels <= 64 else 0

        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        """Return ``[level1, level2, level3]`` for the indexable ``input``."""
        level1 = self.output1(input[0])
        level2 = self.output2(input[1])
        level3 = self.output3(input[2])

        # merge level 3 into level 2
        upsampled3 = F.interpolate(level3, size=[level2.size(2), level2.size(3)], mode='nearest')
        level2 = self.merge2(level2 + upsampled3)

        # merge the refined level 2 into level 1
        upsampled2 = F.interpolate(level2, size=[level1.size(2), level1.size(3)], mode='nearest')
        level1 = self.merge1(level1 + upsampled2)

        return [level1, level2, level3]
|
||||||
|
|
||||||
|
|
||||||
|
class MobileNetV1(nn.Module):
    """Small MobileNetV1-style network split into three sequential stages.

    The stages end with 64, 128 and 256 channels respectively and are
    followed by global average pooling and a 1000-way linear layer.
    """

    def __init__(self):
        """Build the three stages, the pooling layer and the classifier."""
        super().__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),
            conv_dw(8, 16, 1),
            conv_dw(16, 32, 2),
            conv_dw(32, 32, 1),
            conv_dw(32, 64, 2),
            conv_dw(64, 64, 1),
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 128, 1),
            conv_dw(128, 128, 1),
            conv_dw(128, 128, 1),
            conv_dw(128, 128, 1),
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        """Run all stages, pool, flatten to 256 features and classify."""
        for stage in (self.stage1, self.stage2, self.stage3):
            x = stage(x)
        pooled = self.avg(x).view(-1, 256)
        return self.fc(pooled)
|
||||||
|
|
||||||
|
|
||||||
|
class ClassHead(nn.Module):
    """1x1 conv head producing two values per anchor per location."""

    def __init__(self, inchannels=512, num_anchors=3):
        """Create the 1x1 conv with ``num_anchors * 2`` output channels."""
        super().__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return the predictions reshaped to ``[batch, -1, 2]``."""
        # channels-last so each location's anchor values are contiguous
        channels_last = self.conv1x1(x).permute(0, 2, 3, 1).contiguous()
        return channels_last.view(channels_last.shape[0], -1, 2)
|
||||||
|
|
||||||
|
|
||||||
|
class BboxHead(nn.Module):
    """1x1 conv head producing four values per anchor per location."""

    def __init__(self, inchannels=512, num_anchors=3):
        """Create the 1x1 conv with ``num_anchors * 4`` output channels."""
        super().__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return the predictions reshaped to ``[batch, -1, 4]``."""
        channels_last = self.conv1x1(x).permute(0, 2, 3, 1).contiguous()
        return channels_last.view(channels_last.shape[0], -1, 4)
|
||||||
|
|
||||||
|
|
||||||
|
class LandmarkHead(nn.Module):
    """1x1 conv head producing ten values per anchor per location."""

    def __init__(self, inchannels=512, num_anchors=3):
        """Create the 1x1 conv with ``num_anchors * 10`` output channels."""
        super().__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        """Return the predictions reshaped to ``[batch, -1, 10]``."""
        channels_last = self.conv1x1(x).permute(0, 2, 3, 1).contiguous()
        return channels_last.view(channels_last.shape[0], -1, 10)
|
||||||
|
|
||||||
|
|
||||||
|
def make_class_head(fpn_num=3, inchannels=64, anchor_num=2):
    """Return an ``nn.ModuleList`` of ``fpn_num`` identically configured ``ClassHead`` modules."""
    heads = [ClassHead(inchannels, anchor_num) for _ in range(fpn_num)]
    return nn.ModuleList(heads)
|
||||||
|
|
||||||
|
|
||||||
|
def make_bbox_head(fpn_num=3, inchannels=64, anchor_num=2):
    """Return an ``nn.ModuleList`` of ``fpn_num`` identically configured ``BboxHead`` modules."""
    heads = [BboxHead(inchannels, anchor_num) for _ in range(fpn_num)]
    return nn.ModuleList(heads)
|
||||||
|
|
||||||
|
|
||||||
|
def make_landmark_head(fpn_num=3, inchannels=64, anchor_num=2):
    """Return an ``nn.ModuleList`` of ``fpn_num`` identically configured ``LandmarkHead`` modules."""
    heads = [LandmarkHead(inchannels, anchor_num) for _ in range(fpn_num)]
    return nn.ModuleList(heads)
|
@ -0,0 +1,20 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .hopenet_arch import HopeNet
|
||||||
|
|
||||||
|
|
||||||
|
def init_headpose_model(model_name, half=False, device='cuda', model_rootpath=None):
    """Build a head-pose model, load its released weights and move it to ``device``.

    Args:
        model_name: Only ``'hopenet'`` is supported.
        half: Accepted for signature compatibility; not used by this function.
        device: Target device for the returned model.
        model_rootpath: Forwarded to ``load_file_from_url`` as ``save_dir``.

    Returns:
        The model in eval mode on ``device``.

    Raises:
        NotImplementedError: If ``model_name`` is not supported.
    """
    if model_name != 'hopenet':
        raise NotImplementedError(f'{model_name} is not implemented.')

    model = HopeNet('resnet', [3, 4, 6, 3], 66)
    model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/headpose_hopenet.pth'

    model_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    # keep tensors on the storage they were deserialized to; weights live under 'params'
    checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['params'], strict=True)
    model.eval()
    return model.to(device)
|
@ -0,0 +1,72 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torchvision
|
||||||
|
|
||||||
|
|
||||||
|
class HopeNet(nn.Module):
    """Hopenet with 3 output layers for yaw, pitch and roll.

    Predicts Euler angles by binning and regression with the expected value.
    """

    def __init__(self, block, layers, num_bins):
        """Build the ResNet-style trunk and the three classification heads.

        Args:
            block: Residual block class, or the string ``'resnet'`` to use
                ``torchvision.models.resnet.Bottleneck``.
            layers: Number of blocks in each of the four stages.
            num_bins: Number of output bins per angle.
        """
        super().__init__()
        if block == 'resnet':
            block = torchvision.models.resnet.Bottleneck
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)

        # Bin indices used for the expected-value regression. Sized from
        # num_bins (previously hard-coded to 66) so it always matches the
        # width of the heads above. Kept as a plain attribute so the
        # state_dict keys are unchanged.
        self.idx_tensor = torch.arange(num_bins).float()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks, downsampling in the first if needed."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    @staticmethod
    def softmax_temperature(tensor, temperature):
        """Return the softmax of ``tensor / temperature`` along dim 1.

        Uses ``torch.softmax`` rather than a hand-written exp/sum so large
        logits (or half precision) do not overflow to inf and produce NaN.
        """
        return torch.softmax(tensor / temperature, dim=1)

    def bin2degree(self, predict):
        """Convert per-bin logits to one value per row.

        The softmax-weighted mean bin index is mapped with the fixed
        constants ``index * 3 - 99``.
        """
        predict = self.softmax_temperature(predict, 1)
        return torch.sum(predict * self.idx_tensor.type_as(predict), 1) * 3 - 99

    def forward(self, x):
        """Return ``(yaw, pitch, roll)`` for the input batch ``x``."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        pre_yaw = self.fc_yaw(x)
        pre_pitch = self.fc_pitch(x)
        pre_roll = self.fc_roll(x)

        yaw = self.bin2degree(pre_yaw)
        pitch = self.bin2degree(pre_pitch)
        roll = self.bin2degree(pre_roll)
        return yaw, pitch, roll
|
@ -0,0 +1,27 @@
|
|||||||
|
import torch
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .modnet import MODNet
|
||||||
|
|
||||||
|
|
||||||
|
def init_matting_model(model_name='modnet', half=False, device='cuda', model_rootpath=None):
    """Build a matting model, load its released weights and move it to ``device``.

    Args:
        model_name: Only ``'modnet'`` is supported.
        half: Accepted for signature compatibility; not used by this function.
        device: Target device for the returned model.
        model_rootpath: Forwarded to ``load_file_from_url`` as ``save_dir``.

    Returns:
        The model in eval mode on ``device``.

    Raises:
        NotImplementedError: If ``model_name`` is not supported.
    """
    if model_name == 'modnet':
        model = MODNet(backbone_pretrained=False)
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/matting_modnet_portrait.pth'
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    model_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    # TODO: clean pretrained model
    load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
    # remove unnecessary 'module.' -- iterate over a snapshot of the keys
    # instead of deep-copying every tensor in the checkpoint
    for key in list(load_net):
        if key.startswith('module.'):
            load_net[key[7:]] = load_net.pop(key)
    model.load_state_dict(load_net, strict=True)
    model.eval()
    model = model.to(device)
    return model
|
@ -0,0 +1,80 @@
|
|||||||
|
import os
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
from .mobilenetv2 import MobileNetV2
|
||||||
|
|
||||||
|
|
||||||
|
class BaseBackbone(nn.Module):
    """Superclass of replaceable backbone models for semantic estimation.

    Subclasses are expected to set ``model`` and ``enc_channels`` and to
    override ``forward`` and ``load_pretrained_ckpt``.
    """

    def __init__(self, in_channels):
        """Store ``in_channels`` and initialise empty ``model`` / ``enc_channels``."""
        super().__init__()
        self.in_channels = in_channels
        self.model = None
        self.enc_channels = []

    def forward(self, x):
        """Abstract: must be overridden by subclasses."""
        raise NotImplementedError

    def load_pretrained_ckpt(self):
        """Abstract: must be overridden by subclasses."""
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class MobileNetV2Backbone(BaseBackbone):
    """MobileNetV2 backbone exposing five intermediate feature maps."""

    def __init__(self, in_channels):
        """Create the MobileNetV2 feature extractor (no classifier head)."""
        super().__init__(in_channels)

        self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
        self.enc_channels = [16, 24, 32, 96, 1280]

    def forward(self, x):
        """Return ``[enc2x, enc4x, enc8x, enc16x, enc32x]``.

        The outputs of feature layers 1, 3, 6, 13 and 18 are captured while
        ``x`` is passed through layers 0..18 in order.
        """
        taps = (1, 3, 6, 13, 18)
        encodings = []
        for idx in range(taps[-1] + 1):
            x = self.model.features[idx](x)
            if idx in taps:
                encodings.append(x)
        return encodings

    def load_pretrained_ckpt(self):
        """Load backbone weights from ``./pretrained/mobilenetv2_human_seg.ckpt``.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist. (The
                previous implementation printed a message and called
                ``exit()``, terminating the host process.)
        """
        # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
        ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
        if not os.path.exists(ckpt_path):
            raise FileNotFoundError(f'cannot find the pretrained mobilenetv2 backbone: {ckpt_path}')

        # map to CPU so a checkpoint saved on GPU loads on CPU-only hosts;
        # load_state_dict copies the values into the existing parameters
        ckpt = torch.load(ckpt_path, map_location='cpu')
        self.model.load_state_dict(ckpt)
|
@ -0,0 +1,192 @@
|
|||||||
|
""" This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Useful functions
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _make_divisible(v, divisor, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v: Value to round.
        divisor: The result is a multiple of this unless ``min_value`` wins.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value, bumped up by one ``divisor`` when rounding
        would otherwise leave it below 90% of ``v``.
    """
    floor = divisor if min_value is None else min_value
    nearest = int(v + divisor / 2) // divisor * divisor
    result = max(floor, nearest)
    # Make sure that round down does not go down by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def conv_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm -> ReLU6 stack."""
    layers = (
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (stride 1, no bias) -> BatchNorm -> ReLU6 stack."""
    layers = (
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
    )
    return nn.Sequential(*layers)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Class of Inverted Residual block
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expand, depthwise 3x3, 1x1 project.

    A skip connection is added when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, oup, stride, expansion, dilation=1):
        """Build the block.

        Args:
            inp: Input channel count.
            oup: Output channel count.
            stride: Stride of the depthwise conv; must be 1 or 2.
            expansion: Channel expansion ratio; when it equals 1 the 1x1
                expansion stage is omitted.
            dilation: Dilation of the depthwise conv. Its padding is set
                to the same value so a stride-1 block preserves the
                spatial size (the padding was previously fixed at 1, which
                shrank the map and broke the residual add for
                ``dilation > 1``).
        """
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expansion)
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expansion != 1:
            # pw
            layers += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        layers += [
            # dw
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, dilation, groups=hidden_dim, dilation=dilation, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the skip is enabled."""
        if self.use_res_connect:
            return x + self.conv(x)
        return self.conv(x)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# Class of MobileNetV2
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class MobileNetV2(nn.Module):
    """MobileNetV2 feature extractor with an optional linear classifier."""

    def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
        """Build the network.

        Args:
            in_channels: Channel count of the input image.
            alpha: Width multiplier applied to the channel counts.
            expansion: Expansion ratio used by all but the first block group.
            num_classes: Size of the classifier output, or ``None`` to
                build no classifier and return feature maps from ``forward``.
        """
        super().__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes

        # (expansion t, channels c, repeats n, first stride s) per group
        block_settings = [
            [1, 16, 1, 1],
            [expansion, 24, 2, 2],
            [expansion, 32, 3, 2],
            [expansion, 64, 4, 2],
            [expansion, 96, 3, 1],
            [expansion, 160, 3, 2],
            [expansion, 320, 1, 1],
        ]

        # first layer
        channels = _make_divisible(32 * alpha, 8)
        self.last_channel = _make_divisible(1280 * alpha, 8) if alpha > 1.0 else 1280
        layers = [conv_bn(self.in_channels, channels, 2)]

        # inverted residual blocks: only the first block of a group strides
        for t, c, n, s in block_settings:
            out_channels = _make_divisible(int(c * alpha), 8)
            for repeat in range(n):
                stride = s if repeat == 0 else 1
                layers.append(InvertedResidual(channels, out_channels, stride, expansion=t))
                channels = out_channels

        # last feature layer
        layers.append(conv_1x1_bn(channels, self.last_channel))
        self.features = nn.Sequential(*layers)

        # classifier
        if self.num_classes is not None:
            self.classifier = nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel, num_classes),
            )

        self._init_weights()

    def forward(self, x):
        """Run feature layers 0..18, then the classifier when one exists."""
        for idx in range(19):
            x = self.features[idx](x)

        if self.num_classes is not None:
            # global average over the spatial axes before classifying
            x = self.classifier(x.mean(dim=(2, 3)))

        return x

    def _load_pretrained_model(self, pretrained_file):
        """Load matching entries from ``pretrained_file``; report the rest."""
        pretrain_dict = torch.load(pretrained_file, map_location='cpu')
        state_dict = self.state_dict()
        print('[MobileNetV2] Loading pretrained model...')
        for key, value in pretrain_dict.items():
            if key in state_dict:
                state_dict[key] = value
            else:
                print(key, 'is ignored')
        self.load_state_dict(state_dict)

    def _init_weights(self):
        """Initialise conv, batch-norm and linear parameters in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
|
@ -0,0 +1,267 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
from .backbone import MobileNetV2Backbone
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# MODNet Basic Modules
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class IBNorm(nn.Module):
    """Combine Instance Norm and Batch Norm into one layer.

    The first ``int(in_channels / 2)`` channels go through BatchNorm2d
    (affine), the remaining channels through InstanceNorm2d (non-affine).
    """

    def __init__(self, in_channels):
        """Split ``in_channels`` between the two normalisation layers."""
        super().__init__()
        self.bnorm_channels = int(in_channels / 2)
        self.inorm_channels = in_channels - self.bnorm_channels

        self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
        self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)

    def forward(self, x):
        """Normalise the two channel groups and concatenate them on dim 1."""
        split = self.bnorm_channels
        batch_part = self.bnorm(x[:, :split, ...].contiguous())
        instance_part = self.inorm(x[:, split:, ...].contiguous())
        return torch.cat((batch_part, instance_part), 1)
|
||||||
|
|
||||||
|
|
||||||
|
class Conv2dIBNormRelu(nn.Module):
    """Convolution followed by optional IBNorm and optional ReLU."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 with_ibn=True,
                 with_relu=True):
        """Assemble the layer stack.

        The convolution arguments are forwarded to ``nn.Conv2d`` unchanged.

        Args:
            with_ibn: Append ``IBNorm(out_channels)`` after the conv.
            with_relu: Append an in-place ReLU at the end.
        """
        super().__init__()

        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        stack = [conv]
        if with_ibn:
            stack.append(IBNorm(out_channels))
        if with_relu:
            stack.append(nn.ReLU(inplace=True))

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the assembled stack to ``x``."""
        return self.layers(x)
|
||||||
|
|
||||||
|
|
||||||
|
class SEBlock(nn.Module):
    """SE Block proposed in https://arxiv.org/pdf/1709.01507.pdf.

    Global-average-pools the input, passes the pooled vector through two
    bias-free linear layers (ReLU, then Sigmoid) and rescales the input
    channel-wise with the result.
    """

    def __init__(self, in_channels, out_channels, reduction=1):
        """Create the pooling layer and the two-layer gating network."""
        super().__init__()
        squeezed = int(in_channels // reduction)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, squeezed, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, out_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled by its per-channel gate."""
        batch, channels, _, _ = x.size()
        pooled = self.pool(x).view(batch, channels)
        gate = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gate.expand_as(x)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# MODNet Branches
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class LRBranch(nn.Module):
    """Low Resolution Branch of MODNet."""

    def __init__(self, backbone):
        """Wrap ``backbone`` and build the layers sized from its ``enc_channels``."""
        super().__init__()

        channels = backbone.enc_channels

        self.backbone = backbone
        self.se_block = SEBlock(channels[4], channels[4], reduction=4)
        self.conv_lr16x = Conv2dIBNormRelu(channels[4], channels[3], 5, stride=1, padding=2)
        self.conv_lr8x = Conv2dIBNormRelu(channels[3], channels[2], 5, stride=1, padding=2)
        self.conv_lr = Conv2dIBNormRelu(
            channels[2], 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)

    def forward(self, img, inference):
        """Compute the low-resolution features for ``img``.

        Args:
            img: Input batch passed to the backbone.
            inference: When truthy, the semantic prediction is skipped.

        Returns:
            ``(pred_semantic, lr8x, [enc2x, enc4x])`` where
            ``pred_semantic`` is ``None`` if ``inference`` is truthy.
        """
        features = self.backbone.forward(img)
        enc2x, enc4x, enc32x = features[0], features[1], features[4]

        # gate the deepest features, then upsample x2 twice with a conv after each
        gated = self.se_block(enc32x)
        lr16x = self.conv_lr16x(
            F.interpolate(gated, scale_factor=2, mode='bilinear', align_corners=False))
        lr8x = self.conv_lr8x(
            F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False))

        if inference:
            pred_semantic = None
        else:
            pred_semantic = torch.sigmoid(self.conv_lr(lr8x))

        return pred_semantic, lr8x, [enc2x, enc4x]
|
||||||
|
|
||||||
|
|
||||||
|
class HRBranch(nn.Module):
    """ High Resolution Branch of MODNet

    Combines the image (resized by 1/2 and 1/4), two encoder features and
    the low-resolution branch output. A detail prediction is produced only
    when ``inference`` is false.
    """

    def __init__(self, hr_channels, enc_channels):
        super().__init__()

        def _conv3(c_in, c_out):
            # 3x3, stride 1, padding 1: the block used throughout the stacks below.
            return Conv2dIBNormRelu(c_in, c_out, 3, stride=1, padding=1)

        wide = 2 * hr_channels

        self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
        self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)

        self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
        self.conv_enc4x = _conv3(wide, wide)

        self.conv_hr4x = nn.Sequential(
            _conv3(3 * hr_channels + 3, wide),
            _conv3(wide, wide),
            _conv3(wide, hr_channels),
        )

        self.conv_hr2x = nn.Sequential(
            _conv3(wide, wide),
            _conv3(wide, hr_channels),
            _conv3(hr_channels, hr_channels),
            _conv3(hr_channels, hr_channels),
        )

        self.conv_hr = nn.Sequential(
            _conv3(hr_channels + 3, hr_channels),
            Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, enc2x, enc4x, lr8x, inference):
        """Return ``(pred_detail, hr2x)``; ``pred_detail`` is ``None`` when ``inference`` is truthy."""

        def _resize(t, factor):
            return F.interpolate(t, scale_factor=factor, mode='bilinear', align_corners=False)

        img_half = _resize(img, 1 / 2)
        img_quarter = _resize(img, 1 / 4)

        enc2x_hr = self.tohr_enc2x(enc2x)
        hr4x = self.conv_enc2x(torch.cat((img_half, enc2x_hr), dim=1))

        enc4x_hr = self.tohr_enc4x(enc4x)
        hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x_hr), dim=1))

        lr4x = _resize(lr8x, 2)
        hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img_quarter), dim=1))

        hr2x = self.conv_hr2x(torch.cat((_resize(hr4x, 2), enc2x_hr), dim=1))

        if inference:
            return None, hr2x

        hr_full = self.conv_hr(torch.cat((_resize(hr2x, 2), img), dim=1))
        return torch.sigmoid(hr_full), hr2x
|
||||||
|
|
||||||
|
|
||||||
|
class FusionBranch(nn.Module):
    """ Fusion Branch of MODNet

    Upsamples the low-resolution feature, concatenates it with the
    high-resolution feature and the image, and returns a sigmoid matte.
    """

    def __init__(self, hr_channels, enc_channels):
        super().__init__()
        half_channels = int(hr_channels / 2)

        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)

        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
        self.conv_f = nn.Sequential(
            Conv2dIBNormRelu(hr_channels + 3, half_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(half_channels, 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, lr8x, hr2x):
        """Return the predicted matte for ``img``."""

        def _upsample2(t):
            return F.interpolate(t, scale_factor=2, mode='bilinear', align_corners=False)

        lr4x = self.conv_lr4x(_upsample2(lr8x))
        lr2x = _upsample2(lr4x)

        fused2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
        fused = self.conv_f(torch.cat((_upsample2(fused2x), img), dim=1))

        return torch.sigmoid(fused)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
# MODNet
|
||||||
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class MODNet(nn.Module):
    """ Architecture of MODNet

    Builds a MobileNetV2 backbone plus the low-resolution, high-resolution
    and fusion branches, initialises conv / norm weights, and optionally
    loads the backbone's pretrained checkpoint.
    """

    def __init__(self, in_channels=3, hr_channels=32, backbone_pretrained=True):
        super().__init__()

        self.in_channels = in_channels
        self.hr_channels = hr_channels
        self.backbone_pretrained = backbone_pretrained

        self.backbone = MobileNetV2Backbone(self.in_channels)

        enc_channels = self.backbone.enc_channels
        self.lr_branch = LRBranch(self.backbone)
        self.hr_branch = HRBranch(self.hr_channels, enc_channels)
        self.f_branch = FusionBranch(self.hr_channels, enc_channels)

        norm_classes = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                self._init_conv(module)
            elif isinstance(module, norm_classes):
                self._init_norm(module)

        # Loaded after the generic initialisation so the pretrained
        # backbone weights are not overwritten by it.
        if self.backbone_pretrained:
            self.backbone.load_pretrained_ckpt()

    def forward(self, img, inference):
        """Return ``(pred_semantic, pred_detail, pred_matte)`` for ``img``."""
        pred_semantic, lr8x, (enc2x, enc4x) = self.lr_branch(img, inference)
        pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
        pred_matte = self.f_branch(img, lr8x, hr2x)

        return pred_semantic, pred_detail, pred_matte

    def freeze_norm(self):
        """Switch every BatchNorm2d / InstanceNorm2d submodule to eval mode."""
        norm_classes = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for module in self.modules():
            if isinstance(module, norm_classes):
                module.eval()

    def _init_conv(self, conv):
        """Kaiming-uniform initialise a convolution's weight and zero its bias, if any."""
        nn.init.kaiming_uniform_(conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        if conv.bias is None:
            return
        nn.init.constant_(conv.bias, 0)

    def _init_norm(self, norm):
        """Set a norm layer's weight to 1 and bias to 0 when it has a weight."""
        if norm.weight is None:
            # Layers without a weight (e.g. non-affine norms) have nothing to initialise.
            return
        nn.init.constant_(norm.weight, 1)
        nn.init.constant_(norm.bias, 0)
|
@ -0,0 +1,24 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .bisenet import BiSeNet
|
||||||
|
from .parsenet import ParseNet
|
||||||
|
|
||||||
|
|
||||||
|
def init_parsing_model(model_name='bisenet', half=False, device='cuda', model_rootpath=None):
    """Build a face-parsing network, load its released weights and return it in eval mode.

    Args:
        model_name: 'bisenet' or 'parsenet'.
        half: accepted but not used by this function.
        device: device the model is moved to after the weights are loaded.
        model_rootpath: passed to ``load_file_from_url`` as ``save_dir``.

    Raises:
        NotImplementedError: for any other ``model_name``.
    """
    if model_name not in ('bisenet', 'parsenet'):
        raise NotImplementedError(f'{model_name} is not implemented.')

    if model_name == 'bisenet':
        net = BiSeNet(num_class=19)
        weights_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.0/parsing_bisenet.pth'
    else:
        net = ParseNet(in_size=512, out_size=512, parsing_ch=19)
        weights_url = 'https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth'

    weights_path = load_file_from_url(
        url=weights_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    # The identity map_location keeps the tensors on CPU storage while loading.
    state_dict = torch.load(weights_path, map_location=lambda storage, loc: storage)
    net.load_state_dict(state_dict, strict=True)
    net.eval()
    return net.to(device)
|
@ -0,0 +1,140 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
from .resnet import ResNet18
|
||||||
|
|
||||||
|
|
||||||
|
class ConvBNReLU(nn.Module):
    """Bias-free 2D convolution followed by batch norm and ReLU."""

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_chan,
            out_chan,
            kernel_size=ks,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_chan)

    def forward(self, x):
        normed = self.bn(self.conv(x))
        return F.relu(normed)
|
||||||
|
|
||||||
|
|
||||||
|
class BiSeNetOutput(nn.Module):
    """Output head: a 3x3 ConvBNReLU followed by a bias-free 1x1 class convolution."""

    def __init__(self, in_chan, mid_chan, num_class):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, num_class, kernel_size=1, bias=False)

    def forward(self, x):
        """Return ``(out, feat)``: the class map and the intermediate feature it was computed from."""
        feat = self.conv(x)
        return self.conv_out(feat), feat
|
||||||
|
|
||||||
|
|
||||||
|
class AttentionRefinementModule(nn.Module):
    """Rescale a convolved feature map by a per-channel sigmoid attention vector."""

    def __init__(self, in_chan, out_chan):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()

    def forward(self, x):
        feat = self.conv(x)

        # Average over the full spatial extent, leaving one value per channel.
        pooled = F.avg_pool2d(feat, feat.size()[2:])
        weights = self.sigmoid_atten(self.bn_atten(self.conv_atten(pooled)))

        return torch.mul(feat, weights)
|
||||||
|
|
||||||
|
|
||||||
|
class ContextPath(nn.Module):
    """Context path: a ResNet18 whose two deepest features are refined and merged top-down."""

    def __init__(self):
        super().__init__()
        self.resnet = ResNet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``(feat8, feat16_up, feat32_up)``."""
        feat8, feat16, feat32 = self.resnet(x)
        height8, width8 = feat8.size()[2:]
        height16, width16 = feat16.size()[2:]
        height32, width32 = feat32.size()[2:]

        # Global context: pool feat32 over its whole spatial extent,
        # project it, then expand it back to feat32's size.
        context = self.conv_avg(F.avg_pool2d(feat32, feat32.size()[2:]))
        context = F.interpolate(context, (height32, width32), mode='nearest')

        merged32 = self.arm32(feat32) + context
        feat32_up = self.conv_head32(
            F.interpolate(merged32, (height16, width16), mode='nearest'))

        merged16 = self.arm16(feat16) + feat32_up
        feat16_up = self.conv_head16(
            F.interpolate(merged16, (height8, width8), mode='nearest'))

        return feat8, feat16_up, feat32_up  # x8, x8, x16
|
||||||
|
|
||||||
|
|
||||||
|
class FeatureFusionModule(nn.Module):
    """Concatenate two feature maps, project them, and add a channel-attention-weighted copy."""

    def __init__(self, in_chan, out_chan):
        super().__init__()
        squeezed = out_chan // 4
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan, squeezed, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(squeezed, out_chan, kernel_size=1, stride=1, padding=0, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, fsp, fcp):
        feat = self.convblk(torch.cat([fsp, fcp], dim=1))

        # Per-channel weights computed from the spatially averaged feature.
        pooled = F.avg_pool2d(feat, feat.size()[2:])
        weights = self.sigmoid(self.conv2(self.relu(self.conv1(pooled))))

        return torch.mul(feat, weights) + feat
|
||||||
|
|
||||||
|
|
||||||
|
class BiSeNet(nn.Module):
    """BiSeNet parsing network with one main and two auxiliary output heads."""

    def __init__(self, num_class):
        super().__init__()
        self.cp = ContextPath()
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, num_class)
        self.conv_out16 = BiSeNetOutput(128, 64, num_class)
        self.conv_out32 = BiSeNetOutput(128, 64, num_class)

    def forward(self, x, return_feat=False):
        """Return ``(out, out16, out32)`` resized to the input size.

        With ``return_feat=True`` the three head features, resized the same
        way, are appended to the returned tuple.
        """
        target_size = tuple(x.size()[2:])

        def _to_input_size(t):
            return F.interpolate(t, target_size, mode='bilinear', align_corners=True)

        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # return res3b1 feature
        # replace spatial path feature with res3b1 feature
        feat_fuse = self.ffm(feat_res8, feat_cp8)

        out, feat = self.conv_out(feat_fuse)
        out16, feat16 = self.conv_out16(feat_cp8)
        out32, feat32 = self.conv_out32(feat_cp16)

        out, out16, out32 = _to_input_size(out), _to_input_size(out16), _to_input_size(out32)

        if not return_feat:
            return out, out16, out32

        feat, feat16, feat32 = _to_input_size(feat), _to_input_size(feat16), _to_input_size(feat32)
        return out, out16, out32, feat, feat16, feat32
|
@ -0,0 +1,194 @@
|
|||||||
|
"""Modified from https://github.com/chaofengc/PSFRGAN
|
||||||
|
"""
|
||||||
|
import numpy as np
|
||||||
|
import torch.nn as nn
|
||||||
|
from torch.nn import functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class NormLayer(nn.Module):
    """Normalization Layers.

    Args:
        channels: input channels, for batch norm, instance norm and group norm.
        normalize_shape: input shape without batch size, for layer norm.
        norm_type: one of 'bn', 'in', 'gn', 'pixel', 'layer' or 'none'
            (matched case-insensitively).

    Raises:
        AssertionError: if ``norm_type`` is not one of the names above.
    """

    def __init__(self, channels, normalize_shape=None, norm_type='bn'):
        super(NormLayer, self).__init__()
        norm_type = norm_type.lower()
        self.norm_type = norm_type
        if norm_type == 'bn':
            self.norm = nn.BatchNorm2d(channels, affine=True)
        elif norm_type == 'in':
            self.norm = nn.InstanceNorm2d(channels, affine=False)
        elif norm_type == 'gn':
            # Fixed at 32 groups.
            self.norm = nn.GroupNorm(32, channels, affine=True)
        elif norm_type == 'pixel':
            # L2-normalise each pixel's feature vector across the channel axis.
            self.norm = lambda x: F.normalize(x, p=2, dim=1)
        elif norm_type == 'layer':
            self.norm = nn.LayerNorm(normalize_shape)
        elif norm_type == 'none':
            # Multiplying by 1.0 yields a new tensor instead of returning the input object.
            self.norm = lambda x: x * 1.0
        else:
            # Raised explicitly instead of via ``assert`` so the check also
            # runs under ``python -O``; the exception type and message are
            # unchanged for callers.
            raise AssertionError(f'Norm type {norm_type} not support.')

    def forward(self, x, ref=None):
        """Apply the configured normalisation to ``x``.

        ``ref`` is only forwarded when ``self.norm_type`` is 'spade'; the
        constructor in this class never creates such a layer.
        """
        if self.norm_type == 'spade':
            return self.norm(x, ref)
        else:
            return self.norm(x)
|
||||||
|
|
||||||
|
|
||||||
|
class ReluLayer(nn.Module):
    """Relu Layer.

    Args:
        channels: number of parameters for the PReLU variant; unused by the
            other variants.
        relu_type: type of relu layer (matched case-insensitively),
            candidates are
            - ReLU
            - LeakyReLU: default relu slope 0.2
            - PRelu
            - SELU
            - none: direct pass

    Raises:
        AssertionError: if ``relu_type`` is not one of the candidates.
    """

    def __init__(self, channels, relu_type='relu'):
        super(ReluLayer, self).__init__()
        relu_type = relu_type.lower()
        if relu_type == 'relu':
            self.func = nn.ReLU(True)
        elif relu_type == 'leakyrelu':
            self.func = nn.LeakyReLU(0.2, inplace=True)
        elif relu_type == 'prelu':
            self.func = nn.PReLU(channels)
        elif relu_type == 'selu':
            self.func = nn.SELU(True)
        elif relu_type == 'none':
            # Multiplying by 1.0 yields a new tensor instead of returning the input object.
            self.func = lambda x: x * 1.0
        else:
            # Raised explicitly instead of via ``assert`` so the check also
            # runs under ``python -O``; the exception type and message are
            # unchanged for callers.
            raise AssertionError(f'Relu type {relu_type} not support.')

    def forward(self, x):
        """Apply the configured activation to ``x``."""
        return self.func(x)
|
||||||
|
|
||||||
|
|
||||||
|
class ConvLayer(nn.Module):
    """Optional 2x rescale, reflection padding, convolution, normalisation and activation.

    ``scale='down'`` uses a stride-2 convolution, ``scale='up'`` upsamples
    the input by 2 (nearest) before the convolution, and any other value
    leaves the resolution handling at stride 1 with no upsampling.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 scale='none',
                 norm_type='none',
                 relu_type='none',
                 use_pad=True,
                 bias=True):
        super().__init__()
        self.use_pad = use_pad
        self.norm_type = norm_type

        # The convolution bias is dropped whenever batch norm is requested.
        if norm_type == 'bn':
            bias = False

        conv_stride = 2 if scale == 'down' else 1

        if scale == 'up':
            self.scale_func = lambda x: nn.functional.interpolate(x, scale_factor=2, mode='nearest')
        else:
            self.scale_func = lambda x: x

        pad_width = int(np.ceil((kernel_size - 1.) / 2))
        self.reflection_pad = nn.ReflectionPad2d(pad_width)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, conv_stride, bias=bias)

        self.relu = ReluLayer(out_channels, relu_type)
        self.norm = NormLayer(out_channels, norm_type=norm_type)

    def forward(self, x):
        scaled = self.scale_func(x)
        padded = self.reflection_pad(scaled) if self.use_pad else scaled
        # Normalisation is applied before the activation.
        return self.relu(self.norm(self.conv2d(padded)))
|
||||||
|
|
||||||
|
|
||||||
|
class ResidualBlock(nn.Module):
    """
    Residual block recommended in: http://torch.ch/blog/2016/02/04/resnets.html

    Two ConvLayers form the residual path. The shortcut is the identity
    when ``scale == 'none'`` and the channel counts match, otherwise a plain
    ConvLayer with the same ``scale``.
    """

    def __init__(self, c_in, c_out, relu_type='prelu', norm_type='bn', scale='none'):
        super().__init__()

        needs_projection = not (scale == 'none' and c_in == c_out)
        if needs_projection:
            self.shortcut_func = ConvLayer(c_in, c_out, 3, scale)
        else:
            self.shortcut_func = lambda x: x

        # Downscaling happens in the second conv, upscaling in the first.
        first_scale, second_scale = {
            'down': ['none', 'down'],
            'up': ['up', 'none'],
            'none': ['none', 'none'],
        }[scale]

        self.conv1 = ConvLayer(c_in, c_out, 3, first_scale, norm_type=norm_type, relu_type=relu_type)
        self.conv2 = ConvLayer(c_out, c_out, 3, second_scale, norm_type=norm_type, relu_type='none')

    def forward(self, x):
        shortcut = self.shortcut_func(x)
        residual = self.conv2(self.conv1(x))
        return shortcut + residual
|
||||||
|
|
||||||
|
|
||||||
|
class ParseNet(nn.Module):
    """Encoder / residual body / decoder network with an image head and a parsing-mask head.

    The number of down- and up-sampling blocks is derived from ``in_size``,
    ``out_size`` and ``min_feat_size``; channel widths double (or halve) per
    block and are clipped to ``ch_range``.
    """

    def __init__(self,
                 in_size=128,
                 out_size=128,
                 min_feat_size=32,
                 base_ch=64,
                 parsing_ch=19,
                 res_depth=10,
                 relu_type='LeakyReLU',
                 norm_type='bn',
                 ch_range=[32, 256]):
        super().__init__()
        self.res_depth = res_depth
        block_kwargs = dict(norm_type=norm_type, relu_type=relu_type)
        lowest_ch, highest_ch = ch_range

        def clip(ch):
            # Clamp a channel count into [lowest_ch, highest_ch].
            return max(lowest_ch, min(ch, highest_ch))

        min_feat_size = min(in_size, min_feat_size)

        num_down = int(np.log2(in_size // min_feat_size))
        num_up = int(np.log2(out_size // min_feat_size))

        # =============== define encoder-body-decoder ====================
        # ``width`` tracks the unclipped channel count; clipping is applied
        # only when a block is built.
        width = base_ch
        encoder_layers = [ConvLayer(3, base_ch, 3, 1)]
        for _ in range(num_down):
            encoder_layers.append(ResidualBlock(clip(width), clip(width * 2), scale='down', **block_kwargs))
            width = width * 2

        body_layers = [ResidualBlock(clip(width), clip(width), **block_kwargs) for _ in range(res_depth)]

        decoder_layers = []
        for _ in range(num_up):
            decoder_layers.append(ResidualBlock(clip(width), clip(width // 2), scale='up', **block_kwargs))
            width = width // 2

        self.encoder = nn.Sequential(*encoder_layers)
        self.body = nn.Sequential(*body_layers)
        self.decoder = nn.Sequential(*decoder_layers)
        self.out_img_conv = ConvLayer(clip(width), 3)
        self.out_mask_conv = ConvLayer(clip(width), parsing_ch)

    def forward(self, x):
        """Return ``(out_mask, out_img)`` for the input ``x``."""
        encoded = self.encoder(x)
        # Skip connection around the whole residual body.
        decoded = self.decoder(encoded + self.body(encoded))
        out_img = self.out_img_conv(decoded)
        out_mask = self.out_mask_conv(decoded)
        return out_mask, out_img
|
@ -0,0 +1,69 @@
|
|||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
|
||||||
|
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    conv = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return conv
|
||||||
|
|
||||||
|
|
||||||
|
class BasicBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a shortcut connection.

    The shortcut is a 1x1 conv + batch norm when the channel count or the
    stride changes, and the identity otherwise.
    """

    def __init__(self, in_chan, out_chan, stride=1):
        super().__init__()
        # 3x3 convolutions with padding 1 and no bias.
        self.conv1 = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = nn.Conv2d(out_chan, out_chan, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)

        shape_changes = in_chan != out_chan or stride != 1
        if shape_changes:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan),
            )
        else:
            self.downsample = None

    def forward(self, x):
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        shortcut = x if self.downsample is None else self.downsample(x)

        return self.relu(shortcut + residual)
|
||||||
|
|
||||||
|
|
||||||
|
def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    """Stack ``bnum`` BasicBlocks; only the first changes channels and uses ``stride``."""
    first = BasicBlock(in_chan, out_chan, stride=stride)
    rest = [BasicBlock(out_chan, out_chan, stride=1) for _ in range(bnum - 1)]
    return nn.Sequential(first, *rest)
|
||||||
|
|
||||||
|
|
||||||
|
class ResNet18(nn.Module):
    """ResNet-18 trunk that returns the outputs of its last three stages."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)

    def forward(self, x):
        """Return ``(feat8, feat16, feat32)``, the outputs of layer2, layer3 and layer4."""
        stem = self.maxpool(F.relu(self.bn1(self.conv1(x))))

        stage1 = self.layer1(stem)
        feat8 = self.layer2(stage1)  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32
|
@ -0,0 +1 @@
|
|||||||
|
vendored from git@github.com:xinntao/facexlib.git @ 260620ae93990a300f4b16448df9bb459f1caba9
|
@ -0,0 +1,19 @@
|
|||||||
|
import torch
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.utils import load_file_from_url
|
||||||
|
from .arcface_arch import Backbone
|
||||||
|
|
||||||
|
|
||||||
|
def init_recognition_model(model_name, half=False, device='cuda', model_rootpath=None):
    """Build a face-recognition network, load its released weights and return it in eval mode.

    Args:
        model_name: only 'arcface' is supported.
        half: accepted but not used by this function.
        device: device the model is moved to after the weights are loaded.
        model_rootpath: passed to ``load_file_from_url`` as ``save_dir``.

    Raises:
        NotImplementedError: for any other ``model_name``.
    """
    if model_name == 'arcface':
        # Build on the default device; the model is moved to ``device``
        # below. (Previously this was hard-coded to 'cuda', which failed on
        # hosts without CUDA regardless of the ``device`` argument.)
        model = Backbone(num_layers=50, drop_ratio=0.6, mode='ir_se')
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/recognition_arcface_ir_se50.pth'
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    model_path = load_file_from_url(
        url=model_url, model_dir='facexlib/weights', progress=True, file_name=None, save_dir=model_rootpath)
    # Keep the checkpoint tensors on CPU storage while loading so that the
    # file can be read on machines without the device it was saved from.
    load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(load_net, strict=True)
    model.eval()
    model = model.to(device)
    return model
|
@ -0,0 +1,238 @@
|
|||||||
|
import torch
|
||||||
|
from collections import namedtuple
|
||||||
|
from torch.nn import (AdaptiveAvgPool2d, BatchNorm1d, BatchNorm2d, Conv2d, Dropout, Linear, MaxPool2d, Module, PReLU,
|
||||||
|
ReLU, Sequential, Sigmoid)
|
||||||
|
|
||||||
|
# Original Arcface Model
|
||||||
|
|
||||||
|
|
||||||
|
class Flatten(Module):
    """Collapse every dimension after the first into one."""

    def forward(self, input):
        batch = input.size(0)
        return input.view(batch, -1)
|
||||||
|
|
||||||
|
|
||||||
|
def l2_norm(input, axis=1):
    """Divide ``input`` by its L2 norm taken along ``axis`` (kept as a size-1 dimension)."""
    length = torch.norm(input, p=2, dim=axis, keepdim=True)
    return torch.div(input, length)
|
||||||
|
|
||||||
|
|
||||||
|
class SEModule(Module):
    """Channel gate: global average pool, two bias-free 1x1 convs, sigmoid, then rescale the input."""

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, squeezed, kernel_size=1, padding=0, bias=False)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(squeezed, channels, kernel_size=1, padding=0, bias=False)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        gate = self.avg_pool(x)
        gate = self.relu(self.fc1(gate))
        gate = self.sigmoid(self.fc2(gate))
        return x * gate
|
||||||
|
|
||||||
|
|
||||||
|
class bottleneck_IR(Module):
    """Residual unit: BN, 3x3 conv, PReLU, strided 3x3 conv, BN, plus a shortcut.

    The shortcut is a size-1 max pool with the given stride when the channel
    count is unchanged, otherwise a strided 1x1 conv followed by batch norm.
    """

    def __init__(self, in_channel, depth, stride):
        super().__init__()
        if in_channel != depth:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth),
            )
        else:
            # Kernel size 1: this only subsamples, it never mixes neighbouring values.
            self.shortcut_layer = MaxPool2d(1, stride)

        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
        )

    def forward(self, x):
        skip = self.shortcut_layer(x)
        return self.res_layer(x) + skip
|
||||||
|
|
||||||
|
|
||||||
|
class bottleneck_IR_SE(Module):
    """Same residual unit as ``bottleneck_IR`` with an ``SEModule(depth, 16)`` appended to the residual path."""

    def __init__(self, in_channel, depth, stride):
        super().__init__()
        if in_channel != depth:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth),
            )
        else:
            # Kernel size 1: this only subsamples, it never mixes neighbouring values.
            self.shortcut_layer = MaxPool2d(1, stride)

        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16),
        )

    def forward(self, x):
        skip = self.shortcut_layer(x)
        return self.res_layer(x) + skip
|
||||||
|
|
||||||
|
|
||||||
|
class Bottleneck(namedtuple('Block', 'in_channel depth stride')):
    """A named tuple describing a ResNet block.

    Fields, in order: ``in_channel``, ``depth`` and ``stride``.
    """
|
||||||
|
|
||||||
|
|
||||||
|
def get_block(in_channel, depth, num_units, stride=2):
    """Describe one stage: a strided, channel-changing unit followed by ``num_units - 1`` stride-1 units."""
    units = [Bottleneck(in_channel, depth, stride)]
    units.extend(Bottleneck(depth, depth, 1) for _ in range(num_units - 1))
    return units
|
||||||
|
|
||||||
|
|
||||||
|
def get_blocks(num_layers):
    """Return the four-stage block specification for a 50-, 100- or 152-layer network.

    Args:
        num_layers: 50, 100 or 152.

    Returns:
        A list of four lists of ``Bottleneck`` descriptors, one list per
        stage, as produced by ``get_block``.

    Raises:
        ValueError: if ``num_layers`` is not one of the supported depths.
            (Previously this fell through every branch and failed with an
            ``UnboundLocalError`` on the return statement.)
    """
    # Number of units in each of the four stages, per supported depth.
    units_per_stage = {
        50: (3, 4, 14, 3),
        100: (3, 13, 30, 3),
        152: (3, 8, 36, 3),
    }
    if num_layers not in units_per_stage:
        raise ValueError(f'num_layers should be 50, 100 or 152, got {num_layers!r}')

    # (in_channel, depth) of each stage; identical for every supported depth.
    stage_channels = ((64, 64), (64, 128), (128, 256), (256, 512))

    blocks = [
        get_block(in_channel=in_channel, depth=depth, num_units=num_units)
        for (in_channel, depth), num_units in zip(stage_channels, units_per_stage[num_layers])
    ]
    return blocks
|
||||||
|
|
||||||
|
|
||||||
|
class Backbone(Module):
    """ArcFace backbone: input stem, a stack of IR / IR-SE units, and an embedding head.

    ``forward`` returns the L2-normalised output of the head.
    """

    def __init__(self, num_layers, drop_ratio, mode='ir'):
        super().__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'

        stage_specs = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE

        self.input_layer = Sequential(
            Conv2d(3, 64, (3, 3), 1, 1, bias=False),
            BatchNorm2d(64),
            PReLU(64),
        )
        # The linear layer takes 512 * 7 * 7 inputs, which fixes the
        # feature-map size it can accept from the body.
        self.output_layer = Sequential(
            BatchNorm2d(512),
            Dropout(drop_ratio),
            Flatten(),
            Linear(512 * 7 * 7, 512),
            BatchNorm1d(512),
        )

        # Flatten the per-stage descriptors into one sequence of units.
        units = [
            unit_module(spec.in_channel, spec.depth, spec.stride)
            for stage in stage_specs
            for spec in stage
        ]
        self.body = Sequential(*units)

    def forward(self, x):
        features = self.body(self.input_layer(x))
        embedding = self.output_layer(features)
        return l2_norm(embedding)
|
||||||
|
|
||||||
|
|
||||||
|
# MobileFaceNet
|
||||||
|
|
||||||
|
|
||||||
|
class Conv_block(Module):
    """Bias-free convolution followed by batch norm and PReLU."""

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        self.conv = Conv2d(
            in_c,
            out_channels=out_c,
            kernel_size=kernel,
            groups=groups,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)

    def forward(self, x):
        return self.prelu(self.bn(self.conv(x)))
|
||||||
|
|
||||||
|
|
||||||
|
class Linear_block(Module):
    """Bias-free 2-D convolution followed by batch norm, with no activation.

    Args:
        in_c (int): Number of input channels.
        out_c (int): Number of output channels.
        kernel (tuple): Convolution kernel size.
        stride (tuple): Convolution stride.
        padding (tuple): Convolution padding.
        groups (int): Number of convolution groups.
    """

    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super().__init__()
        conv_options = dict(kernel_size=kernel, groups=groups, stride=stride, padding=padding, bias=False)
        self.conv = Conv2d(in_c, out_channels=out_c, **conv_options)
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        """Apply conv -> batch norm."""
        return self.bn(self.conv(x))
|
||||||
|
|
||||||
|
|
||||||
|
class Depth_Wise(Module):
    """Expand (1x1) -> depth-wise conv -> project (1x1) unit with optional skip.

    ``groups`` is used as the width of the intermediate (expanded) feature
    map and as the group count of the depth-wise convolution.

    Args:
        in_c (int): Number of input channels.
        out_c (int): Number of output channels.
        residual (bool): If True, the block input is added to the output.
        kernel (tuple): Kernel size of the depth-wise convolution.
        stride (tuple): Stride of the depth-wise convolution.
        padding (tuple): Padding of the depth-wise convolution.
        groups (int): Intermediate channel count / depth-wise group count.
    """

    def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
        super().__init__()
        hidden_c = groups
        self.conv = Conv_block(in_c, out_c=hidden_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(hidden_c, hidden_c, groups=hidden_c, kernel=kernel, padding=padding, stride=stride)
        self.project = Linear_block(hidden_c, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        """Run the three convolutions and add the input back when residual."""
        projected = self.project(self.conv_dw(self.conv(x)))
        if self.residual:
            return x + projected
        return projected
|
||||||
|
|
||||||
|
|
||||||
|
class Residual(Module):
    """Sequence of ``num_block`` residual ``Depth_Wise`` units with ``c`` channels.

    Args:
        c (int): Input and output channel count of every unit.
        num_block (int): Number of stacked units.
        groups (int): Forwarded to each ``Depth_Wise`` unit.
        kernel (tuple): Depth-wise kernel size.
        stride (tuple): Depth-wise stride.
        padding (tuple): Depth-wise padding.
    """

    def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super().__init__()
        units = [
            Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups)
            for _ in range(num_block)
        ]
        self.model = Sequential(*units)

    def forward(self, x):
        """Pass ``x`` through all stacked units."""
        return self.model(x)
|
||||||
|
|
||||||
|
|
||||||
|
class MobileFaceNet(Module):
    """MobileFaceNet-style embedding network built from depth-wise blocks.

    Args:
        embedding_size (int): Dimension of the output embedding.
    """

    def __init__(self, embedding_size):
        super().__init__()
        k3, s1, s2, p1 = (3, 3), (1, 1), (2, 2), (1, 1)
        self.conv1 = Conv_block(3, 64, kernel=k3, stride=s2, padding=p1)
        self.conv2_dw = Conv_block(64, 64, kernel=k3, stride=s1, padding=p1, groups=64)
        self.conv_23 = Depth_Wise(64, 64, kernel=k3, stride=s2, padding=p1, groups=128)
        self.conv_3 = Residual(64, num_block=4, groups=128, kernel=k3, stride=s1, padding=p1)
        self.conv_34 = Depth_Wise(64, 128, kernel=k3, stride=s2, padding=p1, groups=256)
        self.conv_4 = Residual(128, num_block=6, groups=256, kernel=k3, stride=s1, padding=p1)
        self.conv_45 = Depth_Wise(128, 128, kernel=k3, stride=s2, padding=p1, groups=512)
        self.conv_5 = Residual(128, num_block=2, groups=256, kernel=k3, stride=s1, padding=p1)
        self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=s1, padding=(0, 0))
        # 7x7 depth-wise conv without padding collapses a 7x7 map to 1x1.
        self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=s1, padding=(0, 0))
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)

    def forward(self, x):
        """Return the L2-normalised embedding for the input batch ``x``."""
        pipeline = (
            self.conv1,
            self.conv2_dw,
            self.conv_23,
            self.conv_3,
            self.conv_34,
            self.conv_4,
            self.conv_45,
            self.conv_5,
            self.conv_6_sep,
            self.conv_6_dw,
            self.conv_6_flatten,
            self.linear,
            self.bn,
        )
        out = x
        for stage in pipeline:
            out = stage(out)
        return l2_norm(out)
|
@ -0,0 +1 @@
|
|||||||
|
https://github.com/abewley/sort
|
@ -0,0 +1,71 @@
|
|||||||
|
"""
|
||||||
|
For each detected item, it computes the intersection over union (IOU) w.r.t.
|
||||||
|
each tracked object. (IOU matrix)
|
||||||
|
Then, it applies the Hungarian algorithm (via linear_assignment) to assign each
|
||||||
|
det. item to the best possible tracked item (i.e. to the one with max IOU)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from numba import jit
|
||||||
|
from scipy.optimize import linear_sum_assignment as linear_assignment
|
||||||
|
|
||||||
|
|
||||||
|
@jit
def iou(bb_test, bb_gt):
    """Return the intersection-over-union of two boxes given as [x1,y1,x2,y2].

    Only the first four entries of each box are read.
    """
    # Corners of the intersection rectangle.
    left = np.maximum(bb_test[0], bb_gt[0])
    top = np.maximum(bb_test[1], bb_gt[1])
    right = np.minimum(bb_test[2], bb_gt[2])
    bottom = np.minimum(bb_test[3], bb_gt[3])
    # Clamp at zero so disjoint boxes yield an empty intersection.
    inter = np.maximum(0., right - left) * np.maximum(0., bottom - top)
    area_test = (bb_test[2] - bb_test[0]) * (bb_test[3] - bb_test[1])
    area_gt = (bb_gt[2] - bb_gt[0]) * (bb_gt[3] - bb_gt[1])
    return inter / (area_test + area_gt - inter)
|
||||||
|
|
||||||
|
|
||||||
|
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.25):
    """Match detections to tracked objects (both given as bounding boxes).

    An IOU matrix is built for every detection/tracker pair and the optimal
    one-to-one assignment is computed with ``linear_assignment``. Assigned
    pairs whose IOU is below ``iou_threshold`` are treated as unmatched.

    Returns:
        tuple: ``(matches, unmatched_detections, unmatched_trackers)`` where
        ``matches`` is an ``(M, 2)`` array of ``[detection_idx, tracker_idx]``
        rows and the other two are arrays of indices.
    """
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)

    num_dets, num_trks = len(detections), len(trackers)
    iou_matrix = np.zeros((num_dets, num_trks), dtype=np.float32)
    for det_idx, det in enumerate(detections):
        for trk_idx, trk in enumerate(trackers):
            iou_matrix[det_idx, trk_idx] = iou(det, trk)

    # linear_assignment minimises total cost, so negate the IOU matrix to
    # maximise the total IOU between track predictions and detections.
    row_ind, col_ind = linear_assignment(-iou_matrix)

    # Indices that the solver left without a partner.
    assigned_rows = set(row_ind.tolist())
    assigned_cols = set(col_ind.tolist())
    unmatched_detections = [idx for idx in range(num_dets) if idx not in assigned_rows]
    unmatched_trackers = [idx for idx in range(num_trks) if idx not in assigned_cols]

    # Reject assigned pairs whose overlap is too low.
    pairs = []
    for row, col in zip(row_ind, col_ind):
        if iou_matrix[row, col] >= iou_threshold:
            pairs.append(np.array([[row, col]]))
            continue
        unmatched_detections.append(row)
        unmatched_trackers.append(col)

    matches = np.concatenate(pairs, axis=0) if pairs else np.empty((0, 2), dtype=int)
    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
|
@ -0,0 +1,108 @@
|
|||||||
|
import numpy as np
|
||||||
|
from filterpy.kalman import KalmanFilter
|
||||||
|
|
||||||
|
|
||||||
|
def convert_bbox_to_z(bbox):
    """Convert a corner-form box [x1,y1,x2,y2] into a (4, 1) column [x,y,s,r].

    ``x, y`` is the box centre, ``s`` is the area (scale) and ``r`` is the
    width/height aspect ratio. Entries of ``bbox`` past the fourth are ignored.
    """
    x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
    width = x2 - x1
    height = y2 - y1
    centre_x = x1 + width / 2.
    centre_y = y1 + height / 2.
    area = width * height  # scale is just area
    aspect = width / float(height)
    return np.array([centre_x, centre_y, area, aspect]).reshape((4, 1))
|
||||||
|
|
||||||
|
|
||||||
|
def convert_x_to_bbox(x, score=None):
    """Convert a centre-form state [x,y,s,r] into a corner-form box.

    Returns a (1, 4) array [x1,y1,x2,y2] (top-left and bottom-right corners),
    or a (1, 5) array with ``score`` appended when ``score`` is given.
    """
    # s = w * h and r = w / h, hence w = sqrt(s * r) and h = s / w.
    width = np.sqrt(x[2] * x[3])
    height = x[2] / width
    half_w = width / 2.
    half_h = height / 2.
    corners = [x[0] - half_w, x[1] - half_h, x[0] + half_w, x[1] + half_h]
    if score is None:
        return np.array(corners).reshape((1, 4))
    return np.array(corners + [score]).reshape((1, 5))
|
||||||
|
|
||||||
|
|
||||||
|
class KalmanBoxTracker(object):
    """Internal state of one tracked object observed as a bounding box.

    The filter state has 7 entries: the first four are the measured
    [x, y, s, r] (centre, area, aspect ratio); per the transition matrix F the
    last three are added to x, y and s on every prediction step, i.e. they act
    as per-step velocities of x, y and s.

    doc: https://filterpy.readthedocs.io/en/latest/kalman/KalmanFilter.html
    """
    # Class-wide counter used to hand out unique tracker ids.
    count = 0

    def __init__(self, bbox):
        """Initialize a tracker from an initial bounding box [x1,y1,x2,y2,...]."""
        # define constant velocity model
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        # F (dim_x, dim_x): state transition matrix
        self.kf.F = np.array([
            [1, 0, 0, 0, 1, 0, 0],
            [0, 1, 0, 0, 0, 1, 0],
            [0, 0, 1, 0, 0, 0, 1],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 1],
        ])
        # H (dim_z, dim_x): measurement function
        self.kf.H = np.array([
            [1, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
        ])
        # R (dim_z, dim_z): measurement uncertainty/noise
        self.kf.R[2:, 2:] *= 10.
        # P (dim_x, dim_x): covariance matrix
        # give high uncertainty to the unobservable initial velocities
        self.kf.P[4:, 4:] *= 1000.
        self.kf.P *= 10.
        # Q (dim_x, dim_x): Process uncertainty/noise
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01
        # x (dim_x, 1): filter state estimate
        self.kf.x[:4] = convert_bbox_to_z(bbox)

        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0

        # Works around drift of an existing tracker's prediction when no face
        # is detected in the frame.
        self.predict_num = 0  # number of consecutive prediction-only updates

        # additional fields
        self.face_attributes = []

    def update(self, bbox):
        """Update the state vector with an observed bbox.

        An empty ``bbox`` (e.g. ``[]``) means "no observation this frame": the
        filter is left untouched and ``predict_num`` is incremented instead.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1  # consecutive hits
        # Use a length check rather than ``bbox != []``: comparing an ndarray
        # with an empty list is an element-wise comparison that cannot be
        # broadcast and raises on recent NumPy versions.
        if len(bbox) > 0:
            self.kf.update(convert_bbox_to_z(bbox))
            self.predict_num = 0
        else:
            self.predict_num += 1

    def predict(self):
        """Advance the state vector and return the predicted bounding box."""
        # Prevent the predicted area from becoming non-positive.
        if (self.kf.x[6] + self.kf.x[2]) <= 0:
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        return self.history[-1][0]

    def get_state(self):
        """Returns the current bounding box estimate."""
        return convert_x_to_bbox(self.kf.x)[0]
|
@ -0,0 +1,92 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.tracking.data_association import associate_detections_to_trackers
|
||||||
|
from imaginairy.vendored.facexlib.tracking.kalman_tracker import KalmanBoxTracker
|
||||||
|
|
||||||
|
|
||||||
|
class SORT(object):
    """SORT: A Simple, Online and Realtime Tracker.

    Ref: https://github.com/abewley/sort

    Args:
        max_age (int): A tracker is removed once ``time_since_update`` reaches
            this value.
        min_hits (int): Minimum number of consecutive hits before a tracker is
            reported (not enforced during the first ``min_hits`` frames).
        iou_threshold (float): Minimum IOU for a detection/tracker match.
    """

    def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
        self.max_age = max_age
        self.min_hits = min_hits  # minimum consecutive hits; only trackers meeting it are returned
        self.iou_threshold = iou_threshold
        self.trackers = []
        self.frame_count = 0

    def update(self, dets, img_size, additional_attr, detect_interval):
        """This method must be called once for each frame even with
        empty detections.
        NOTE:as in practical realtime MOT, the detector doesn't run on every
        single frame.

        Args:
            dets (Numpy array): detections in the format
                [[x0,y0,x1,y1,score], [x0,y0,x1,y1,score], ...]
                An empty list/array means "no detections for this frame".
            img_size: indexed as ``img_size[0]`` against box y and
                ``img_size[1]`` against box x (presumably (height, width) —
                confirm with caller).
            additional_attr: per-detection attributes, indexed like ``dets``;
                the matching entry is appended to the tracker's
                ``face_attributes``.
            detect_interval (int): a tracker is removed once it has been
                updated without a detection this many times in a row.

        Returns:
            a similar array, where the last column is the object ID.
        """
        self.frame_count += 1

        # get predicted locations from existing trackers
        trks = np.zeros((len(self.trackers), 5))
        to_del = []  # To be deleted
        ret = []
        # predict tracker position using Kalman filter
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()  # Kalman predict ,very fast ,<1ms
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)

        # ``dets != []`` is an element-wise comparison when dets is an ndarray
        # and fails on recent NumPy versions; test the length instead.
        has_dets = len(dets) > 0

        if has_dets:
            # Honour the threshold configured on this instance.
            matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(
                dets, trks, self.iou_threshold)

            # update matched trackers with assigned detections
            for t, trk in enumerate(self.trackers):
                if t not in unmatched_trks:
                    d = matched[np.where(matched[:, 1] == t)[0], 0]
                    trk.update(dets[d, :][0])
                    trk.face_attributes.append(additional_attr[d[0]])

            # create and initialize new trackers for unmatched detections
            for i in unmatched_dets:
                trk = KalmanBoxTracker(dets[i, :])
                trk.face_attributes.append(additional_attr[i])
                print(f'New tracker: {trk.id + 1}.')
                self.trackers.append(trk)

        # Walk the trackers back to front so that popping index ``i`` never
        # shifts an element that has not been visited yet.
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            if not has_dets:
                trk.update([])

            d = trk.get_state()
            # get return tracklet
            # 1) time_since_update < 1: detected
            # 2) i) hit_streak >= min_hits: minimum consecutive hits reached
            #    ii) frame_count <= min_hits: still within the first few frames
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                ret.append(np.concatenate((d, [trk.id + 1])).reshape(1, -1))  # +1 as MOT benchmark requires positive
            i -= 1

            # remove dead tracklet
            # 1) time_since_update >= max_age: not updated for too long
            # 2) predict_num: number of consecutive prediction-only frames
            # 3) out of image size
            if (trk.time_since_update >= self.max_age) or (trk.predict_num >= detect_interval) or (
                    d[2] < 0 or d[3] < 0 or d[0] > img_size[1] or d[1] > img_size[0]):
                print(f'Remove tracker: {trk.id + 1}')
                self.trackers.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret)
        else:
            return np.empty((0, 5))
|
@ -0,0 +1,7 @@
|
|||||||
|
from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_valid_bboxes, paste_face_back
|
||||||
|
from .misc import img2tensor, load_file_from_url, scandir
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url', 'paste_face_back',
|
||||||
|
'img2tensor', 'scandir'
|
||||||
|
]
|
@ -0,0 +1,374 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import torch
|
||||||
|
from torchvision.transforms.functional import normalize
|
||||||
|
|
||||||
|
from imaginairy.vendored.facexlib.detection import init_detection_model
|
||||||
|
from imaginairy.vendored.facexlib.parsing import init_parsing_model
|
||||||
|
from imaginairy.vendored.facexlib.utils.misc import img2tensor, imwrite
|
||||||
|
|
||||||
|
|
||||||
|
def get_largest_face(det_faces, h, w):
    """Return ``(face, index)`` of the detection with the largest in-image area.

    Each detection is read as [x1, y1, x2, y2, ...]; its corners are clipped
    to the ``w`` x ``h`` image before the area is computed.
    """

    def clip(coord, upper):
        # Clamp a coordinate into [0, upper].
        if coord < 0:
            return 0
        return upper if coord > upper else coord

    face_areas = []
    for face in det_faces:
        x_min, x_max = clip(face[0], w), clip(face[2], w)
        y_min, y_max = clip(face[1], h), clip(face[3], h)
        face_areas.append((x_max - x_min) * (y_max - y_min))
    largest_idx = face_areas.index(max(face_areas))
    return det_faces[largest_idx], largest_idx
|
||||||
|
|
||||||
|
|
||||||
|
def get_center_face(det_faces, h=0, w=0, center=None):
    """Return ``(face, index)`` of the detection whose centre is nearest a point.

    The reference point is ``center`` when given, otherwise the image centre
    ``(w / 2, h / 2)``. Each detection is read as [x1, y1, x2, y2, ...].
    """
    target = np.array([w / 2, h / 2]) if center is None else np.array(center)
    center_dist = [
        np.linalg.norm(np.array([(face[0] + face[2]) / 2, (face[1] + face[3]) / 2]) - target) for face in det_faces
    ]
    center_idx = center_dist.index(min(center_dist))
    return det_faces[center_idx], center_idx
|
||||||
|
|
||||||
|
|
||||||
|
class FaceRestoreHelper(object):
|
||||||
|
"""Helper for the face restoration pipeline (base class)."""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
upscale_factor,
|
||||||
|
face_size=512,
|
||||||
|
crop_ratio=(1, 1),
|
||||||
|
det_model='retinaface_resnet50',
|
||||||
|
save_ext='png',
|
||||||
|
template_3points=False,
|
||||||
|
pad_blur=False,
|
||||||
|
use_parse=False,
|
||||||
|
device=None,
|
||||||
|
model_rootpath=None):
|
||||||
|
self.template_3points = template_3points # improve robustness
|
||||||
|
self.upscale_factor = upscale_factor
|
||||||
|
# the cropped face ratio based on the square face
|
||||||
|
self.crop_ratio = crop_ratio # (h, w)
|
||||||
|
assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ration only supports >=1'
|
||||||
|
self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
|
||||||
|
|
||||||
|
if self.template_3points:
|
||||||
|
self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
|
||||||
|
else:
|
||||||
|
# standard 5 landmarks for FFHQ faces with 512 x 512
|
||||||
|
self.face_template = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935],
|
||||||
|
[201.26117, 371.41043], [313.08905, 371.15118]])
|
||||||
|
self.face_template = self.face_template * (face_size / 512.0)
|
||||||
|
if self.crop_ratio[0] > 1:
|
||||||
|
self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
|
||||||
|
if self.crop_ratio[1] > 1:
|
||||||
|
self.face_template[:, 0] += face_size * (self.crop_ratio[1] - 1) / 2
|
||||||
|
self.save_ext = save_ext
|
||||||
|
self.pad_blur = pad_blur
|
||||||
|
if self.pad_blur is True:
|
||||||
|
self.template_3points = False
|
||||||
|
|
||||||
|
self.all_landmarks_5 = []
|
||||||
|
self.det_faces = []
|
||||||
|
self.affine_matrices = []
|
||||||
|
self.inverse_affine_matrices = []
|
||||||
|
self.cropped_faces = []
|
||||||
|
self.restored_faces = []
|
||||||
|
self.pad_input_imgs = []
|
||||||
|
|
||||||
|
if device is None:
|
||||||
|
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
|
else:
|
||||||
|
self.device = device
|
||||||
|
|
||||||
|
# init face detection model
|
||||||
|
self.face_det = init_detection_model(det_model, half=False, device=self.device, model_rootpath=model_rootpath)
|
||||||
|
|
||||||
|
# init face parsing model
|
||||||
|
self.use_parse = use_parse
|
||||||
|
self.face_parse = init_parsing_model(model_name='parsenet', device=self.device, model_rootpath=model_rootpath)
|
||||||
|
|
||||||
|
def set_upscale_factor(self, upscale_factor):
|
||||||
|
self.upscale_factor = upscale_factor
|
||||||
|
|
||||||
|
def read_image(self, img):
|
||||||
|
"""img can be image path or cv2 loaded image."""
|
||||||
|
# self.input_img is Numpy array, (h, w, c), BGR, uint8, [0, 255]
|
||||||
|
if isinstance(img, str):
|
||||||
|
img = cv2.imread(img)
|
||||||
|
|
||||||
|
if np.max(img) > 256: # 16-bit image
|
||||||
|
img = img / 65535 * 255
|
||||||
|
if len(img.shape) == 2: # gray image
|
||||||
|
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||||
|
elif img.shape[2] == 4: # RGBA image with alpha channel
|
||||||
|
img = img[:, :, 0:3]
|
||||||
|
|
||||||
|
self.input_img = img
|
||||||
|
|
||||||
|
def get_face_landmarks_5(self,
|
||||||
|
only_keep_largest=False,
|
||||||
|
only_center_face=False,
|
||||||
|
resize=None,
|
||||||
|
blur_ratio=0.01,
|
||||||
|
eye_dist_threshold=None):
|
||||||
|
if resize is None:
|
||||||
|
scale = 1
|
||||||
|
input_img = self.input_img
|
||||||
|
else:
|
||||||
|
h, w = self.input_img.shape[0:2]
|
||||||
|
scale = min(h, w) / resize
|
||||||
|
h, w = int(h / scale), int(w / scale)
|
||||||
|
input_img = cv2.resize(self.input_img, (w, h), interpolation=cv2.INTER_LANCZOS4)
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
bboxes = self.face_det.detect_faces(input_img, 0.97) * scale
|
||||||
|
for bbox in bboxes:
|
||||||
|
# remove faces with too small eye distance: side faces or too small faces
|
||||||
|
eye_dist = np.linalg.norm([bbox[5] - bbox[7], bbox[6] - bbox[8]])
|
||||||
|
if eye_dist_threshold is not None and (eye_dist < eye_dist_threshold):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if self.template_3points:
|
||||||
|
landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 11, 2)])
|
||||||
|
else:
|
||||||
|
landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 15, 2)])
|
||||||
|
self.all_landmarks_5.append(landmark)
|
||||||
|
self.det_faces.append(bbox[0:5])
|
||||||
|
if len(self.det_faces) == 0:
|
||||||
|
return 0
|
||||||
|
if only_keep_largest:
|
||||||
|
h, w, _ = self.input_img.shape
|
||||||
|
self.det_faces, largest_idx = get_largest_face(self.det_faces, h, w)
|
||||||
|
self.all_landmarks_5 = [self.all_landmarks_5[largest_idx]]
|
||||||
|
elif only_center_face:
|
||||||
|
h, w, _ = self.input_img.shape
|
||||||
|
self.det_faces, center_idx = get_center_face(self.det_faces, h, w)
|
||||||
|
self.all_landmarks_5 = [self.all_landmarks_5[center_idx]]
|
||||||
|
|
||||||
|
# pad blurry images
|
||||||
|
if self.pad_blur:
|
||||||
|
self.pad_input_imgs = []
|
||||||
|
for landmarks in self.all_landmarks_5:
|
||||||
|
# get landmarks
|
||||||
|
eye_left = landmarks[0, :]
|
||||||
|
eye_right = landmarks[1, :]
|
||||||
|
eye_avg = (eye_left + eye_right) * 0.5
|
||||||
|
mouth_avg = (landmarks[3, :] + landmarks[4, :]) * 0.5
|
||||||
|
eye_to_eye = eye_right - eye_left
|
||||||
|
eye_to_mouth = mouth_avg - eye_avg
|
||||||
|
|
||||||
|
# Get the oriented crop rectangle
|
||||||
|
# x: half width of the oriented crop rectangle
|
||||||
|
x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
|
||||||
|
# - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
|
||||||
|
# norm with the hypotenuse: get the direction
|
||||||
|
x /= np.hypot(*x) # get the hypotenuse of a right triangle
|
||||||
|
rect_scale = 1.5
|
||||||
|
x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
|
||||||
|
# y: half height of the oriented crop rectangle
|
||||||
|
y = np.flipud(x) * [-1, 1]
|
||||||
|
|
||||||
|
# c: center
|
||||||
|
c = eye_avg + eye_to_mouth * 0.1
|
||||||
|
# quad: (left_top, left_bottom, right_bottom, right_top)
|
||||||
|
quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
|
||||||
|
# qsize: side length of the square
|
||||||
|
qsize = np.hypot(*x) * 2
|
||||||
|
border = max(int(np.rint(qsize * 0.1)), 3)
|
||||||
|
|
||||||
|
# get pad
|
||||||
|
# pad: (width_left, height_top, width_right, height_bottom)
|
||||||
|
pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
|
||||||
|
int(np.ceil(max(quad[:, 1]))))
|
||||||
|
pad = [
|
||||||
|
max(-pad[0] + border, 1),
|
||||||
|
max(-pad[1] + border, 1),
|
||||||
|
max(pad[2] - self.input_img.shape[0] + border, 1),
|
||||||
|
max(pad[3] - self.input_img.shape[1] + border, 1)
|
||||||
|
]
|
||||||
|
|
||||||
|
if max(pad) > 1:
|
||||||
|
# pad image
|
||||||
|
pad_img = np.pad(self.input_img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
|
||||||
|
# modify landmark coords
|
||||||
|
landmarks[:, 0] += pad[0]
|
||||||
|
landmarks[:, 1] += pad[1]
|
||||||
|
# blur pad images
|
||||||
|
h, w, _ = pad_img.shape
|
||||||
|
y, x, _ = np.ogrid[:h, :w, :1]
|
||||||
|
mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
|
||||||
|
np.float32(w - 1 - x) / pad[2]),
|
||||||
|
1.0 - np.minimum(np.float32(y) / pad[1],
|
||||||
|
np.float32(h - 1 - y) / pad[3]))
|
||||||
|
blur = int(qsize * blur_ratio)
|
||||||
|
if blur % 2 == 0:
|
||||||
|
blur += 1
|
||||||
|
blur_img = cv2.boxFilter(pad_img, 0, ksize=(blur, blur))
|
||||||
|
# blur_img = cv2.GaussianBlur(pad_img, (blur, blur), 0)
|
||||||
|
|
||||||
|
pad_img = pad_img.astype('float32')
|
||||||
|
pad_img += (blur_img - pad_img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
|
||||||
|
pad_img += (np.median(pad_img, axis=(0, 1)) - pad_img) * np.clip(mask, 0.0, 1.0)
|
||||||
|
pad_img = np.clip(pad_img, 0, 255) # float32, [0, 255]
|
||||||
|
self.pad_input_imgs.append(pad_img)
|
||||||
|
else:
|
||||||
|
self.pad_input_imgs.append(np.copy(self.input_img))
|
||||||
|
|
||||||
|
return len(self.all_landmarks_5)
|
||||||
|
|
||||||
|
def align_warp_face(self, save_cropped_path=None, border_mode='constant'):
|
||||||
|
"""Align and warp faces with face template.
|
||||||
|
"""
|
||||||
|
if self.pad_blur:
|
||||||
|
assert len(self.pad_input_imgs) == len(
|
||||||
|
self.all_landmarks_5), f'Mismatched samples: {len(self.pad_input_imgs)} and {len(self.all_landmarks_5)}'
|
||||||
|
for idx, landmark in enumerate(self.all_landmarks_5):
|
||||||
|
# use 5 landmarks to get affine matrix
|
||||||
|
# use cv2.LMEDS method for the equivalence to skimage transform
|
||||||
|
# ref: https://blog.csdn.net/yichxi/article/details/115827338
|
||||||
|
affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
|
||||||
|
self.affine_matrices.append(affine_matrix)
|
||||||
|
# warp and crop faces
|
||||||
|
if border_mode == 'constant':
|
||||||
|
border_mode = cv2.BORDER_CONSTANT
|
||||||
|
elif border_mode == 'reflect101':
|
||||||
|
border_mode = cv2.BORDER_REFLECT101
|
||||||
|
elif border_mode == 'reflect':
|
||||||
|
border_mode = cv2.BORDER_REFLECT
|
||||||
|
if self.pad_blur:
|
||||||
|
input_img = self.pad_input_imgs[idx]
|
||||||
|
else:
|
||||||
|
input_img = self.input_img
|
||||||
|
cropped_face = cv2.warpAffine(
|
||||||
|
input_img, affine_matrix, self.face_size, borderMode=border_mode, borderValue=(135, 133, 132)) # gray
|
||||||
|
self.cropped_faces.append(cropped_face)
|
||||||
|
# save the cropped face
|
||||||
|
if save_cropped_path is not None:
|
||||||
|
path = os.path.splitext(save_cropped_path)[0]
|
||||||
|
save_path = f'{path}_{idx:02d}.{self.save_ext}'
|
||||||
|
imwrite(cropped_face, save_path)
|
||||||
|
|
||||||
|
def get_inverse_affine(self, save_inverse_affine_path=None):
|
||||||
|
"""Get inverse affine matrix."""
|
||||||
|
for idx, affine_matrix in enumerate(self.affine_matrices):
|
||||||
|
inverse_affine = cv2.invertAffineTransform(affine_matrix)
|
||||||
|
inverse_affine *= self.upscale_factor
|
||||||
|
self.inverse_affine_matrices.append(inverse_affine)
|
||||||
|
# save inverse affine matrices
|
||||||
|
if save_inverse_affine_path is not None:
|
||||||
|
path, _ = os.path.splitext(save_inverse_affine_path)
|
||||||
|
save_path = f'{path}_{idx:02d}.pth'
|
||||||
|
torch.save(inverse_affine, save_path)
|
||||||
|
|
||||||
|
def add_restored_face(self, face):
|
||||||
|
self.restored_faces.append(face)
|
||||||
|
|
||||||
|
def paste_faces_to_input_image(self, save_path=None, upsample_img=None):
|
||||||
|
h, w, _ = self.input_img.shape
|
||||||
|
h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)
|
||||||
|
|
||||||
|
if upsample_img is None:
|
||||||
|
# simply resize the background
|
||||||
|
upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
|
||||||
|
else:
|
||||||
|
upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
|
||||||
|
|
||||||
|
assert len(self.restored_faces) == len(
|
||||||
|
self.inverse_affine_matrices), ('length of restored_faces and affine_matrices are different.')
|
||||||
|
for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
|
||||||
|
# Add an offset to inverse affine matrix, for more precise back alignment
|
||||||
|
if self.upscale_factor > 1:
|
||||||
|
extra_offset = 0.5 * self.upscale_factor
|
||||||
|
else:
|
||||||
|
extra_offset = 0
|
||||||
|
inverse_affine[:, 2] += extra_offset
|
||||||
|
inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
|
||||||
|
|
||||||
|
if self.use_parse:
|
||||||
|
# inference
|
||||||
|
face_input = cv2.resize(restored_face, (512, 512), interpolation=cv2.INTER_LINEAR)
|
||||||
|
face_input = img2tensor(face_input.astype('float32') / 255., bgr2rgb=True, float32=True)
|
||||||
|
normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
|
||||||
|
face_input = torch.unsqueeze(face_input, 0).to(self.device)
|
||||||
|
with torch.no_grad():
|
||||||
|
out = self.face_parse(face_input)[0]
|
||||||
|
out = out.argmax(dim=1).squeeze().cpu().numpy()
|
||||||
|
|
||||||
|
mask = np.zeros(out.shape)
|
||||||
|
MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
|
||||||
|
for idx, color in enumerate(MASK_COLORMAP):
|
||||||
|
mask[out == idx] = color
|
||||||
|
# blur the mask
|
||||||
|
mask = cv2.GaussianBlur(mask, (101, 101), 11)
|
||||||
|
mask = cv2.GaussianBlur(mask, (101, 101), 11)
|
||||||
|
# remove the black borders
|
||||||
|
thres = 10
|
||||||
|
mask[:thres, :] = 0
|
||||||
|
mask[-thres:, :] = 0
|
||||||
|
mask[:, :thres] = 0
|
||||||
|
mask[:, -thres:] = 0
|
||||||
|
mask = mask / 255.
|
||||||
|
|
||||||
|
mask = cv2.resize(mask, restored_face.shape[:2])
|
||||||
|
mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up), flags=3)
|
||||||
|
inv_soft_mask = mask[:, :, None]
|
||||||
|
pasted_face = inv_restored
|
||||||
|
|
||||||
|
else: # use square parse maps
|
||||||
|
mask = np.ones(self.face_size, dtype=np.float32)
|
||||||
|
inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
|
||||||
|
# remove the black borders
|
||||||
|
inv_mask_erosion = cv2.erode(
|
||||||
|
inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
|
||||||
|
pasted_face = inv_mask_erosion[:, :, None] * inv_restored
|
||||||
|
total_face_area = np.sum(inv_mask_erosion) # // 3
|
||||||
|
# compute the fusion edge based on the area of face
|
||||||
|
w_edge = int(total_face_area**0.5) // 20
|
||||||
|
erosion_radius = w_edge * 2
|
||||||
|
inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
|
||||||
|
blur_size = w_edge * 2
|
||||||
|
inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
|
||||||
|
if len(upsample_img.shape) == 2: # upsample_img is gray image
|
||||||
|
upsample_img = upsample_img[:, :, None]
|
||||||
|
inv_soft_mask = inv_soft_mask[:, :, None]
|
||||||
|
|
||||||
|
if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4: # alpha channel
|
||||||
|
alpha = upsample_img[:, :, 3:]
|
||||||
|
upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
|
||||||
|
upsample_img = np.concatenate((upsample_img, alpha), axis=2)
|
||||||
|
else:
|
||||||
|
upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img
|
||||||
|
|
||||||
|
if np.max(upsample_img) > 256: # 16-bit image
|
||||||
|
upsample_img = upsample_img.astype(np.uint16)
|
||||||
|
else:
|
||||||
|
upsample_img = upsample_img.astype(np.uint8)
|
||||||
|
if save_path is not None:
|
||||||
|
path = os.path.splitext(save_path)[0]
|
||||||
|
save_path = f'{path}.{self.save_ext}'
|
||||||
|
imwrite(upsample_img, save_path)
|
||||||
|
return upsample_img
|
||||||
|
|
||||||
|
def clean_all(self):
    """Reset every per-image cache attribute on ``self`` to a new empty list."""
    cache_attributes = (
        'all_landmarks_5',
        'restored_faces',
        'affine_matrices',
        'cropped_faces',
        'inverse_affine_matrices',
        'det_faces',
        'pad_input_imgs',
    )
    for attribute in cache_attributes:
        # A fresh list per attribute, so the caches never alias one another.
        setattr(self, attribute, [])
|
@ -0,0 +1,250 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
def compute_increased_bbox(bbox, increase_area, preserve_aspect=True):
    """Enlarge a ``(left, top, right, bottom)`` box on every side.

    Args:
        bbox (sequence): Four values ``(left, top, right, bottom)``.
        increase_area (float): Minimum growth per side, expressed as a
            fraction of the box width (horizontally) or height (vertically).
        preserve_aspect (bool): If True, the shorter dimension is grown by a
            larger fraction so that it catches up with the enlarged longer
            dimension. If False, both dimensions grow by ``increase_area``.
            Default: True.

    Returns:
        tuple[int]: The enlarged ``(left, top, right, bottom)``; every value
        is truncated with ``int``. The result is not clipped to any image.
    """
    x0, y0, x1, y1 = bbox
    box_w = x1 - x0
    box_h = y1 - y0

    grow_w = grow_h = increase_area
    if preserve_aspect:
        grow_w = max(increase_area, ((1 + 2 * increase_area) * box_h - box_w) / (2 * box_w))
        grow_h = max(increase_area, ((1 + 2 * increase_area) * box_w - box_h) / (2 * box_h))

    return (
        int(x0 - grow_w * box_w),
        int(y0 - grow_h * box_h),
        int(x1 + grow_w * box_w),
        int(y1 + grow_h * box_h),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_valid_bboxes(bboxes, h, w):
    """Clip one box to the image rectangle ``[0, w] x [0, h]``.

    Args:
        bboxes (sequence): A single box; only its first four entries
            ``(left, top, right, bottom)`` are read.
        h (int): Image height, the upper limit for ``bottom``.
        w (int): Image width, the upper limit for ``right``.

    Returns:
        tuple: The clipped ``(left, top, right, bottom)``.
    """
    x0, y0, x1, y1 = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
    return (max(x0, 0), max(y0, 0), min(x1, w), min(y1, h))
|
||||||
|
|
||||||
|
|
||||||
|
def align_crop_face_landmarks(img,
                              landmarks,
                              output_size,
                              transform_size=None,
                              enable_padding=True,
                              return_inverse_affine=False,
                              shrink_ratio=(1, 1)):
    """Align and crop a face using its landmarks.

    The output_size and transform_size are based on width. The height is
    adjusted based on shrink_ratio_h / shrink_ratio_w.

    Modified from:
    https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py

    Args:
        img (Numpy array): Input image. It is sliced and padded along three
            axes, so it must be a 3-D array.
        landmarks (Numpy array): 5 or 68 or 98 landmarks, one (x, y) per row.
        output_size (int): Output face size (width).
        transform_size (int): Size (width) used for the affine warp before
            the final resize. Default: None, meaning ``output_size * 4``.
        enable_padding (bool): Whether to reflect-pad and blur the image
            borders when the crop rectangle reaches outside the image.
            Default: True.
        return_inverse_affine (bool): Whether to also compute the affine
            matrix that maps the cropped face back onto the original image.
            Default: False.
        shrink_ratio (float | tuple[float] | list[float]): Shrink the whole
            face for height and width (crop larger area). A single number is
            used for both. Default: (1, 1).

    Returns:
        tuple: ``(cropped_face, inverse_affine)``. ``cropped_face`` is a
        Numpy array; ``inverse_affine`` is the inverted 2x3 affine matrix,
        or None when ``return_inverse_affine`` is False.

    Raises:
        ValueError: If the number of landmarks is not 5, 68 or 98.
    """
    lm_type = 'retinaface_5'  # Options: dlib_5, retinaface_5

    if isinstance(shrink_ratio, (float, int)):
        shrink_ratio = (shrink_ratio, shrink_ratio)
    if transform_size is None:
        transform_size = output_size * 4

    # Parse landmarks
    lm = np.array(landmarks)
    if lm.shape[0] == 5 and lm_type == 'retinaface_5':
        eye_left = lm[0]
        eye_right = lm[1]
        mouth_avg = (lm[3] + lm[4]) * 0.5
    elif lm.shape[0] == 5 and lm_type == 'dlib_5':
        lm_eye_left = lm[2:4]
        lm_eye_right = lm[0:2]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = lm[4]
    elif lm.shape[0] == 68:
        lm_eye_left = lm[36:42]
        lm_eye_right = lm[42:48]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[48] + lm[54]) * 0.5
    elif lm.shape[0] == 98:
        lm_eye_left = lm[60:68]
        lm_eye_right = lm[68:76]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[76] + lm[82]) * 0.5
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f'Unsupported number of landmarks: {lm.shape[0]}; expected 5, 68 or 98.')

    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    eye_to_mouth = mouth_avg - eye_avg

    # Get the oriented crop rectangle
    # x: half width of the oriented crop rectangle
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    #  - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
    # norm with the hypotenuse: get the direction
    x /= np.hypot(*x)  # get the hypotenuse of a right triangle
    rect_scale = 1  # TODO: you can edit it to get larger rect
    x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
    # y: half height of the oriented crop rectangle
    y = np.flipud(x) * [-1, 1]

    x *= shrink_ratio[1]  # width
    y *= shrink_ratio[0]  # height

    # c: center
    c = eye_avg + eye_to_mouth * 0.1
    # quad: (left_top, left_bottom, right_bottom, right_top)
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    # qsize: side length of the square
    qsize = np.hypot(*x) * 2

    # Keep the quad in original-image coordinates for the inverse affine;
    # `quad` itself is shifted/scaled by the shrink, crop and pad steps below.
    quad_ori = np.copy(quad)
    # Shrink, for large face
    # TODO: do we really need shrink
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        h, w = img.shape[0:2]
        rsize = (int(np.rint(float(w) / shrink)), int(np.rint(float(h) / shrink)))
        img = cv2.resize(img, rsize, interpolation=cv2.INTER_AREA)
        quad /= shrink
        qsize /= shrink

    # Crop
    h, w = img.shape[0:2]
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
            int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, w), min(crop[3] + border, h))
    if crop[2] - crop[0] < w or crop[3] - crop[1] < h:
        img = img[crop[1]:crop[3], crop[0]:crop[2], :]
        quad -= crop[0:2]

    # Pad
    # pad: (width_left, height_top, width_right, height_bottom)
    h, w = img.shape[0:2]
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
           int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - w + border, 0), max(pad[3] - h + border, 0))
    if enable_padding and max(pad) > border - 4:
        # Every side is padded by at least round(qsize * 0.3) pixels.
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w = img.shape[0:2]
        # NOTE: x and y are reused here as pixel-coordinate grids.
        y, x, _ = np.ogrid[:h, :w, :1]
        mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
                                           np.float32(w - 1 - x) / pad[2]),
                          1.0 - np.minimum(np.float32(y) / pad[1],
                                           np.float32(h - 1 - y) / pad[3]))
        blur = int(qsize * 0.02)
        if blur % 2 == 0:
            blur += 1
        blur_img = cv2.boxFilter(img, 0, ksize=(blur, blur))

        img = img.astype('float32')
        img += (blur_img - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.clip(img, 0, 255)  # float32, [0, 255]
        quad += pad[:2]

    # Transform use cv2
    h_ratio = shrink_ratio[0] / shrink_ratio[1]
    dst_h, dst_w = int(transform_size * h_ratio), transform_size
    template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
    # use cv2.LMEDS method for the equivalence to skimage transform
    # ref: https://blog.csdn.net/yichxi/article/details/115827338
    affine_matrix = cv2.estimateAffinePartial2D(quad, template, method=cv2.LMEDS)[0]
    cropped_face = cv2.warpAffine(
        img, affine_matrix, (dst_w, dst_h), borderMode=cv2.BORDER_CONSTANT, borderValue=(135, 133, 132))  # gray

    if output_size < transform_size:
        cropped_face = cv2.resize(
            cropped_face, (output_size, int(output_size * h_ratio)), interpolation=cv2.INTER_LINEAR)

    if return_inverse_affine:
        dst_h, dst_w = int(output_size * h_ratio), output_size
        template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
        # use cv2.LMEDS method for the equivalence to skimage transform
        # ref: https://blog.csdn.net/yichxi/article/details/115827338
        # The destination corners must be the (dst_w x dst_h) template; using
        # output_size for the left-bottom corner is only right when h_ratio == 1.
        affine_matrix = cv2.estimateAffinePartial2D(quad_ori, template, method=cv2.LMEDS)[0]
        inverse_affine = cv2.invertAffineTransform(affine_matrix)
    else:
        inverse_affine = None
    return cropped_face, inverse_affine
|
||||||
|
|
||||||
|
|
||||||
|
def paste_face_back(img, face, inverse_affine):
    """Warp ``face`` back into ``img`` and blend it in with a feathered mask.

    Args:
        img (Numpy array): Image to paste onto; its first two dimensions give
            the output height and width.
        face (Numpy array): Face crop to paste back.
        inverse_affine (Numpy array): Affine matrix passed to
            ``cv2.warpAffine`` to map ``face`` into ``img`` coordinates.

    Returns:
        Numpy array: The blended image (float32, [0, 255] per the original
        note in this function).
    """
    img_h, img_w = img.shape[0:2]
    canvas_size = (img_w, img_h)
    face_h, face_w = face.shape[0:2]

    warped_face = cv2.warpAffine(face, inverse_affine, canvas_size)
    face_mask = np.ones((face_h, face_w, 3), dtype=np.float32)
    warped_mask = cv2.warpAffine(face_mask, inverse_affine, canvas_size)

    # remove the black borders
    eroded_mask = cv2.erode(warped_mask, np.ones((2, 2), np.uint8))
    borderless_face = eroded_mask * warped_face

    # The mask has three identical channels, hence the division by 3.
    face_area = np.sum(eroded_mask) // 3
    # compute the fusion edge based on the area of face
    edge = int(face_area**0.5) // 20
    kernel_side = edge * 2
    core_mask = cv2.erode(eroded_mask, np.ones((kernel_side, kernel_side), np.uint8))
    blur_side = edge * 2 + 1
    soft_mask = cv2.GaussianBlur(core_mask, (blur_side, blur_side), 0)

    # float32, [0, 255]
    return soft_mask * borderless_face + (1 - soft_mask) * img
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke test: detect the largest face in one image, align/crop it
    # and paste the crop back, writing the intermediate results under tmp/.
    import os

    from imaginairy.vendored.facexlib.detection import init_detection_model
    from imaginairy.vendored.facexlib.utils.face_restoration_helper import get_largest_face
    from imaginairy.vendored.facexlib.visualization import visualize_detection

    img_path = '/home/wxt/datasets/ffhq/ffhq_wild/00009.png'
    # splitext lives in os.path; `os.splitext` raises AttributeError.
    img_name = os.path.splitext(os.path.basename(img_path))[0]

    # initialize model
    det_net = init_detection_model('retinaface_resnet50', half=False)
    img_ori = cv2.imread(img_path)
    if img_ori is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    h, w = img_ori.shape[0:2]
    # if larger than 800, scale it
    scale = max(h / 800, w / 800)
    if scale > 1:
        img = cv2.resize(img_ori, (int(w / scale), int(h / scale)), interpolation=cv2.INTER_LINEAR)
    else:
        # Small images are used as-is; previously `img` was undefined here.
        img = img_ori

    with torch.no_grad():
        bboxes = det_net.detect_faces(img, 0.97)
    if scale > 1:
        bboxes *= scale  # the score is incorrect
    bboxes = get_largest_face(bboxes, h, w)[0]

    # cv2.imwrite does not create missing directories.
    os.makedirs('tmp', exist_ok=True)
    visualize_detection(img_ori, [bboxes], f'tmp/{img_name}_det.png')

    # Entries 5..14 of the detection hold five (x, y) landmark pairs.
    landmarks = np.array([[bboxes[i], bboxes[i + 1]] for i in range(5, 15, 2)])

    cropped_face, inverse_affine = align_crop_face_landmarks(
        img_ori,
        landmarks,
        output_size=512,
        transform_size=None,
        enable_padding=True,
        return_inverse_affine=True,
        shrink_ratio=(1, 1))

    cv2.imwrite(f'tmp/{img_name}_cropeed_face.png', cropped_face)
    img = paste_face_back(img_ori, cropped_face, inverse_affine)
    cv2.imwrite(f'tmp/{img_name}_back.png', img)
|
@ -0,0 +1,118 @@
|
|||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
import os.path as osp
|
||||||
|
import torch
|
||||||
|
from torch.hub import download_url_to_file, get_dir
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Absolute path of the directory three levels above this file.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
def imwrite(img, file_path, params=None, auto_mkdir=True):
    """Save an image to disk through OpenCV.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Destination image path.
        params (None or list): Forwarded unchanged as the third argument of
            :func:`cv2.imwrite`.
        auto_mkdir (bool): When True, create the parent folder of
            ``file_path`` (and any missing ancestors) before writing.

    Returns:
        bool: The value returned by :func:`cv2.imwrite`.
    """
    if auto_mkdir:
        parent_dir = os.path.dirname(file_path)
        os.makedirs(os.path.abspath(parent_dir), exist_ok=True)
    return cv2.imwrite(file_path, img, params)
|
||||||
|
|
||||||
|
|
||||||
|
def img2tensor(imgs, bgr2rgb=True, float32=True):
    """Convert channel-last Numpy image(s) to channel-first torch tensor(s).

    Args:
        imgs (list[ndarray] | ndarray): Input image or list of images. Each
            array is indexed on its third axis, so it must be 3-D.
        bgr2rgb (bool): Whether to convert three-channel images from BGR to
            RGB. Images with another channel count are left untouched.
        float32 (bool): Whether to cast the resulting tensor to float32.

    Returns:
        list[tensor] | tensor: A list of tensors when ``imgs`` is a list,
        otherwise a single tensor.
    """

    def _convert(array):
        swap_channels = array.shape[2] == 3 and bgr2rgb
        if swap_channels:
            # float64 input is narrowed to float32 before the colour conversion.
            if array.dtype == 'float64':
                array = array.astype('float32')
            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)
        tensor = torch.from_numpy(array.transpose(2, 0, 1))
        return tensor.float() if float32 else tensor

    if not isinstance(imgs, list):
        return _convert(imgs)
    return [_convert(array) for array in imgs]
|
||||||
|
|
||||||
|
|
||||||
|
def load_file_from_url(url, model_dir=None, progress=True, file_name=None, save_dir=None):
    """Return the local path for ``url``, downloading the file if it is missing.

    Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py

    Args:
        url (str): Address of the file to fetch.
        model_dir (str): Directory joined onto ``ROOT_DIR`` when ``save_dir``
            is not given. Default: None, meaning ``<torch hub dir>/checkpoints``.
        progress (bool): Forwarded to ``download_url_to_file``.
        file_name (str): Local file name. Default: None, meaning the last
            path component of ``url``.
        save_dir (str): Directory that holds the file. It is created when
            missing. Default: None, meaning ``ROOT_DIR`` joined with
            ``model_dir``.

    Returns:
        str: Absolute path of the (possibly freshly downloaded) file.
    """
    if model_dir is None:
        model_dir = os.path.join(get_dir(), 'checkpoints')

    if save_dir is None:
        save_dir = os.path.join(ROOT_DIR, model_dir)
    os.makedirs(save_dir, exist_ok=True)

    name_from_url = os.path.basename(urlparse(url).path)
    target_name = name_from_url if file_name is None else file_name
    cached_file = os.path.abspath(os.path.join(save_dir, target_name))

    # An existing file is reused without contacting the network.
    if os.path.exists(cached_file):
        return cached_file

    print(f'Downloading: "{url}" to {cached_file}\n')
    download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
|
||||||
|
|
||||||
|
|
||||||
|
def scandir(dir_path, suffix=None, recursive=False, full_path=False):
    """Scan a directory to find the interested files.

    Hidden files (names starting with ``.``) are never reported. Hidden
    directories are still descended into when ``recursive`` is True.

    Args:
        dir_path (str): Path of the directory.
        suffix (str | tuple(str), optional): File suffix that we are
            interested in. Default: None.
        recursive (bool, optional): If set to True, recursively scan the
            directory. Default: False.
        full_path (bool, optional): If set to True, include the dir_path.
            Default: False.

    Returns:
        A generator for all the interested files. Paths are relative to
        ``dir_path`` unless ``full_path`` is True.

    Raises:
        TypeError: If ``suffix`` is neither None, a string nor a tuple. This
            is raised immediately, not on first iteration.
    """

    if (suffix is not None) and not isinstance(suffix, (str, tuple)):
        raise TypeError('"suffix" must be a string or tuple of strings')

    root = dir_path

    def _scandir(dir_path, suffix, recursive):
        # The context manager closes the OS directory handle even when the
        # generator is abandoned before it is exhausted.
        with os.scandir(dir_path) as entries:
            for entry in entries:
                if entry.is_file():
                    if entry.name.startswith('.'):
                        # Hidden file: skip it. It must not reach the
                        # directory branch, where os.scandir would fail.
                        continue
                    return_path = entry.path if full_path else os.path.relpath(entry.path, root)
                    if suffix is None or return_path.endswith(suffix):
                        yield return_path
                elif recursive and entry.is_dir():
                    # Only real directories are descended into; other entry
                    # kinds (e.g. broken symlinks) are ignored.
                    yield from _scandir(entry.path, suffix=suffix, recursive=recursive)

    return _scandir(dir_path, suffix=suffix, recursive=recursive)
|
@ -0,0 +1,5 @@
|
|||||||
|
from .vis_alignment import visualize_alignment
|
||||||
|
from .vis_detection import visualize_detection
|
||||||
|
from .vis_headpose import visualize_headpose
|
||||||
|
|
||||||
|
__all__ = ['visualize_detection', 'visualize_alignment', 'visualize_headpose']
|
@ -0,0 +1,18 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def visualize_alignment(img, landmarks, save_path=None, to_bgr=False):
    """Draw facial landmarks on a copy of ``img`` and optionally save it.

    The input image is not modified and nothing is returned, so the drawing
    is only observable through ``save_path``.

    Args:
        img (Numpy array): Input image.
        landmarks (iterable): One collection of points per face; the first
            two entries of each point are used as (x, y).
        save_path (str): Where to write the annotated image. Default: None
            (do not save).
        to_bgr (bool): Whether to convert the copy from RGB to BGR before
            drawing. Default: False.
    """
    canvas = np.copy(img)
    height, width = canvas.shape[0:2]
    # Stroke thickness grows with the longer image side.
    stroke = int(max(height, width) / 150)
    if to_bgr:
        canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)

    all_points = (point for face_points in landmarks for point in face_points)
    for point in all_points:
        center = (int(point[0]), int(point[1]))
        cv2.circle(canvas, center, 1, (0, 150, 0), stroke)

    # save img
    if save_path is not None:
        cv2.imwrite(save_path, canvas)
|
@ -0,0 +1,29 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def visualize_detection(img, bboxes_and_landmarks, save_path=None, to_bgr=False):
    """Visualize detection results.

    Draws on a copy of ``img``; the input is not modified and nothing is
    returned, so the result is only observable through ``save_path``.

    Args:
        img (Numpy array): Input image. BGR, [0, 255], uint8.
        bboxes_and_landmarks (iterable): One record per detection, read as
            [x0, y0, x1, y1, score, then five (x, y) landmark pairs].
        save_path (str): Where to write the annotated image. Default: None
            (do not save).
        to_bgr (bool): Whether to convert the copy from RGB to BGR before
            drawing. Default: False.
    """
    canvas = np.copy(img)
    if to_bgr:
        canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)

    # One colour per landmark (for retinaface), in record order.
    landmark_colors = ((0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0))

    for det in bboxes_and_landmarks:
        # confidence
        score_text = f'{det[4]:.4f}'
        text_origin = (int(det[0]), int(det[1] + 12))
        cv2.putText(canvas, score_text, text_origin, cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        # bounding boxes
        values = [int(v) for v in det]
        cv2.rectangle(canvas, (values[0], values[1]), (values[2], values[3]), (0, 0, 255), 2)
        # landmarks (for retinaface)
        for k, color in enumerate(landmark_colors):
            cv2.circle(canvas, (values[5 + 2 * k], values[6 + 2 * k]), 1, color, 4)

    # save img
    if save_path is not None:
        cv2.imwrite(save_path, canvas)
|
@ -0,0 +1,91 @@
|
|||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
|
from math import cos, sin
|
||||||
|
|
||||||
|
|
||||||
|
def draw_axis(img, yaw, pitch, roll, tdx=None, tdy=None, size=100):
    """Draw the three head-pose axes on ``img`` in place.

    Args:
        img (Numpy array): Image to draw on (modified in place).
        yaw, pitch, roll: Head-pose angles in degrees.
        tdx, tdy: Pixel position of the axis origin. If either is None, the
            image centre is used. Default: None.
        size: Length of each axis in pixels. Default: 100.

    Returns:
        Numpy array: The same ``img`` object that was passed in.
    """
    # Degrees -> radians; yaw is negated before use.
    pitch_rad = pitch * np.pi / 180
    yaw_rad = -yaw * np.pi / 180
    roll_rad = roll * np.pi / 180

    if tdx is None or tdy is None:
        img_h, img_w = img.shape[:2]
        tdx, tdy = img_w / 2, img_h / 2

    cos_y, sin_y = cos(yaw_rad), sin(yaw_rad)
    cos_p, sin_p = cos(pitch_rad), sin(pitch_rad)
    cos_r, sin_r = cos(roll_rad), sin(roll_rad)

    # (end_x, end_y, BGR colour, thickness) for each axis, in drawing order.
    axes = (
        # X axis pointing to right, drawn in red
        (size * (cos_y * cos_r) + tdx, size * (cos_p * sin_r + cos_r * sin_p * sin_y) + tdy, (0, 0, 255), 3),
        # Y axis pointing downside, drawn in green
        (size * (-cos_y * sin_r) + tdx, size * (cos_p * cos_r - sin_p * sin_y * sin_r) + tdy, (0, 255, 0), 3),
        # Z axis, out of the screen, drawn in blue
        (size * (sin_y) + tdx, size * (-cos_y * sin_p) + tdy, (255, 0, 0), 2),
    )

    origin = (int(tdx), int(tdy))
    for end_x, end_y, color, thickness in axes:
        cv2.line(img, origin, (int(end_x), int(end_y)), color, thickness)

    return img
|
||||||
|
|
||||||
|
|
||||||
|
def draw_pose_cube(img, yaw, pitch, roll, tdx=None, tdy=None, size=150.):
    """Draw a head-pose cube on ``img`` in place.

    Where (tdx, tdy) is the translation of the face.
    For pose we have [pitch yaw roll tdx tdy tdz scale_factor]

    Args:
        img (Numpy array): Image to draw on (modified in place).
        yaw, pitch, roll: Head-pose angles in degrees.
        tdx, tdy: Centre of the cube's front face in pixels. If either is
            None, the image centre is used. Default: None.
        size: Edge length of the cube in pixels. Default: 150.

    Returns:
        Numpy array: The same ``img`` object that was passed in.
    """
    # Degrees -> radians; yaw is negated before use.
    p = pitch * np.pi / 180
    y = -yaw * np.pi / 180
    r = roll * np.pi / 180

    if tdx is None or tdy is None:
        height, width = img.shape[:2]
        face_x = width / 2 - 0.5 * size
        face_y = height / 2 - 0.5 * size
    else:
        face_x = tdx - 0.50 * size
        face_y = tdy - 0.50 * size

    x1 = size * (cos(y) * cos(r)) + face_x
    y1 = size * (cos(p) * sin(r) + cos(r) * sin(p) * sin(y)) + face_y
    x2 = size * (-cos(y) * sin(r)) + face_x
    y2 = size * (cos(p) * cos(r) - sin(p) * sin(y) * sin(r)) + face_y
    x3 = size * (sin(y)) + face_x
    y3 = size * (-cos(y) * sin(p)) + face_y

    # Integer pixel positions of the eight cube corners.
    origin = (int(face_x), int(face_y))
    corner_1 = (int(x1), int(y1))
    corner_2 = (int(x2), int(y2))
    corner_3 = (int(x3), int(y3))
    corner_12 = (int(x2 + x1 - face_x), int(y2 + y1 - face_y))
    corner_13 = (int(x1 + x3 - face_x), int(y1 + y3 - face_y))
    corner_23 = (int(x2 + x3 - face_x), int(y2 + y3 - face_y))
    corner_123 = (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y))

    red, blue, green = (0, 0, 255), (255, 0, 0), (0, 255, 0)
    # Edges in the original drawing order, since later lines overwrite earlier ones.
    edges = (
        # Draw base in red
        (origin, corner_1, red, 3),
        (origin, corner_2, red, 3),
        (corner_2, corner_12, red, 3),
        (corner_1, corner_12, red, 3),
        # Draw pillars in blue
        (origin, corner_3, blue, 2),
        (corner_1, corner_13, blue, 2),
        (corner_2, corner_23, blue, 2),
        (corner_12, corner_123, blue, 2),
        # Draw top in green
        (corner_13, corner_123, green, 2),
        (corner_23, corner_123, green, 2),
        (corner_3, corner_13, green, 2),
        (corner_3, corner_23, green, 2),
    )
    for start, end, color, thickness in edges:
        cv2.line(img, start, end, color, thickness)

    return img
|
||||||
|
|
||||||
|
|
||||||
|
def visualize_headpose(img, yaw, pitch, roll, save_path=None, to_bgr=False):
    """Annotate a copy of ``img`` with the head pose and optionally save it.

    Writes the three angles as text, then draws the pose cube and the pose
    axes. The input image is not modified and nothing is returned, so the
    result is only observable through ``save_path``.

    Args:
        img (Numpy array): Input image.
        yaw, pitch, roll: Indexable containers of angles; only element 0 of
            each is used, and it must provide ``.item()``.
        save_path (str): Where to write the annotated image. Default: None
            (do not save).
        to_bgr (bool): Whether to convert the copy from RGB to BGR before
            drawing. Default: False.
    """
    canvas = np.copy(img)
    if to_bgr:
        canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)

    yaw0, pitch0, roll0 = yaw[0], pitch[0], roll[0]
    label = f'y {yaw0.item():.2f}, p {pitch0.item():.2f}, r {roll0.item():.2f}'
    text_origin = (30, canvas.shape[0] - 30)
    cv2.putText(canvas, label, text_origin, fontFace=1, fontScale=1, color=(0, 0, 255), thickness=2)

    draw_pose_cube(canvas, yaw0, pitch0, roll0, size=100)
    draw_axis(canvas, yaw0, pitch0, roll0, tdx=50, tdy=50, size=100)

    # save img
    if save_path is not None:
        cv2.imwrite(save_path, canvas)
|
Loading…
Reference in New Issue