simplifyUpdate

pull/30/head
Kritik Soman 4 years ago
parent 089d0245eb
commit b68f8c1196

Binary file not shown.

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2019 zll
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,95 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist
import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math
from PIL import Image
import torchvision.transforms as transforms
import cv2
def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    """Blend a colorized parsing map over an image and optionally save it.

    Parameters
    ----------
    im :
        Input image; converted with ``np.array``. It is passed through
        ``cv2.COLOR_RGB2BGR`` before blending, so RGB channel order is
        presumably expected -- confirm against callers.
    parsing_anno : numpy.ndarray
        2-D array of integer class ids (cast to ``uint8``).
    stride :
        Scale factor applied to ``parsing_anno`` (nearest-neighbour) before
        blending.
    save_im : bool
        When true, write the blended image to ``save_path`` as a JPEG.
    save_path : str
        Destination file used when ``save_im`` is true.

    Returns
    -------
    numpy.ndarray
        The blended BGR image (0.4 * image + 0.6 * color map).
    """
    # Palette indexed by class id; index 0 is never painted (loop starts at 1).
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    vis_im = np.array(im).copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)

    # Start from a white canvas; pixels of class 0 stay white.
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    # Convert to a Python int so that ``+ 1`` cannot wrap around in uint8.
    num_of_class = int(np.max(vis_parsing_anno))
    for pi in range(1, num_of_class + 1):
        vis_parsing_anno_color[vis_parsing_anno == pi] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # Hand the overlay back so callers can use it without re-reading the file.
    return vis_im
def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):
    """Run the parsing network over every file in ``dspth`` and save overlays.

    Parameters
    ----------
    respth : str
        Output directory; created if missing. One overlay is written per
        input file, using the input file name.
    dspth : str
        Directory whose entries are all opened as images.
    cp : str
        Checkpoint file name, looked up under ``res/cp``.
    """
    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    save_pth = osp.join('res/cp', cp)
    net.load_state_dict(torch.load(save_pth))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    with torch.no_grad():
        for image_path in os.listdir(dspth):
            # Force three channels: the Normalize above has 3-element
            # mean/std, so grayscale / RGBA / palette inputs would fail.
            # The context manager closes the file once pixels are loaded.
            with Image.open(osp.join(dspth, image_path)) as img:
                image = img.convert('RGB').resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            img = img.cuda()
            # The network returns three outputs; only the first is used here.
            out = net(img)[0]
            parsing = out.squeeze(0).cpu().numpy().argmax(0)
            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))
if __name__ == "__main__":
    # Script entry point: configure logging under ./res, then run the
    # evaluation with its default paths.
    setup_logger('./res')
    evaluate()

@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2
from transform import *
class FaceMask(Dataset):
    """Dataset pairing images under ``CelebA-HQ-img`` with masks under ``mask``.

    Both directories are resolved relative to ``rootpth``. The mask for an
    image is looked up by the image's base name with a ``.png`` extension.
    """

    def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs):
        """Index the image directory and build the transforms.

        Parameters
        ----------
        rootpth : str
            Dataset root containing ``CelebA-HQ-img`` and ``mask``.
        cropsize : tuple
            Passed to ``RandomCrop`` in the training augmentation.
        mode : str
            One of ``'train'``, ``'val'``, ``'test'``; augmentation is only
            applied in ``'train'`` mode.
        """
        super(FaceMask, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test')
        self.mode = mode
        self.ignore_lb = 255
        self.rootpth = rootpth

        self.imgs = os.listdir(os.path.join(self.rootpth, 'CelebA-HQ-img'))

        # pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        # Compose / ColorJitter / ... come from the project's ``transform``
        # module (star import), not from torchvision.
        self.trans_train = Compose([
            ColorJitter(
                brightness=0.5,
                contrast=0.5,
                saturation=0.5),
            HorizontalFlip(),
            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomCrop(cropsize)
        ])

    def __getitem__(self, idx):
        """Return ``(img, label)`` for sample ``idx``.

        ``img`` is the normalized tensor of the image resized to 512x512;
        ``label`` is an ``int64`` numpy array with a leading axis of size 1.
        """
        impth = self.imgs[idx]
        img = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', impth))
        img = img.resize((512, 512), Image.BILINEAR)
        # Derive the mask name from the base name so that extensions of any
        # length (e.g. ``.jpeg``) map to ``<name>.png``.
        label_name = osp.splitext(impth)[0] + '.png'
        label = Image.open(osp.join(self.rootpth, 'mask', label_name)).convert('P')
        if self.mode == 'train':
            im_lb = dict(im=img, lb=label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']
        img = self.to_tensor(img)
        label = np.array(label).astype(np.int64)[np.newaxis, :]
        return img, label

    def __len__(self):
        """Number of files found in the image directory."""
        return len(self.imgs)
if __name__ == "__main__":
    # Merge the per-attribute annotation PNGs into a single label mask per
    # image: attribute k (1-based position in ``atts``) is written as value k,
    # later attributes overwriting earlier ones.
    face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
    face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
    mask_path = '/home/zll/data/CelebAMask-HQ/mask'
    counter = 0  # annotation files that exist
    total = 0    # annotation files probed
    for folder in range(15):
        atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
                'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
        # Each numbered sub-folder is scanned for 2000 consecutive image ids.
        first_id = folder * 2000
        for j in range(first_id, first_id + 2000):
            mask = np.zeros((512, 512))
            for label_id, att in enumerate(atts, 1):
                total += 1
                file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
                path = osp.join(face_sep_mask, str(folder), file_name)
                if not os.path.exists(path):
                    continue
                counter += 1
                sep_mask = np.array(Image.open(path).convert('P'))
                # NOTE(review): 225 is presumably the palette index that
                # foreground pixels get after convert('P') -- confirm before
                # changing it.
                mask[sep_mask == 225] = label_id
            cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
            print(j)
    print(counter, total)

@ -1,23 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import os.path as osp
import time
import sys
import logging
import torch.distributed as dist
def setup_logger(logpth):
    """Configure root logging to a timestamped file in ``logpth`` plus a stream handler.

    The level is INFO, except that when ``torch.distributed`` is initialized
    every process whose rank is not 0 logs at ERROR only.
    """
    stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
    logfile = osp.join(logpth, 'BiSeNet-{}.log'.format(stamp))
    FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
    non_zero_rank = dist.is_initialized() and dist.get_rank() != 0
    log_level = logging.ERROR if non_zero_rank else logging.INFO
    logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
    logging.root.addHandler(logging.StreamHandler())

@ -1,75 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class OhemCELoss(nn.Module):
    """Cross-entropy with online hard example mining.

    Per-element losses are sorted in descending order. If the element at
    position ``n_min`` is still above the threshold, every element above the
    threshold is averaged; otherwise the ``n_min`` largest are averaged.

    Parameters
    ----------
    thresh : float
        Probability threshold; stored as the loss value ``-log(thresh)``.
    n_min : int
        Minimum number of elements to keep.
    ignore_lb : int
        Label value ignored by the underlying cross-entropy.
    """

    def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
        super(OhemCELoss, self).__init__()
        # Kept on the CPU here and moved to the loss device in forward(), so
        # the module can be constructed without a GPU.
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float))
        self.n_min = n_min
        self.ignore_lb = ignore_lb
        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

    def forward(self, logits, labels):
        """Return the mean of the selected hard-example losses (0-dim tensor)."""
        loss = self.criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        thresh = self.thresh.to(loss.device)
        # Only index position n_min when it exists; with n_min or fewer
        # elements the slice below simply keeps all of them.
        if loss.numel() > self.n_min and loss[self.n_min] > thresh:
            loss = loss[loss > thresh]
        else:
            loss = loss[:self.n_min]
        return torch.mean(loss)
class SoftmaxFocalLoss(nn.Module):
    """Focal loss over softmax scores.

    Log-probabilities are scaled by ``(1 - p) ** gamma`` and reduced with
    ``nn.NLLLoss`` (labels equal to ``ignore_lb`` are ignored).
    """

    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.nll = nn.NLLLoss(ignore_index=ignore_lb)

    def forward(self, logits, labels):
        """Return the focal loss of ``logits`` (class axis 1) against ``labels``."""
        probs = F.softmax(logits, dim=1)
        modulation = torch.pow(1. - probs, self.gamma)
        weighted_log_probs = modulation * F.log_softmax(logits, dim=1)
        return self.nll(weighted_log_probs, labels)
if __name__ == '__main__':
    # Smoke test: two small conv nets, each scored with its own OHEM loss,
    # followed by one backward pass through the summed loss. Requires CUDA.
    torch.manual_seed(15)
    criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()

    net1 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net1.cuda()
    net1.train()

    net2 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net2.cuda()
    net2.train()

    # Inputs and labels need no gradient.
    with torch.no_grad():
        inten = torch.randn(16, 3, 20, 20).cuda()
        lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
        # Mark the whole second sample with the ignore label.
        lbs[1, :, :] = 255

    out_size = inten.size()[2:]
    logits1 = F.interpolate(net1(inten), out_size, mode='bilinear')
    logits2 = F.interpolate(net2(inten), out_size, mode='bilinear')

    loss = criteria1(logits1, lbs) + criteria2(logits2, lbs)
    print(loss.detach().cpu())
    loss.backward()

@ -1,130 +0,0 @@
import cv2
import os
import numpy as np
from skimage.filters import gaussian
def sharpen(img):
    """Sharpen an image with an unsharp mask and return it as ``uint8``.

    The Gaussian-blurred image (sigma 5, last axis treated as channels) is
    subtracted from the input, the difference is amplified by 1.5 and added
    back, and the result is clipped to the 0..255 range.
    """
    img = img * 1.0
    # Newer scikit-image releases replaced ``multichannel`` with
    # ``channel_axis``; fall back to the old keyword on older releases.
    try:
        gauss_out = gaussian(img, sigma=5, channel_axis=-1)
    except TypeError:
        gauss_out = gaussian(img, sigma=5, multichannel=True)

    alpha = 1.5
    img_out = (img - gauss_out) * alpha + img

    # Clipping in normalized [0, 1] space replaces the earlier manual
    # mask arithmetic, which produced the same bounds.
    img_out = np.clip(img_out / 255.0, 0, 1)
    img_out = img_out * 255
    return np.array(img_out, dtype=np.uint8)
def hair(image, parsing, part=17, color=[230, 50, 20]):
    """Recolor one parsed region of ``image`` toward ``color``.

    Parameters
    ----------
    image : numpy.ndarray
        Image converted with ``cv2.COLOR_BGR2HSV`` (so BGR order is expected).
    parsing : numpy.ndarray
        Label map indexed like ``image``'s first two axes.
    part : int
        Label id to recolor. Ids 12 and 13 take the target's first two HSV
        channels, every other id only the first; id 17 is also sharpened.
    color : sequence of three values
        Target color, unpacked as (b, g, r).

    Returns
    -------
    numpy.ndarray
        Recolored image; pixels outside ``part`` are copied from ``image``.
    """
    b, g, r = color
    tar_color = np.zeros_like(image)
    for channel, value in enumerate((b, g, r)):
        tar_color[:, :, channel] = value

    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV)

    # Number of leading HSV channels copied from the target color.
    n_channels = 2 if part in (12, 13) else 1
    image_hsv[:, :, 0:n_channels] = tar_hsv[:, :, 0:n_channels]

    changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR)
    if part == 17:
        changed = sharpen(changed)

    # Restore every pixel that does not belong to the requested part.
    outside = parsing != part
    changed[outside] = image[outside]
    return changed
#
# def lip(image, parsing, part=17, color=[230, 50, 20]):
# b, g, r = color #[10, 50, 250] # [10, 250, 10]
# tar_color = np.zeros_like(image)
# tar_color[:, :, 0] = b
# tar_color[:, :, 1] = g
# tar_color[:, :, 2] = r
#
# image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
# il, ia, ib = cv2.split(image_lab)
#
# tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab)
# tl, ta, tb = cv2.split(tar_lab)
#
# image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100)
# image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128)
# image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128)
#
#
# changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR)
#
# if part == 17:
# changed = sharpen(changed)
#
# changed[parsing != part] = image[parsing != part]
# # changed = cv2.resize(changed, (512, 512))
# return changed
if __name__ == '__main__':
    # Label ids in the parsing map:
    #   1 face, 10 nose, 11 teeth, 12 upper lip, 13 lower lip, 17 hair
    num = 116
    table = {
        'hair': 17,
        'upper_lip': 12,
        'lower_lip': 13
    }

    image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num)
    parsing_path = 'res/test_res/{}.png'.format(num)

    image = cv2.imread(image_path)
    ori = image.copy()
    parsing = np.array(cv2.imread(parsing_path, 0))
    # cv2.resize takes dsize as (width, height), whereas ndarray.shape is
    # (height, width, ...); passing shape[0:2] directly is only correct for
    # square images.
    height, width = image.shape[0:2]
    parsing = cv2.resize(parsing, (width, height), interpolation=cv2.INTER_NEAREST)

    parts = [table['hair'], table['upper_lip'], table['lower_lip']]
    # zip() stops at the shorter list, so with a single color only the first
    # part (hair) is recolored. Alternative three-color setup:
    # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]]
    colors = [[100, 200, 100]]

    for part, color in zip(parts, colors):
        image = hair(image, parsing, part, color)

    # Output names follow ``num`` so they stay in sync with the inputs.
    cv2.imwrite('res/makeup/{}_ori.png'.format(num), cv2.resize(ori, (512, 512)))
    cv2.imwrite('res/makeup/{}_2.png'.format(num), cv2.resize(image, (512, 512)))

    cv2.imshow('image', cv2.resize(ori, (512, 512)))
    cv2.imshow('color', cv2.resize(image, (512, 512)))

    cv2.waitKey(0)
    cv2.destroyAllWindows()

@ -1,283 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from resnet import Resnet18
# from modules.bn import InPlaceABNSync as BatchNorm2d
class ConvBNReLU(nn.Module):
    """Bias-free 2-D convolution followed by batch norm and ReLU."""

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        conv_options = dict(kernel_size=ks, stride=stride, padding=padding, bias=False)
        self.conv = nn.Conv2d(in_chan, out_chan, **conv_options)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU."""
        return F.relu(self.bn(self.conv(x)))

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)
class BiSeNetOutput(nn.Module):
    """Output head: a 3x3 ConvBNReLU followed by a bias-free 1x1 classifier conv."""

    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        """Return per-class score maps for the feature map ``x``."""
        return self.conv_out(self.conv(x))

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into ``(wd_params, nowd_params)``.

        Linear/Conv2d weights go to the first list; their biases and all
        BatchNorm2d parameters go to the second.
        """
        wd_params, nowd_params = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params.extend(module.parameters())
        return wd_params, nowd_params
class AttentionRefinementModule(nn.Module):
    """Refine features with a channel attention vector from global average pooling."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        """Return the conv features scaled by their sigmoid attention weights."""
        feat = self.conv(x)
        # Pool over the full spatial extent -> one value per channel.
        pooled = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.sigmoid_atten(self.bn_atten(self.conv_atten(pooled)))
        return torch.mul(feat, atten)

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)
class ContextPath(nn.Module):
    """Context branch: Resnet18 features refined by attention and merged top-down."""

    def __init__(self, *args, **kwargs):
        super(ContextPath, self).__init__()
        self.resnet = Resnet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        """Return ``(feat8, feat16_up, feat32_up)``.

        ``feat16_up`` is resized to ``feat8``'s spatial size and
        ``feat32_up`` to ``feat16``'s.
        """
        feat8, feat16, feat32 = self.resnet(x)
        size8 = tuple(feat8.size()[2:])
        size16 = tuple(feat16.size()[2:])
        size32 = tuple(feat32.size()[2:])

        # Global context: average over the whole feat32 map, then broadcast
        # it back to feat32's size.
        avg = self.conv_avg(F.avg_pool2d(feat32, feat32.size()[2:]))
        avg_up = F.interpolate(avg, size32, mode='nearest')

        feat32_sum = self.arm32(feat32) + avg_up
        feat32_up = self.conv_head32(F.interpolate(feat32_sum, size16, mode='nearest'))

        feat16_sum = self.arm16(feat16) + feat32_up
        feat16_up = self.conv_head16(F.interpolate(feat16_sum, size8, mode='nearest'))

        return feat8, feat16_up, feat32_up  # x8, x8, x16

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into ``(wd_params, nowd_params)``.

        Linear/Conv2d weights go to the first list; their biases and all
        BatchNorm2d parameters go to the second.
        """
        wd_params, nowd_params = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params.extend(module.parameters())
        return wd_params, nowd_params
# NOTE: this module is not used; the resnet feature of the same size replaces it.
class SpatialPath(nn.Module):
    """Three stride-2 ConvBNReLU stages followed by a 1x1 projection to 128 channels."""

    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        """Run ``x`` through conv1 -> conv2 -> conv3 -> conv_out."""
        feat = x
        for stage in (self.conv1, self.conv2, self.conv3, self.conv_out):
            feat = stage(feat)
        return feat

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into ``(wd_params, nowd_params)``.

        Linear/Conv2d weights go to the first list; their biases and all
        BatchNorm2d parameters go to the second.
        """
        wd_params, nowd_params = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params.extend(module.parameters())
        return wd_params, nowd_params
class FeatureFusionModule(nn.Module):
    """Fuse two feature maps by concatenation plus a channel-attention residual."""

    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        # Squeeze to out_chan // 4 channels, then expand back to out_chan.
        pointwise = dict(kernel_size=1, stride=1, padding=0, bias=False)
        self.conv1 = nn.Conv2d(out_chan, out_chan//4, **pointwise)
        self.conv2 = nn.Conv2d(out_chan//4, out_chan, **pointwise)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        """Concatenate ``fsp`` and ``fcp`` on the channel axis and return ``feat * atten + feat``."""
        feat = self.convblk(torch.cat([fsp, fcp], dim=1))
        # Global average pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid.
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.relu(self.conv1(atten))
        atten = self.sigmoid(self.conv2(atten))
        feat_atten = torch.mul(feat, atten)
        return feat_atten + feat

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Split parameters into ``(wd_params, nowd_params)``.

        Linear/Conv2d weights go to the first list; their biases and all
        BatchNorm2d parameters go to the second.
        """
        wd_params, nowd_params = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params.extend(module.parameters())
        return wd_params, nowd_params
class BiSeNet(nn.Module):
    """Segmentation network: context path, feature fusion and three output heads."""

    def __init__(self, n_classes, *args, **kwargs):
        super(BiSeNet, self).__init__()
        self.cp = ContextPath()
        # No separate spatial path: the 1/8 resnet feature is used instead.
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
        self.init_weight()

    def forward(self, x):
        """Return ``(feat_out, feat_out16, feat_out32)``, each resized to ``x``'s spatial size."""
        H, W = x.size()[2:]
        # The first output of the context path is the res3b1 feature; it
        # stands in for the spatial path feature.
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)
        feat_fuse = self.ffm(feat_res8, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        resize = dict(mode='bilinear', align_corners=True)
        feat_out = F.interpolate(feat_out, (H, W), **resize)
        feat_out16 = F.interpolate(feat_out16, (H, W), **resize)
        feat_out32 = F.interpolate(feat_out32, (H, W), **resize)
        return feat_out, feat_out16, feat_out32

    def init_weight(self):
        """Kaiming-normal init (a=1) for direct Conv2d children; biases set to 0."""
        for ly in self.children():
            if not isinstance(ly, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(ly.weight, a=1)
            if ly.bias is not None:
                nn.init.constant_(ly.bias, 0)

    def get_params(self):
        """Collect parameter groups from every direct child.

        Returns ``(wd_params, nowd_params, lr_mul_wd_params,
        lr_mul_nowd_params)``; parameters of FeatureFusionModule and
        BiSeNetOutput children go to the two ``lr_mul_*`` lists.
        """
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for child in self.children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
                lr_mul_wd_params.extend(child_wd_params)
                lr_mul_nowd_params.extend(child_nowd_params)
            else:
                wd_params.extend(child_wd_params)
                nowd_params.extend(child_nowd_params)
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
if __name__ == "__main__":
    # Smoke test on a random batch; requires CUDA.
    net = BiSeNet(19)
    net.cuda()
    net.eval()
    in_ten = torch.randn(16, 3, 640, 480).cuda()
    out, out16, out32 = net(in_ten)
    print(out.shape)

    net.get_params()

@ -1,5 +0,0 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule

@ -1,130 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional
try:
from queue import Queue
except ImportError:
from Queue import Queue
from .functions import *
class ABN(nn.Module):
    """Activated Batch Normalization

    Bundles batch normalization and an activation function in one module.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor used when updating the running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function; compared against the
            `ACT_RELU`, `ACT_LEAKY_RELU` and `ACT_ELU` constants, any other
            value applies no activation.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if not self.affine:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        else:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset running statistics to (0, 1) and, if affine, weight/bias to (1, 0)."""
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if not self.affine:
            return
        nn.init.constant_(self.weight, 1)
        nn.init.constant_(self.bias, 0)

    def forward(self, x):
        """Batch-normalize ``x`` and apply the configured activation in place."""
        x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
                                  self.training, self.momentum, self.eps)

        act = self.activation
        if act == ACT_RELU:
            return functional.relu(x, inplace=True)
        if act == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        if act == ACT_ELU:
            return functional.elu(x, inplace=True)
        return x

    def __repr__(self):
        """Describe the module; ``slope`` is shown only for ``leaky_relu``."""
        head = '{name}({num_features}, eps={eps}, momentum={momentum},' \
               ' affine={affine}, activation={activation}'
        tail = ', slope={slope})' if self.activation == "leaky_relu" else ')'
        return (head + tail).format(name=self.__class__.__name__, **self.__dict__)
class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization"""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an InPlace Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor used when updating the running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        # All state lives in ABN; only forward() differs.
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)

    def forward(self, x):
        """Delegate to ``inplace_abn`` with this module's parameters and buffers."""
        params = (self.weight, self.bias, self.running_mean, self.running_var)
        settings = (self.training, self.momentum, self.eps, self.activation, self.slope)
        return inplace_abn(x, *(params + settings))
class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization with cross-GPU synchronization

    This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
    """

    def forward(self, x):
        """Delegate to ``inplace_abn_sync`` with this module's parameters and buffers."""
        params = (self.weight, self.bias, self.running_mean, self.running_var)
        settings = (self.training, self.momentum, self.eps, self.activation, self.slope)
        return inplace_abn_sync(x, *(params + settings))

    def __repr__(self):
        """Describe the module; ``slope`` is shown only for ``leaky_relu``."""
        head = '{name}({num_features}, eps={eps}, momentum={momentum},' \
               ' affine={affine}, activation={activation}'
        tail = ', slope={slope})' if self.activation == "leaky_relu" else ')'
        return (head + tail).format(name=self.__class__.__name__, **self.__dict__)

@ -1,84 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional
from models._util import try_index
from .bn import ABN
class DeeplabV3(nn.Module):
    """Head combining four parallel convolutions (one 1x1, three dilated 3x3)
    with a globally pooled branch.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        """Build the head.

        Parameters
        ----------
        in_channels, out_channels, hidden_channels : int
            Channel counts of the input, the output and each branch.
        dilations : sequence of three ints
            Dilation (and padding) of the three 3x3 branches.
        norm_act : callable
            Factory for the normalization+activation modules; the instance
            must expose ``activation`` and ``slope``.
        pooling_size : optional
            When set, evaluation mode uses a sliding average pool of this
            size instead of a global mean.
        """
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        branches = [nn.Conv2d(in_channels, hidden_channels, 1, bias=False)]
        for idx in range(3):
            rate = dilations[idx]
            branches.append(
                nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=rate, padding=rate))
        self.map_convs = nn.ModuleList(branches)
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        """Xavier-normal init for convolutions (gain from ``activation``/``slope``);
        ABN weights set to 1 and all biases to 0.
        """
        gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if getattr(m, "bias", None) is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if getattr(m, "weight", None) is not None:
                    nn.init.constant_(m.weight, 1)
                if getattr(m, "bias", None) is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the normalized sum of the convolutional and pooled branches."""
        # Map convolutions
        branch_outputs = [conv(x) for conv in self.map_convs]
        out = self.red_conv(self.map_bn(torch.cat(branch_outputs, dim=1)))

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_bn(self.global_pooling_conv(pool))
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            # The pooled map is 1x1 here; tile it to the input's spatial size.
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        """Pool ``x`` globally, or with a padded sliding window in eval mode
        when ``pooling_size`` is set.
        """
        if self.training or self.pooling_size is None:
            batch, channels = x.size(0), x.size(1)
            pool = x.view(batch, channels, -1).mean(dim=-1)
            return pool.view(batch, channels, 1, 1)

        def _split_padding(kernel):
            # Odd kernels pad symmetrically; even kernels get one extra
            # element on the trailing side.
            lead = (kernel - 1) // 2
            trail = lead if kernel % 2 == 1 else lead + 1
            return lead, trail

        pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                        min(try_index(self.pooling_size, 1), x.shape[3]))
        left, right = _split_padding(pooling_size[1])
        top, bottom = _split_padding(pooling_size[0])
        padding = (left, right, top, bottom)

        pool = functional.avg_pool2d(x, pooling_size, stride=1)
        # Replicate borders so the pooled map regains the input's size.
        pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool

@ -1,42 +0,0 @@
from collections import OrderedDict
import torch
import torch.nn as nn
from .bn import ABN
class DenseModule(nn.Module):
    """Densely connected block: every layer consumes the concatenation of the
    input and all previous layer outputs and adds ``growth`` channels.
    """

    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        for _ in range(self.layers):
            # 1x1 bottleneck over everything accumulated so far.
            bottleneck = nn.Sequential(OrderedDict([
                ("bn", norm_act(in_channels)),
                ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
            ]))
            self.convs1.append(bottleneck)
            # 3x3 conv producing the layer's ``growth`` new channels.
            expand = nn.Sequential(OrderedDict([
                ("bn", norm_act(self.growth * bottleneck_factor)),
                ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
                                   dilation=dilation))
            ]))
            self.convs3.append(expand)
            in_channels += self.growth

    @property
    def out_channels(self):
        """Channel count of the block output: input channels plus ``growth`` per layer."""
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        """Return the channel-wise concatenation of ``x`` and every layer output."""
        inputs = [x]
        for idx in range(self.layers):
            stacked = torch.cat(inputs, dim=1)
            inputs.append(self.convs3[idx](self.convs1[idx](stacked)))
        return torch.cat(inputs, dim=1)

@ -1,234 +0,0 @@
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load
# Directory holding the extension sources, next to this module.
_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
# Build (or load a cached build of) the extension at import time.
_backend = load(
    name="inplace_abn",
    extra_cflags=["-O3"],
    sources=[
        path.join(_src_path, f)
        for f in [
            "inplace_abn.cpp",
            "inplace_abn_cpu.cpp",
            "inplace_abn_cuda.cu",
            "inplace_abn_cuda_half.cu"
        ]
    ],
    extra_cuda_cflags=["--expt-extended-lambda"],
)

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"
def _check(fn, *args, **kwargs):
success = fn(*args, **kwargs)
if not success:
raise RuntimeError("CUDA Error encountered in {}".format(fn))
def _broadcast_shape(x):
out_size = []
for i, s in enumerate(x.size()):
if i != 1:
out_size.append(1)
else:
out_size.append(s)
return out_size
def _reduce(x):
if len(x.size()) == 2:
return x.sum(dim=0)
else:
n, c = x.size()[0:2]
return x.contiguous().view((n, c, -1)).sum(2).sum(0)
def _count_samples(x):
count = 1
for i, s in enumerate(x.size()):
if i != 1:
count *= s
return count
def _act_forward(ctx, x):
    """Apply the activation named by ``ctx.activation`` to ``x`` through the backend.

    Only leaky ReLU and ELU trigger a backend call; ``ACT_NONE`` and any
    other value leave ``x`` untouched.
    """
    activation = ctx.activation
    if activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
        return
    if activation == ACT_ELU:
        _backend.elu_forward(x)
def _act_backward(ctx, x, dx):
    """Run the backend backward step of the activation named by ``ctx.activation``.

    Only leaky ReLU and ELU trigger a backend call; ``ACT_NONE`` and any
    other value leave ``x`` and ``dx`` untouched.
    """
    activation = ctx.activation
    if activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
        return
    if activation == ACT_ELU:
        _backend.elu_backward(x, dx)
class InPlaceABN(autograd.Function):
    """Autograd function for in-place activated batch normalization.

    ``forward`` overwrites ``x`` (and, in training mode, the running
    statistics) and saves the output for ``backward``.
    """

    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        """Normalize and activate ``x`` in place and return it."""
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs; the backend receives empty tensors in place of a
        # missing weight/bias.
        count = _count_samples(x)
        x = x.contiguous()
        if ctx.affine:
            weight = weight.contiguous()
            bias = bias.contiguous()
        else:
            weight = x.new_empty(0)
            bias = x.new_empty(0)

        if not ctx.training:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)
        else:
            mean, var = _backend.mean_var(x)

            # Update running stats; the variance is rescaled by
            # count / (count - 1) before being accumulated.
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        """Return gradients for ``x``, ``weight`` and ``bias``; the other seven inputs get ``None``."""
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if not ctx.training:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))
        else:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        if ctx.affine:
            dweight = eydz * weight.sign()
            dbias = edz
        else:
            dweight = None
            dbias = None

        return dx, dweight, dbias, None, None, None, None, None, None, None
class InPlaceABNSync(autograd.Function):
    """In-place activated batch norm with statistics synchronized across workers.

    When ``torch.distributed`` is initialized with more than one worker, the
    per-worker mean and variance are combined with ``all_reduce`` in the forward
    pass, and the ``edz`` / ``eydz`` reductions are combined the same way in the
    backward pass.
    """

    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        """Normalize and activate ``x`` in place.

        Parameters
        ----------
        x : input tensor; it is marked dirty and returned after modification.
        weight, bias : affine parameters, or ``None`` for both to disable affine.
        running_mean, running_var : running statistics, updated in place in training mode.
        training : use batch statistics (True) or running statistics (False).
        momentum : weight of the new statistics in the running-average update.
        eps : value forwarded to the backend for numerical stability.
        activation, slope : activation selector and its slope, read by ``_act_forward``.
        equal_batches : if True the global batch size is ``local * world_size``;
            otherwise it is obtained with an ``all_reduce`` over the local sizes.
        """
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1
        batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size > 1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                # Fraction of the global batch held by this worker.
                ctx.factor = x.shape[0] / float(batch_size.item())

                # Global mean is the factor-weighted sum of the local means.
                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                # Local variance plus squared offset of the local mean from the
                # global mean, weighted and summed over workers.
                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
            # count / (count - 1) rescales the variance before it is accumulated.
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        """Compute gradients for ``x``, ``weight`` and ``bias``.

        Returns one entry per ``forward`` input (11 in total, including
        ``equal_batches``); every entry after the first three is ``None``.
        """
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            # Keep the local reductions: they are what dweight / dbias are built from.
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size > 1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz_local * weight.sign() if ctx.affine else None
        dbias = edz_local if ctx.affine else None

        # forward() takes 11 inputs, so 11 gradients are returned. Surplus
        # trailing None entries are tolerated by autograd when fewer arguments
        # were passed to apply().
        return dx, dweight, dbias, None, None, None, None, None, None, None, None
# Functional entry points bound to the `apply` method of each autograd Function.
inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply
# Names exported by `from <module> import *`.
__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]

@ -1,21 +0,0 @@
import torch.nn as nn
import torch
import torch.distributed as dist
class GlobalAvgPool2d(nn.Module):
    """Average every dimension after the first two, returning a 2-D tensor."""

    def __init__(self):
        """Global average pooling over the input's spatial dimensions"""
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        """Return the mean of ``inputs`` over all dimensions past the first two.

        ``reshape`` is used rather than ``view`` so that non-contiguous inputs
        (e.g. the result of a transpose) are accepted instead of raising.
        """
        in_size = inputs.size()
        return inputs.reshape(in_size[0], in_size[1], -1).mean(dim=2)
class SingleGPU(nn.Module):
    """Wrapper that moves its input to the CUDA device before calling the wrapped module."""

    def __init__(self, module):
        """Store ``module`` as the submodule that will receive the moved input."""
        super(SingleGPU, self).__init__()
        self.module = module

    def forward(self, input):
        """Copy ``input`` to CUDA (non-blocking) and run the wrapped module on it."""
        on_device = input.cuda(non_blocking=True)
        return self.module(on_device)

@ -1,88 +0,0 @@
from collections import OrderedDict
import torch.nn as nn
from .bn import ABN
class IdentityResidualBlock(nn.Module):
    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Must have two or three elements: with two elements
            the block uses two `3 x 3` convolutions; with three it is a bottleneck block with `1 x 1`, then
            `3 x 3`, then `1 x 1` convolutions.
        stride : int
            Stride of the first convolution (and of the projection shortcut, when one is created).
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.

        Raises
        ------
        ValueError
            If `channels` does not have two or three elements, or `groups != 1` with a two-element `channels`.
        """
        super(IdentityResidualBlock, self).__init__()

        # Validate the configuration before building any layer
        depth = len(channels)
        if depth not in (2, 3):
            raise ValueError("channels must contain either two or three values")
        if depth == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        self.bn1 = norm_act(in_channels)

        if depth == 3:
            # Bottleneck: 1x1 -> 3x3 (grouped, dilated) -> 1x1
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            dropout_index = 4
        else:
            # Basic: 3x3 -> 3x3
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            dropout_index = 2

        # Dropout, when requested, sits right before the last convolution
        if dropout is not None:
            layers.insert(dropout_index, ("dropout", dropout()))
        self.convs = nn.Sequential(OrderedDict(layers))

        # A 1x1 projection is only needed when the shortcut's shape would not match the output
        if stride != 1 or in_channels != channels[-1]:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        """Apply `bn1`, the convolution stack, and add the shortcut in place."""
        if not hasattr(self, "proj_conv"):
            # Copy taken before bn1 runs -- presumably because norm_act may modify x in place; confirm
            shortcut = x.clone()
            pre_act = self.bn1(x)
        else:
            pre_act = self.bn1(x)
            shortcut = self.proj_conv(pre_act)

        out = self.convs(pre_act)
        out.add_(shortcut)
        return out

@ -1,15 +0,0 @@
#pragma once
#include <ATen/ATen.h>
// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif
// Single-property checks on a tensor expression `x`; the message names the argument.
#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")
// Combined device + contiguity checks. Wrapped in do { } while (0) so that the
// two checks behave as one statement (e.g. inside an unbraced `if`).
#define CHECK_CUDA_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while (0)
#define CHECK_CPU_INPUT(x) do { CHECK_CPU(x); CHECK_CONTIGUOUS(x); } while (0)

@ -1,95 +0,0 @@
#include <torch/extension.h>
#include <vector>
#include "inplace_abn.h"
// Dispatch per-channel mean/variance computation to the CPU, CUDA or CUDA-half backend.
std::vector<at::Tensor> mean_var(at::Tensor x) {
  if (!x.is_cuda()) {
    return mean_var_cpu(x);
  }
  const bool is_half = x.type().scalarType() == at::ScalarType::Half;
  return is_half ? mean_var_cuda_h(x) : mean_var_cuda(x);
}
// Dispatch the in-place normalization forward pass to the backend matching `x`.
at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                   bool affine, float eps) {
  if (!x.is_cuda()) {
    return forward_cpu(x, mean, var, weight, bias, affine, eps);
  }
  const bool is_half = x.type().scalarType() == at::ScalarType::Half;
  return is_half ? forward_cuda_h(x, mean, var, weight, bias, affine, eps)
                 : forward_cuda(x, mean, var, weight, bias, affine, eps);
}
// Dispatch the first backward stage (the edz / eydz reductions) to the backend matching `z`.
std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                 bool affine, float eps) {
  if (!z.is_cuda()) {
    return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
  }
  const bool is_half = z.type().scalarType() == at::ScalarType::Half;
  return is_half ? edz_eydz_cuda_h(z, dz, weight, bias, affine, eps)
                 : edz_eydz_cuda(z, dz, weight, bias, affine, eps);
}
// Dispatch the second backward stage (computation of dx) to the backend matching `z`.
at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                    at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  if (!z.is_cuda()) {
    return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
  }
  const bool is_half = z.type().scalarType() == at::ScalarType::Half;
  return is_half ? backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps)
                 : backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
}
// Apply leaky ReLU with the given negative slope to `z` in place (via at::leaky_relu_).
void leaky_relu_forward(at::Tensor z, float slope) {
at::leaky_relu_(z, slope);
}
// Dispatch the leaky ReLU backward / inversion to the backend matching `z`.
void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
  if (!z.is_cuda()) {
    leaky_relu_backward_cpu(z, dz, slope);
    return;
  }
  if (z.type().scalarType() == at::ScalarType::Half) {
    leaky_relu_backward_cuda_h(z, dz, slope);
    return;
  }
  leaky_relu_backward_cuda(z, dz, slope);
}
// Apply ELU to `z` in place (via at::elu_ with its default arguments).
void elu_forward(at::Tensor z) {
at::elu_(z);
}
// Dispatch the ELU backward / inversion to the CPU or CUDA implementation.
// There is no half-precision branch here, unlike the other dispatchers in this file.
void elu_backward(at::Tensor z, at::Tensor dz) {
  if (!z.is_cuda()) {
    elu_backward_cpu(z, dz);
    return;
  }
  elu_backward_cuda(z, dz);
}
// Python bindings: expose each dispatcher defined in this file under the same name.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// Batch-norm statistics and forward
m.def("mean_var", &mean_var, "Mean and variance computation");
m.def("forward", &forward, "In-place forward computation");
// Two-stage batch-norm backward
m.def("edz_eydz", &edz_eydz, "First part of backward computation");
m.def("backward", &backward, "Second part of backward computation");
// Activations: forward, and backward combined with inversion of the activation
m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
m.def("elu_forward", &elu_forward, "Elu forward computation");
m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
}

@ -1,88 +0,0 @@
#pragma once
#include <ATen/ATen.h>
#include <vector>
std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);
at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
bool affine, float eps);
at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
at::Tensor edz, at::Tensor eydz, bool affine, float eps);
void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);
void elu_backward_cpu(at::Tensor z, at::Tensor dz);
void elu_backward_cuda(at::Tensor z, at::Tensor dz);
// Split the shape of `x` into size of dim 0 (`num`), size of dim 1 (`chn`) and the
// product of all remaining dimension sizes (`sp`, which is 1 when there are none).
static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
  num = x.size(0);
  chn = x.size(1);

  int64_t spatial = 1;
  const int64_t ndim = x.ndimension();
  for (int64_t d = 2; d < ndim; ++d) {
    spatial *= x.size(d);
  }
  sp = spatial;
}
/*
* Specialized CUDA reduction functions for BN
*/
#ifdef __CUDACC__
#include "utils/cuda.cuh"
// Block-wide sum of op(batch, plane, x) over batch in [0, N) and x in [0, S).
// Every thread of the block must call this; all of them receive the same total.
template <typename T, typename Op>
__device__ T reduce(Op op, int plane, int N, int S) {
// Per-thread partial sum: each thread walks x in steps of blockDim.x
T sum = (T)0;
for (int batch = 0; batch < N; ++batch) {
for (int x = threadIdx.x; x < S; x += blockDim.x) {
sum += op(batch, plane, x);
}
}
// sum over NumThreads within a warp
sum = warpSum(sum);
// 'transpose', and reduce within warp again
__shared__ T shared[32];
// Barrier before `shared` is written (it may still be read from a previous call)
__syncthreads();
// The first thread of each warp stores that warp's partial sum
if (threadIdx.x % WARP_SIZE == 0) {
shared[threadIdx.x / WARP_SIZE] = sum;
}
if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
// zero out the other entries in shared
shared[threadIdx.x] = (T)0;
}
__syncthreads();
// The first warp sums the per-warp partials; thread 0 publishes the total
if (threadIdx.x / WARP_SIZE == 0) {
sum = warpSum(shared[threadIdx.x]);
if (threadIdx.x == 0) {
shared[0] = sum;
}
}
__syncthreads();
// Everyone picks it up, should be broadcast into the whole gradInput
return shared[0];
}
#endif

@ -1,119 +0,0 @@
#include <ATen/ATen.h>
#include <vector>
#include "utils/checks.h"
#include "inplace_abn.h"
// Sum `x` over every dimension except dim 1, giving one value per channel.
at::Tensor reduce_sum(at::Tensor x) {
  if (x.ndimension() != 2) {
    // Collapse all trailing dimensions, then sum them and the batch dimension.
    auto flat = x.view({x.size(0), x.size(1), -1});
    return flat.sum(-1).sum(0);
  }
  return x.sum(0);
}
// View the per-channel vector `v` with shape {1, -1, 1, ..., 1} so that it broadcasts
// against `x`; for a 2-D `x` the vector is returned unchanged.
at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
  const int64_t ndim = x.ndimension();
  if (ndim == 2) {
    return v;
  }
  std::vector<int64_t> shape = {1, -1};
  if (ndim > 2) {
    // One trailing singleton for each dimension of x beyond the first two.
    shape.resize(ndim, 1);
  }
  return v.view(shape);
}
// Number of elements per channel: the product of all dimension sizes of `x` except dim 1.
int64_t count(at::Tensor x) {
  int64_t total = x.size(0);
  const int64_t ndim = x.ndimension();
  for (int64_t d = 2; d < ndim; ++d) {
    total *= x.size(d);
  }
  return total;
}
// Undo the affine transform applied by forward_cpu: (z - bias) / (|weight| + eps).
// Without affine parameters `z` is returned as is.
at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
  if (!affine) {
    return z;
  }
  auto shifted = z - broadcast_to(bias, z);
  auto scale = broadcast_to(at::abs(weight) + eps, z);
  return shifted / scale;
}
// Per-channel mean and variance of `x`; the variance is divided by the element
// count itself (no count - 1 correction).
std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
  const auto n = count(x);
  auto mean = reduce_sum(x) / n;
  auto centered = x - broadcast_to(mean, x);
  auto var = reduce_sum(centered.pow(2)) / n;
  return {mean, var};
}
// Normalize `x` in place: (x - mean) * rsqrt(var + eps) * gamma [+ bias],
// where gamma is |weight| + eps when `affine` is set and 1 otherwise.
at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps) {
  at::Tensor gamma;
  if (affine) {
    gamma = at::abs(weight) + eps;
  } else {
    gamma = at::ones_like(var);
  }
  auto scale = at::rsqrt(var + eps) * gamma;

  x.sub_(broadcast_to(mean, x));
  x.mul_(broadcast_to(scale, x));
  if (affine) {
    x.add_(broadcast_to(bias, x));
  }
  return x;
}
// First backward stage: per-channel sums of dz and of y * dz, where y is the
// output `z` with the affine transform undone.
std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps) {
  auto sum_dz = reduce_sum(dz);
  auto normalized = invert_affine(z, weight, bias, affine, eps);
  auto sum_ydz = reduce_sum(normalized * dz);
  return {sum_dz, sum_ydz};
}
// Second backward stage:
//   dx = (dz - edz / n - y * eydz / n) * rsqrt(var + eps) [* (|weight| + eps)]
// with n the per-channel element count and y the output with the affine part undone.
at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  auto y = invert_affine(z, weight, bias, affine, eps);

  auto scale = at::rsqrt(var + eps);
  if (affine) {
    scale = scale * (at::abs(weight) + eps);
  }

  const auto n = count(z);
  auto mean_dz = broadcast_to(edz / n, dz);
  auto mean_ydz = broadcast_to(eydz / n, dz);
  return (dz - mean_dz - y * mean_ydz) * broadcast_to(scale, dz);
}
// Leaky ReLU backward with inversion, in place: wherever z < 0, z is multiplied
// by 1 / slope and dz is multiplied by slope. Other elements are left untouched.
void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
    const int64_t total = z.numel();
    auto *z_ptr = z.data<scalar_t>();
    auto *dz_ptr = dz.data<scalar_t>();
    const float inv_slope = 1 / slope;

    for (int64_t idx = 0; idx < total; ++idx) {
      if (!(z_ptr[idx] < 0)) {
        continue;
      }
      z_ptr[idx] *= inv_slope;
      dz_ptr[idx] *= slope;
    }
  }));
}
// ELU backward with inversion, in place. Wherever z < 0:
//   dz is multiplied by (z + 1), using the activated value of z, and
//   z is then replaced by log1p(z), recovering the pre-activation value.
// Other elements are left untouched.
void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();
    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // Order matters: the gradient factor must be taken from the activated
        // z before z is overwritten (this matches the CUDA implementation).
        _dz[i] *= (_z[i] + 1.f);
        _z[i] = log1p(_z[i]);
      }
    }
  }));
}

@ -1,333 +0,0 @@
#include <ATen/ATen.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include <vector>
#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"
#include <ATen/cuda/CUDAContext.h>
// Operations for reduce
// Reduction functor returning one element of `tensor`, addressed as
// (batch, plane, n) with `chn` planes per batch and `sp` elements per plane.
template<typename T>
struct SumOp {
__device__ SumOp(const T *t, int c, int s)
: tensor(t), chn(c), sp(s) {}
// Returns tensor[(batch * chn + plane) * sp + n]
__device__ __forceinline__ T operator()(int batch, int plane, int n) {
return tensor[(batch * chn + plane) * sp + n];
}
// Input buffer, number of planes per batch, elements per plane
const T *tensor;
const int chn;
const int sp;
};
// Reduction functor returning the squared deviation of one element of `tensor`
// from a fixed `mean`; indexing is the same as in SumOp.
template<typename T>
struct VarOp {
__device__ VarOp(T m, const T *t, int c, int s)
: mean(m), tensor(t), chn(c), sp(s) {}
// Returns (tensor[idx] - mean)^2 with idx = (batch * chn + plane) * sp + n
__device__ __forceinline__ T operator()(int batch, int plane, int n) {
T val = tensor[(batch * chn + plane) * sp + n];
return (val - mean) * (val - mean);
}
// Mean to subtract, input buffer, planes per batch, elements per plane
const T mean;
const T *tensor;
const int chn;
const int sp;
};
// Reduction functor for the backward pass: for one element it returns the pair
// (dz, y * dz), where y = (z - bias) / weight.
template<typename T>
struct GradOp {
__device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
: weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
__device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
// Undo the affine transform on the stored output
T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
T _dz = dz[(batch * chn + plane) * sp + n];
return Pair<T>(_dz, _y * _dz);
}
// Per-plane scale and shift, as supplied by the caller
const T weight;
const T bias;
// Output and output-gradient buffers, planes per batch, elements per plane
const T *z;
const T *dz;
const int chn;
const int sp;
};
/***********
* mean_var
***********/
// Computes mean and variance of one plane per block (plane = blockIdx.x).
// Both are normalized by num * sp, i.e. the variance has no (count - 1) correction.
template<typename T>
__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
int plane = blockIdx.x;
T norm = T(1) / T(num * sp);
// First pass: mean of the plane
T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
__syncthreads();
// Second pass: mean squared deviation from that mean
T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;
// A single thread writes the results for this plane
if (threadIdx.x == 0) {
mean[plane] = _mean;
var[plane] = _var;
}
}
// Host wrapper: launches mean_var_kernel with one block per channel and returns
// {mean, var}, each of shape {chn} with the options (dtype / device) of `x`.
std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Outputs
  auto mean = at::empty({chn}, x.options());
  auto var = at::empty({chn}, x.options());

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
    mean_var_kernel<scalar_t><<<grid, block, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        num, chn, sp);
  }));

  return {mean, var};
}
/**********
* forward
**********/
// Normalizes one plane per block (plane = blockIdx.x), overwriting x with
// (x - mean) * rsqrt(var + eps) * weight + bias.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
template<typename T>
__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
T _mean = mean[plane];
T _var = var[plane];
T _weight = affine ? abs(weight[plane]) + eps : T(1);
T _bias = affine ? bias[plane] : T(0);
// Combined per-plane scale
T mul = rsqrt(_var + eps) * _weight;
// Threads of the block step through the plane's elements with stride blockDim.x
for (int batch = 0; batch < num; ++batch) {
for (int n = threadIdx.x; n < sp; n += blockDim.x) {
T _x = x[(batch * chn + plane) * sp + n];
T _y = (_x - _mean) * mul + _bias;
x[(batch * chn + plane) * sp + n] = _y;
}
}
}
// Host wrapper: validates the inputs and launches forward_kernel with one block
// per channel. `x` is modified in place and returned.
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
    forward_kernel<scalar_t><<<grid, block, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return x;
}
/***********
* edz_eydz
***********/
// First backward stage for one plane per block (plane = blockIdx.x):
// edz[plane] = sum of dz, eydz[plane] = sum of y * dz, with y = (z - bias) / weight.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
template<typename T>
__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
T _weight = affine ? abs(weight[plane]) + eps : 1.f;
T _bias = affine ? bias[plane] : 0.f;
// Both sums are accumulated together as a Pair
Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
__syncthreads();
// A single thread writes the results for this plane
if (threadIdx.x == 0) {
edz[plane] = res.v1;
eydz[plane] = res.v2;
}
}
// Host wrapper: launches edz_eydz_kernel with one block per channel and returns
// {edz, eydz}, each of shape {chn} with the options (dtype / device) of `z`.
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  // Outputs
  auto edz = at::empty({chn}, z.options());
  auto eydz = at::empty({chn}, z.options());

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
    edz_eydz_kernel<scalar_t><<<grid, block, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return {edz, eydz};
}
/***********
* backward
***********/
// Second backward stage for one plane per block (plane = blockIdx.x):
//   dx = (dz - edz / count - y * eydz / count) * weight * rsqrt(var + eps)
// with y = (z - bias) / weight and count = num * sp.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
template<typename T>
__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
T _weight = affine ? abs(weight[plane]) + eps : 1.f;
T _bias = affine ? bias[plane] : 0.f;
T _var = var[plane];
T _edz = edz[plane];
T _eydz = eydz[plane];
// Combined per-plane scale
T _mul = _weight * rsqrt(_var + eps);
T count = T(num * sp);
// Threads of the block step through the plane's elements with stride blockDim.x
for (int batch = 0; batch < num; ++batch) {
for (int n = threadIdx.x; n < sp; n += blockDim.x) {
T _dz = dz[(batch * chn + plane) * sp + n];
T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
}
}
}
// Host wrapper: validates the inputs, allocates a zero-initialized `dx` shaped
// like `z`, and launches backward_kernel with one block per channel.
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  // Output
  auto dx = at::zeros_like(z);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
    backward_kernel<scalar_t><<<grid, block, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        dx.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return dx;
}
/**************
* activations
**************/
// Leaky ReLU backward with inversion over `count` elements, on the current stream.
// Wherever z < 0: dz is multiplied by slope, then z is divided by slope.
template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
// Create thrust pointers
thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
auto stream = at::cuda::getCurrentCUDAStream();
// Pass 1: scale dz, using z as the stencil. This runs before z is modified.
thrust::transform_if(thrust::cuda::par.on(stream),
th_dz, th_dz + count, th_z, th_dz,
[slope] __device__ (const T& dz) { return dz * slope; },
[] __device__ (const T& z) { return z < 0; });
// Pass 2: invert the activation on z in place
thrust::transform_if(thrust::cuda::par.on(stream),
th_z, th_z + count, th_z,
[slope] __device__ (const T& z) { return z / slope; },
[] __device__ (const T& z) { return z < 0; });
}
// Host wrapper: validates `z` and `dz`, then runs leaky_relu_backward_impl over
// all elements for the floating type of `z`.
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  const int64_t total = z.numel();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    auto *z_ptr = z.data<scalar_t>();
    auto *dz_ptr = dz.data<scalar_t>();
    leaky_relu_backward_impl<scalar_t>(z_ptr, dz_ptr, slope, total);
  }));
}
// ELU backward with inversion over `count` elements, on the current stream.
// Wherever z < 0: dz is multiplied by (z + 1), then z is replaced by log1p(z).
template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
// Create thrust pointers
thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
auto stream = at::cuda::getCurrentCUDAStream();
// Pass 1: scale dz using the activated z (z is both second input and stencil).
// This must run before z is overwritten below.
thrust::transform_if(thrust::cuda::par.on(stream),
th_dz, th_dz + count, th_z, th_z, th_dz,
[] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
[] __device__ (const T& z) { return z < 0; });
// Pass 2: invert the activation on z in place
thrust::transform_if(thrust::cuda::par.on(stream),
th_z, th_z + count, th_z,
[] __device__ (const T& z) { return log1p(z); },
[] __device__ (const T& z) { return z < 0; });
}
// Host wrapper: validates `z` and `dz`, then runs elu_backward_impl over all
// elements for the floating type of `z`.
void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  int64_t count = z.numel();
  // The dispatch name appears in unsupported-dtype errors, so it must name this op.
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}

@ -1,275 +0,0 @@
#include <ATen/ATen.h>
#include <cuda_fp16.h>
#include <vector>
#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"
#include <ATen/cuda/CUDAContext.h>
// Operations for reduce
// Half-precision reduction functor: returns one element of `tensor` converted to
// float, addressed as (batch, plane, n) with `chn` planes per batch and `sp` per plane.
struct SumOpH {
__device__ SumOpH(const half *t, int c, int s)
: tensor(t), chn(c), sp(s) {}
__device__ __forceinline__ float operator()(int batch, int plane, int n) {
return __half2float(tensor[(batch * chn + plane) * sp + n]);
}
// Input buffer, planes per batch, elements per plane
const half *tensor;
const int chn;
const int sp;
};
// Half-precision reduction functor: returns, in float, the squared deviation of
// one element of `tensor` from a fixed `mean`.
struct VarOpH {
__device__ VarOpH(float m, const half *t, int c, int s)
: mean(m), tensor(t), chn(c), sp(s) {}
__device__ __forceinline__ float operator()(int batch, int plane, int n) {
// The element is converted to float before the subtraction
const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
return (t - mean) * (t - mean);
}
// Mean to subtract, input buffer, planes per batch, elements per plane
const float mean;
const half *tensor;
const int chn;
const int sp;
};
// Half-precision reduction functor for the backward pass: returns the float pair
// (dz, y * dz), where y = (z - bias) / weight.
struct GradOpH {
__device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
: weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
__device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
// Both operands are converted to float before any arithmetic
float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
return Pair<float>(_dz, _y * _dz);
}
// Per-plane scale and shift, as supplied by the caller
const float weight;
const float bias;
// Output and output-gradient buffers, planes per batch, elements per plane
const half *z;
const half *dz;
const int chn;
const int sp;
};
/***********
* mean_var
***********/
// Half-input version of the mean/variance kernel: one plane per block
// (plane = blockIdx.x), accumulation and outputs in float.
// Both results are normalized by num * sp (no count - 1 correction).
__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
int plane = blockIdx.x;
float norm = 1.f / static_cast<float>(num * sp);
// First pass: mean of the plane
float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
__syncthreads();
// Second pass: mean squared deviation from that mean
float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;
// A single thread writes the results for this plane
if (threadIdx.x == 0) {
mean[plane] = _mean;
var[plane] = _var;
}
}
// Host wrapper for half inputs: launches mean_var_kernel_h with one block per
// channel and returns {mean, var} as float tensors of shape {chn}.
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Outputs are float even though the input is half
  const auto float_opts = x.options().dtype(at::kFloat);
  auto mean = at::empty({chn}, float_opts);
  auto var = at::empty({chn}, float_opts);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  mean_var_kernel_h<<<grid, block, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      num, chn, sp);

  return {mean, var};
}
/**********
* forward
**********/
// Half-input version of the forward kernel: one plane per block (plane = blockIdx.x).
// Each element is converted to float, normalized as
// (x - mean) * rsqrt(var + eps) * weight + bias, and stored back as half.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
const float _mean = mean[plane];
const float _var = var[plane];
const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
const float _bias = affine ? bias[plane] : 0.f;
// Combined per-plane scale
const float mul = rsqrt(_var + eps) * _weight;
// Threads of the block step through the plane's elements with stride blockDim.x
for (int batch = 0; batch < num; ++batch) {
for (int n = threadIdx.x; n < sp; n += blockDim.x) {
half *x_ptr = x + (batch * chn + plane) * sp + n;
float _x = __half2float(*x_ptr);
float _y = (_x - _mean) * mul + _bias;
*x_ptr = __float2half(_y);
}
}
}
// Host wrapper for half inputs: validates the tensors and launches
// forward_kernel_h with one block per channel. `x` is read as half; the
// statistics and affine parameters are read as float. `x` is modified in place.
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  forward_kernel_h<<<grid, block, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      affine, eps, num, chn, sp);

  return x;
}
// Half-input version of the first backward stage, one plane per block:
// edz[plane] = sum of dz, eydz[plane] = sum of y * dz, accumulated in float,
// with y = (z - bias) / weight.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
float _weight = affine ? abs(weight[plane]) + eps : 1.f;
float _bias = affine ? bias[plane] : 0.f;
// Both sums are accumulated together as a Pair
Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
__syncthreads();
// A single thread writes the results for this plane
if (threadIdx.x == 0) {
edz[plane] = res.v1;
eydz[plane] = res.v2;
}
}
// Host wrapper for half inputs: launches edz_eydz_kernel_h with one block per
// channel and returns {edz, eydz} as float tensors of shape {chn}.
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  // Outputs are float even though z / dz are half
  const auto float_opts = z.options().dtype(at::kFloat);
  auto edz = at::empty({chn}, float_opts);
  auto eydz = at::empty({chn}, float_opts);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  edz_eydz_kernel_h<<<grid, block, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      affine, eps, num, chn, sp);

  return {edz, eydz};
}
// Half-input version of the second backward stage, one plane per block:
//   dx = (dz - edz / count - y * eydz / count) * weight * rsqrt(var + eps)
// with y = (z - bias) / weight and count = num * sp. Arithmetic is done in float
// and the result is stored as half.
// With affine, weight is |weight[plane]| + eps; otherwise weight = 1 and bias = 0.
__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
int plane = blockIdx.x;
float _weight = affine ? abs(weight[plane]) + eps : 1.f;
float _bias = affine ? bias[plane] : 0.f;
float _var = var[plane];
float _edz = edz[plane];
float _eydz = eydz[plane];
// Combined per-plane scale
float _mul = _weight * rsqrt(_var + eps);
float count = float(num * sp);
// Threads of the block step through the plane's elements with stride blockDim.x
for (int batch = 0; batch < num; ++batch) {
for (int n = threadIdx.x; n < sp; n += blockDim.x) {
float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;
dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
}
}
}
// Host wrapper for half inputs: validates the tensors, allocates a
// zero-initialized `dx` shaped like `z`, and launches backward_kernel_h with one
// block per channel.
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Shape of the problem
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  // Output
  auto dx = at::zeros_like(z);

  // Launch configuration
  const dim3 grid(chn);
  const dim3 block(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();

  backward_kernel_h<<<grid, block, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      reinterpret_cast<half*>(dx.data<at::Half>()),
      affine, eps, num, chn, sp);

  return dx;
}
// Half-precision leaky ReLU backward with inversion over `count` elements.
// Wherever z < 0: dz is multiplied by slope and z is divided by slope.
__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
// Each thread starts at its global index and advances by the total number of launched threads.
// NOTE(review): `i` is a 32-bit int while `count` is int64_t -- confirm inputs stay below 2^31 elements.
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){
float _z = __half2float(z[i]);
if (_z < 0) {
dz[i] = __float2half(__half2float(dz[i]) * slope);
z[i] = __float2half(_z / slope);
}
}
}
// Host wrapper for half inputs: validates `z` and `dz` and launches
// leaky_relu_backward_impl_h over all elements on the current stream.
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  int64_t count = z.numel();
  // Nothing to do for an empty tensor; the block count computed below would be
  // zero, which is not a valid launch configuration.
  if (count == 0) {
    return;
  }
  dim3 threads(getNumThreads(count));
  // Enough blocks to give every element its own thread
  dim3 blocks = (count + threads.x - 1) / threads.x;
  auto stream = at::cuda::getCurrentCUDAStream();
  leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      slope, count);
}

@ -1,15 +0,0 @@
#pragma once
#include <ATen/ATen.h>
// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif
// Single-property checks on a tensor expression `x`; the message names the argument.
#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")
// Combined device + contiguity checks. Wrapped in do { } while (0) so that the
// two checks behave as one statement (e.g. inside an unbraced `if`).
#define CHECK_CUDA_INPUT(x) do { CHECK_CUDA(x); CHECK_CONTIGUOUS(x); } while (0)
#define CHECK_CPU_INPUT(x) do { CHECK_CPU(x); CHECK_CONTIGUOUS(x); } while (0)

@ -1,49 +0,0 @@
#pragma once
#include <ATen/ATen.h>
/*
* Functions to share code between CPU and GPU
*/
// Four helper macros are defined twice below, once for CUDA compilation (__CUDACC__ defined)
// and once for plain C++:
//   HOST_DEVICE / INLINE_HOST_DEVICE  function qualifiers
//   FLOOR(x)                          floor function to call
//   ACCUM(x, y)                       add y into x
#ifdef __CUDACC__
// CUDA versions
#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE __host__ __device__ inline
#define FLOOR(x) floor(x)
#if __CUDA_ARCH__ >= 600
// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
#define ACCUM(x,y) atomicAdd_block(&(x),(y))
#else
// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
// and use the known atomicCAS-based implementation for double
// Generic case: forwards directly to atomicAdd and returns whatever it returns.
template<typename data_t>
__device__ inline data_t atomic_add(data_t *address, data_t val) {
return atomicAdd(address, val);
}
// double specialization: compare-and-swap loop on the 64-bit integer view of the value.
// Returns the value that was stored at `address` before this call's addition took effect.
template<>
__device__ inline double atomic_add(double *address, double val) {
unsigned long long int* address_as_ull = (unsigned long long int*)address;
unsigned long long int old = *address_as_ull, assumed;
do {
// Try to replace the value we last observed with (observed + val); atomicCAS hands back what
// was actually stored, so a mismatch means the location changed in between and we retry.
assumed = old;
old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#define ACCUM(x,y) atomic_add(&(x),(y))
#endif // #if __CUDA_ARCH__ >= 600
#else
// CPU versions
#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
// Plain compound assignment, no atomics on this path.
#define ACCUM(x,y) (x) += (y)
#endif // #ifdef __CUDACC__

@ -1,71 +0,0 @@
#pragma once
/*
* General settings and functions
*/
const int WARP_SIZE = 32;
const int MAX_BLOCK_SIZE = 1024;
/*
 * Choose a block size for nElem elements: the smallest value in {32, 64, 128, 256, 512, 1024}
 * that is >= nElem, or 1024 when nElem is larger than that. Values of nElem <= 32 (including
 * zero and negatives) yield 32.
 */
static int getNumThreads(int nElem) {
  int blockSize = WARP_SIZE;
  // Double from 32 upwards until the block covers nElem or the cap is reached.
  while (blockSize < MAX_BLOCK_SIZE && blockSize < nElem) {
    blockSize *= 2;
  }
  return blockSize;
}
/*
* Reduction utilities
*/
/*
 * Version-independent wrapper around the XOR warp shuffle.
 *
 * Uses __shfl_xor_sync(mask, value, laneMask, width) when CUDART_VERSION >= 9000 and the
 * legacy __shfl_xor(value, laneMask, width) otherwise; `mask` is ignored on the legacy path.
 */
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
                                           unsigned int mask = 0xffffffff) {
#if CUDART_VERSION < 9000
  return __shfl_xor(value, laneMask, width);
#else
  return __shfl_xor_sync(mask, value, laneMask, width);
#endif
}
/* Returns 31 minus the leading-zero count of val, i.e. the bit index of its highest set bit. */
__device__ __forceinline__ int getMSB(int val) {
  const int leadingZeros = __clz(val);
  return 31 - leadingZeros;
}
// Two values of the same type that are accumulated together: operator+= adds component-wise,
// which lets a Pair be used wherever a single accumulator with += is expected.
// NOTE(review): for T = int the Pair(T) and Pair(int) constructors would have the same
// signature - presumably Pair<int> is never instantiated; confirm.
template<typename T>
struct Pair {
T v1, v2;
// Leaves v1 and v2 default-initialized.
__device__ Pair() {}
__device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
// Broadcast constructors: both components receive the same value.
__device__ Pair(T v) : v1(v), v2(v) {}
__device__ Pair(int v) : v1(v), v2(v) {}
// Component-wise accumulation; returns *this so calls can be chained.
__device__ Pair &operator+=(const Pair<T> &a) {
v1 += a.v1;
v2 += a.v2;
return *this;
}
};
// Sums `val` over the lanes of a warp and returns the total to the calling thread.
// Two implementations are selected at compile time via __CUDA_ARCH__.
template<typename T>
static __device__ __forceinline__ T warpSum(T val) {
#if __CUDA_ARCH__ >= 300
// Shuffle path: getMSB(WARP_SIZE) = 5 exchange steps with XOR lane masks 1, 2, 4, 8, 16.
// In each step a lane adds the running value of its XOR partner, so with all 32 lanes
// participating every lane ends up holding the same total.
for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
}
#else
// Fallback path: each thread publishes its value in a __shared__ array indexed by threadIdx.x
// and then adds the other 31 slots of its own 32-slot segment (starting at `base`).
// assumes threadIdx.x < MAX_BLOCK_SIZE, i.e. 1-D blocks of at most 1024 threads - TODO confirm
// NOTE(review): only __threadfence_block() separates the write from the reads (no
// __syncthreads()); presumably this relies on the lanes of a warp running in lockstep - confirm.
__shared__ T values[MAX_BLOCK_SIZE];
values[threadIdx.x] = val;
__threadfence_block();
const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
for (int i = 1; i < WARP_SIZE; i++) {
// (i + threadIdx.x) % WARP_SIZE visits every other lane's slot exactly once for i = 1..31.
val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
}
#endif
return val;
}
/* Pair overload: reduces v1 and then v2 with the scalar warpSum and returns both totals. */
template<typename T>
static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
  const T first = warpSum(value.v1);
  const T second = warpSum(value.v2);
  return Pair<T>(first, second);
}

@ -1,69 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import logging
logger = logging.getLogger()
class Optimizer(object):
    """SGD wrapper with exponential warm-up followed by polynomial learning-rate decay.

    The wrapped ``torch.optim.SGD`` is built from the four parameter lists returned by
    ``model.get_params()``: (weight-decay, no-weight-decay, lr-multiplied weight-decay,
    lr-multiplied no-weight-decay). Groups flagged ``lr_mul`` are stepped with 10x the
    scheduled learning rate.

    Schedule, as a function of the number of steps taken so far (``self.it``):
      * ``it <= warmup_steps``: ``warmup_start_lr * warmup_factor ** it``, which reaches
        ``lr0`` at ``it == warmup_steps``;
      * afterwards: ``lr0 * (1 - (it - warmup_steps) / (max_iter - warmup_steps)) ** power``,
        clamped at 0 once ``it`` passes ``max_iter``.

    Args:
        model: object exposing ``get_params()`` as described above.
        lr0: base learning rate reached at the end of warm-up.
        momentum: SGD momentum.
        wd: weight decay applied to the groups that do not override it with 0.
        warmup_steps: number of warm-up steps; 0 (or less) disables warm-up.
        warmup_start_lr: learning rate at step 0 of the warm-up.
        max_iter: step at which the decayed learning rate reaches 0.
        power: exponent of the polynomial decay.
    """

    def __init__(self,
                 model,
                 lr0,
                 momentum,
                 wd,
                 warmup_steps,
                 warmup_start_lr,
                 max_iter,
                 power,
                 *args, **kwargs):
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.lr0 = lr0
        self.lr = self.lr0
        self.max_iter = float(max_iter)
        self.power = power
        self.it = 0
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
        param_list = [
            {'params': wd_params},
            {'params': nowd_params, 'weight_decay': 0},
            {'params': lr_mul_wd_params, 'lr_mul': True},
            {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
        self.optim = torch.optim.SGD(
            param_list,
            lr=lr0,
            momentum=momentum,
            weight_decay=wd)
        if self.warmup_steps > 0:
            self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)
        else:
            # No warm-up requested: avoid the division by zero; get_lr() skips the warm-up branch.
            self.warmup_factor = 1.

    def get_lr(self):
        """Return the scheduled learning rate for the current step counter ``self.it``."""
        if self.warmup_steps > 0 and self.it <= self.warmup_steps:
            return self.warmup_start_lr * (self.warmup_factor ** self.it)
        progress = (self.it - self.warmup_steps) / (self.max_iter - self.warmup_steps)
        # Clamp at 0: past max_iter the base would turn negative, and a negative base raised
        # to a fractional power is a complex number in Python 3.
        remaining = max(0., 1 - progress)
        return self.lr0 * remaining ** self.power

    def step(self):
        """Write the scheduled learning rate into every param group, then take one SGD step."""
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            if pg.get('lr_mul', False):
                pg['lr'] = self.lr * 10
            else:
                pg['lr'] = self.lr
        if self.optim.defaults.get('lr_mul', False):
            self.optim.defaults['lr'] = self.lr * 10
        else:
            self.optim.defaults['lr'] = self.lr
        self.it += 1
        self.optim.step()
        if self.it == self.warmup_steps + 2:
            logger.info('==> warmup done, start to implement poly lr strategy')

    def zero_grad(self):
        """Clear the gradients held by the wrapped optimizer."""
        self.optim.zero_grad()

@ -1,38 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import os.path as osp
import os
import cv2
from transform import *
from PIL import Image
# Script: merges the per-attribute annotation PNGs found under `face_sep_mask` into one
# 512x512 label image per sample and writes it to `mask_path/<index>.png`.
# Pixels selected from the i-th attribute file (1-based position in `atts`) receive label i;
# later attributes overwrite earlier ones where they overlap, and unselected pixels stay 0.
# NOTE(review): `np` is not imported in this file directly - presumably it is provided by
# `from transform import *`; confirm.
face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
mask_path = '/home/zll/data/CelebAMask-HQ/mask'
# counter: attribute files that exist on disk; total: attribute files looked for.
counter = 0
total = 0
# Annotation sub-folders are named 0..14; sample indices i*2000 .. (i+1)*2000-1 are looked up in folder i.
for i in range(15):
atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
for j in range(i * 2000, (i + 1) * 2000):
mask = np.zeros((512, 512))
for l, att in enumerate(atts, 1):
total += 1
# File name pattern: zero-padded 5-digit sample index, underscore, attribute name.
file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
path = osp.join(face_sep_mask, str(i), file_name)
if os.path.exists(path):
counter += 1
sep_mask = np.array(Image.open(path).convert('P'))
# print(np.unique(sep_mask))
# NOTE(review): 225 is presumably the palette index that annotated pixels get after
# convert('P') (see the debug print above) - confirm before changing it.
mask[sep_mask == 225] = l
cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
print(j)
print(counter, total)

@ -1,109 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo
# from modules.bn import InPlaceABNSync as BatchNorm2d
resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Two-convolution residual block.

    Main path: 3x3 conv (with ``stride``) -> BN -> ReLU -> 3x3 conv -> BN.
    Shortcut: identity, or a 1x1 conv (with ``stride``) + BN when the channel count or the
    stride changes. The two paths are added and passed through a final ReLU.
    """

    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        # Both convolutions are bias-free 3x3 with padding 1; only the first one is strided.
        self.conv1 = nn.Conv2d(in_chan, out_chan, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = nn.Conv2d(out_chan, out_chan, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        needs_projection = not (in_chan == out_chan and stride == 1)
        if needs_projection:
            projection = nn.Conv2d(in_chan, out_chan,
                                   kernel_size=1, stride=stride, bias=False)
            self.downsample = nn.Sequential(projection, nn.BatchNorm2d(out_chan))

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both paths."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(shortcut + residual)
def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    """Stack ``bnum`` BasicBlocks into an ``nn.Sequential``.

    Only the first block changes the channel count and applies ``stride``; the remaining
    ``bnum - 1`` blocks map ``out_chan`` to ``out_chan`` with stride 1.
    """
    first = BasicBlock(in_chan, out_chan, stride=stride)
    rest = [BasicBlock(out_chan, out_chan, stride=1) for _ in range(bnum - 1)]
    return nn.Sequential(first, *rest)
class Resnet18(nn.Module):
    """ResNet-18 backbone that returns the outputs of its last three stages.

    Construction loads the checkpoint referenced by ``resnet18_url`` (see ``init_weight``).
    """

    def __init__(self):
        super(Resnet18, self).__init__()
        # Stem: 7x7 stride-2 convolution, BN, then 3x3 stride-2 max pooling.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four stages of two BasicBlocks each; stages 2-4 halve the resolution.
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        """Return ``(feat8, feat16, feat32)``: the outputs of layer2, layer3 and layer4."""
        stem = self.maxpool(F.relu(self.bn1(self.conv1(x))))
        feat8 = self.layer2(self.layer1(stem))  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        """Load the checkpoint at ``resnet18_url``, skipping every entry whose key contains 'fc'."""
        pretrained = modelzoo.load_url(resnet18_url)
        merged = self.state_dict()
        merged.update({key: value for key, value in pretrained.items() if 'fc' not in key})
        self.load_state_dict(merged)

    def get_params(self):
        """Split parameters into ``(wd_params, nowd_params)``.

        Conv/Linear weights go to the first list; Conv/Linear biases and all BatchNorm2d
        parameters go to the second.
        """
        wd_params, nowd_params = [], []
        for _, module in self.named_modules():
            if isinstance(module, nn.BatchNorm2d):
                nowd_params.extend(module.parameters())
            elif isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
        return wd_params, nowd_params
if __name__ == "__main__":
    # Smoke run: push a random batch through the backbone and print the three feature sizes.
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    for stage_index in range(3):
        print(out[stage_index].size())
    net.get_params()

@ -1,100 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from logger import setup_logger
from model import BiSeNet
import torch
import os
import os.path as osp
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import cv2
def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    """Blend a colour-coded parsing map over an image and optionally save the result.

    Args:
        im: RGB image (anything ``np.array`` accepts); it is converted to BGR for blending.
        parsing_anno: 2-D integer label map; it is upscaled by ``stride`` with
            nearest-neighbour interpolation before colouring. Label 0 stays white.
        stride: scale factor applied to ``parsing_anno`` in both directions.
        save_im: when true, write two files: the upscaled label map as
            ``<save_path without extension>.png`` and the blended image at ``save_path``.
        save_path: destination of the blended image.

    Returns:
        None.
    """
    # Colour per label id; 24 entries, so label ids above 23 raise IndexError.
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    # int(): np.max returns a uint8 scalar here; converting first keeps `+ 1` in Python ints
    # so the loop bound cannot wrap around.
    num_of_class = int(np.max(vis_parsing_anno))
    for pi in range(1, num_of_class + 1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        # splitext instead of slicing off four characters: works for any extension length
        # (".jpeg") and for paths without an extension.
        cv2.imwrite(osp.splitext(save_path)[0] + '.png', vis_parsing_anno)
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):
    """Run the face-parsing network on images and save visualisations.

    Args:
        respth: output directory; created if missing.
        dspth: directory whose entries are all treated as images, or the path of a single
            image file.
        cp: checkpoint file name, looked up under ``res/cp``.

    Each image is converted to RGB, resized to 512x512, normalised and fed to a 19-class
    ``BiSeNet``; the per-pixel argmax of the first network output is printed (unique labels)
    and handed to ``vis_parsing_maps``, which writes into ``respth`` under the image's file name.
    """
    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    save_pth = osp.join('res/cp', cp)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        net.cuda()
        net.load_state_dict(torch.load(save_pth))
    else:
        net.load_state_dict(torch.load(save_pth, map_location=lambda storage, loc: storage))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Accept a single image file as well as a directory of images.
    if osp.isfile(dspth):
        image_files = [dspth]
    else:
        image_files = [osp.join(dspth, name) for name in os.listdir(dspth)]

    with torch.no_grad():
        for image_file in image_files:
            # Force three channels: the normalisation above has exactly three means/stds, so
            # grayscale, palette or RGBA inputs would otherwise fail.
            img = Image.open(image_file).convert('RGB')
            image = img.resize((512, 512), Image.BILINEAR)
            img = torch.unsqueeze(to_tensor(image), 0)
            if use_cuda:
                img = img.cuda()
            out = net(img)[0]
            parsing = out.squeeze(0).cpu().numpy().argmax(0)
            print(np.unique(parsing))

            vis_parsing_maps(image, parsing, stride=1, save_im=True,
                             save_path=osp.join(respth, osp.basename(image_file)))
if __name__ == "__main__":
    # Evaluate one checkpoint on the sample input path.
    evaluate(cp='79999_iter.pth', dspth='makeup/116_ori.png')

@ -1,179 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask
from loss import OhemCELoss
from evaluate import evaluate
from optimizer import Optimizer
import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist
import os
import os.path as osp
import logging
import time
import datetime
import argparse
# Output directory of the training script: passed to setup_logger() and used as the location
# of the final model file. It is created as a side effect of importing this module.
respth = './res'
if not osp.exists(respth):
os.makedirs(respth)
# Root logger (getLogger() without a name); train() logs progress through it.
logger = logging.getLogger()
def parse_args():
    """Parse the command line; the only option is ``--local_rank`` (int, default -1)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank', dest='local_rank', type=int, default=-1)
    return parser.parse_args()
def train():
    """Train BiSeNet with DistributedDataParallel, one process per GPU.

    The process binds to the GPU given by ``--local_rank``, joins an NCCL process group and
    trains for ``max_iter`` iterations on ``FaceMask`` data with three OHEM cross-entropy
    losses (main output plus two auxiliary outputs). Progress is logged every ``msg_iter``
    iterations. On rank 0 a checkpoint is written to ``./res/cp`` every 5000 iterations and
    evaluated, and the final weights are written to ``./res/model_final_diss.pth``.
    """
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
        backend='nccl',
        init_method='tcp://127.0.0.1:33241',
        world_size=torch.cuda.device_count(),
        rank=args.local_rank,
    )
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/zll/data/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size=n_img_per_gpu,
                    shuffle=False,
                    sampler=sampler,
                    num_workers=n_workers,
                    pin_memory=True,
                    drop_last=True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net,
                                              device_ids=[args.local_rank, ],
                                              output_device=args.local_rank
                                              )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
        model=net.module,
        lr0=lr_start,
        momentum=momentum,
        wd=weight_decay,
        warmup_steps=warmup_steps,
        warmup_start_lr=warmup_start_lr,
        max_iter=max_iter,
        power=power)

    # Checkpoints are written below ./res/cp; only `respth` itself is created at import time,
    # so make sure the sub-directory exists before the first save.
    cp_dir = './res/cp'
    if dist.get_rank() == 0 and not osp.exists(cp_dir):
        os.makedirs(cp_dir)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            # Loader exhausted (or short batch): start the next epoch with a new shuffle.
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        # print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            # it + 1 iterations are finished at this point; using that count avoids a
            # division by zero when a message is due on the very first iteration.
            done = it + 1
            eta = int((max_iter - done) * (glob_t_intv / done))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(
                it=it + 1,
                max_it=max_iter,
                lr=lr,
                loss=loss_avg,
                time=t_intv,
                eta=eta
            )
            logger.info(msg)
            loss_avg = []
            st = ed

        # Periodic checkpoint + evaluation, rank 0 only.
        if dist.get_rank() == 0 and (it + 1) % 5000 == 0:
            state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
            torch.save(state, './res/cp/{}_iter.pth'.format(it))
            evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='{}_iter.pth'.format(it))

    # dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
# Script entry point: start training when this file is executed directly.
if __name__ == "__main__":
train()

@ -1,129 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from PIL import Image
import PIL.ImageEnhance as ImageEnhance
import random
import numpy as np
class RandomCrop(object):
    """Crop an image/label pair to ``size`` at a random position.

    ``size`` is ``(W, H)``. The callable takes a dict with PIL images under ``'im'`` and
    ``'lb'`` (same size) and returns a new dict with the same keys.

    If the input is smaller than the crop in either direction it is first upscaled (bilinear
    for the image, nearest for the label) so that both sides cover the crop window.
    """

    def __init__(self, size, *args, **kwargs):
        self.size = size

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        assert im.size == lb.size
        W, H = self.size
        w, h = im.size

        if (W, H) == (w, h):
            return dict(im=im, lb=lb)
        if w < W or h < H:
            # Take the larger of the two ratios so that BOTH sides reach the crop size.
            # Scaling by one side only (picked via w < h) leaves the other side too small
            # for non-square crops, which pushes the crop box outside the image.
            # For square crops this is the same factor as before.
            scale = max(float(W) / w, float(H) / h)
            w, h = int(scale * w + 1), int(scale * h + 1)
            im = im.resize((w, h), Image.BILINEAR)
            lb = lb.resize((w, h), Image.NEAREST)
        sw, sh = random.random() * (w - W), random.random() * (h - H)
        crop = int(sw), int(sh), int(sw) + W, int(sh) + H
        return dict(
            im=im.crop(crop),
            lb=lb.crop(crop)
        )
class HorizontalFlip(object):
    """Mirror an image/label pair left-right with probability ``p``.

    The callable takes a dict with PIL images under ``'im'`` and ``'lb'``. When the flip is
    applied, left/right label ids are exchanged as well (l_brow/r_brow, l_eye/r_eye,
    l_ear/r_ear), so that a mirrored left part is labelled as a right part and vice versa.
    When the flip is not applied, the input dict is returned unchanged.
    """

    # atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r',
    #         10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
    # (left id, right id) pairs that trade places under mirroring.
    _SWAP_PAIRS = ((2, 3), (4, 5), (7, 8))

    def __init__(self, p=0.5, *args, **kwargs):
        self.p = p

    def __call__(self, im_lb):
        if random.random() > self.p:
            return im_lb
        im = im_lb['im']
        lb = im_lb['lb']
        flip_lb = Image.fromarray(self._swap_sides(np.array(lb)))
        return dict(im=im.transpose(Image.FLIP_LEFT_RIGHT),
                    lb=flip_lb.transpose(Image.FLIP_LEFT_RIGHT),
                    )

    @staticmethod
    def _swap_sides(labels):
        """Return a copy of the label array with every left/right id pair exchanged."""
        # Masks must be computed on the array: comparing the PIL image itself with an int
        # yields a plain False, which selects nothing and silently skips the swap.
        swapped = labels.copy()
        for left, right in HorizontalFlip._SWAP_PAIRS:
            swapped[labels == left] = right
            swapped[labels == right] = left
        return swapped
class RandomScale(object):
    """Resize an image/label pair by a factor drawn at random from ``scales``.

    The image is resized with bilinear and the label with nearest-neighbour interpolation;
    the new size is the truncated product of the original size and the factor.
    """

    def __init__(self, scales=(1, ), *args, **kwargs):
        self.scales = scales

    def __call__(self, im_lb):
        image, label = im_lb['im'], im_lb['lb']
        width, height = image.size
        factor = random.choice(self.scales)
        new_size = (int(width * factor), int(height * factor))
        return {
            'im': image.resize(new_size, Image.BILINEAR),
            'lb': label.resize(new_size, Image.NEAREST),
        }
class ColorJitter(object):
    """Randomly perturb brightness, contrast and saturation of the image; the label is untouched.

    Each of ``brightness``, ``contrast`` and ``saturation`` is a jitter amount ``a``: the
    enhancement factor is drawn uniformly from ``[max(1 - a, 0), 1 + a]``. An amount that is
    ``None`` or not positive disables that perturbation (factor fixed at 1), so the transform
    can be constructed with any subset of the three arguments.
    """

    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
        self.brightness = self._jitter_range(brightness)
        self.contrast = self._jitter_range(contrast)
        self.saturation = self._jitter_range(saturation)

    @staticmethod
    def _jitter_range(amount):
        """Return the [low, high] factor range for a jitter amount; [1, 1] means "no change"."""
        # Always return a range: leaving the attribute unset made __call__ fail with
        # AttributeError whenever an argument was omitted.
        if amount is None or not amount > 0:
            return [1, 1]
        return [max(1 - amount, 0), 1 + amount]

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        r_brightness = random.uniform(self.brightness[0], self.brightness[1])
        r_contrast = random.uniform(self.contrast[0], self.contrast[1])
        r_saturation = random.uniform(self.saturation[0], self.saturation[1])
        im = ImageEnhance.Brightness(im).enhance(r_brightness)
        im = ImageEnhance.Contrast(im).enhance(r_contrast)
        im = ImageEnhance.Color(im).enhance(r_saturation)
        return dict(im=im,
                    lb=lb,
                    )
class MultiScale(object):
    """Produce one bilinearly resized copy of an image per ratio in ``scales``."""

    def __init__(self, scales):
        self.scales = scales

    def __call__(self, img):
        width, height = img.size
        # All target sizes are computed first, then the copies are made in the same order.
        targets = [(int(width * ratio), int(height * ratio)) for ratio in self.scales]
        return [img.resize(target, Image.BILINEAR) for target in targets]
class Compose(object):
    """Chain transforms: each callable in ``do_list`` receives the previous one's result."""

    def __init__(self, do_list):
        self.do_list = do_list

    def __call__(self, im_lb):
        result = im_lb
        for transform in self.do_list:
            result = transform(result)
        return result
# Manual check when the file is run directly: builds one flip, crop and scale transform and
# opens a sample image/label pair. The transforms are not applied in the lines visible here.
if __name__ == '__main__':
flip = HorizontalFlip(p = 1)
crop = RandomCrop((321, 321))
rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0))
img = Image.open('data/img.jpg')
lb = Image.open('data/label.png')
Loading…
Cancel
Save