mirror of https://github.com/kritiksoman/GIMP-ML
commit b68f8c1196 (parent 089d0245eb): simplifyUpdate
Binary file not shown.
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2019 zll

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,95 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math
from PIL import Image
import torchvision.transforms as transforms
import cv2


def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    # Colors for all 20 parts
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    num_of_class = np.max(vis_parsing_anno)

    for pi in range(1, num_of_class + 1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    # print(vis_parsing_anno_color.shape, vis_im.shape)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # return vis_im


def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):

    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    save_pth = osp.join('res/cp', cp)
    net.load_state_dict(torch.load(save_pth))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    with torch.no_grad():
        for image_path in os.listdir(dspth):
            img = Image.open(osp.join(dspth, image_path))
            image = img.resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            img = img.cuda()
            out = net(img)[0]
            parsing = out.squeeze(0).cpu().numpy().argmax(0)

            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))


if __name__ == "__main__":
    setup_logger('./res')
    evaluate()
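A note on the `parsing` array computed above: it is a (512, 512) map of class indices, so individual parts can be masked directly without the color overlay. A tiny stand-alone sketch (the random array is a hypothetical stand-in for `out.argmax(0)`; index 17 = hair per the table in makeup.py below):

import numpy as np

parsing = np.random.randint(0, 19, (512, 512))                # stand-in for the real parsing map
hair_mask = np.where(parsing == 17, 255, 0).astype(np.uint8)  # binary mask for one part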
@@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2

from transform import *


class FaceMask(Dataset):
    def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs):
        super(FaceMask, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test')
        self.mode = mode
        self.ignore_lb = 255
        self.rootpth = rootpth

        self.imgs = os.listdir(os.path.join(self.rootpth, 'CelebA-HQ-img'))

        # pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.trans_train = Compose([
            ColorJitter(
                brightness=0.5,
                contrast=0.5,
                saturation=0.5),
            HorizontalFlip(),
            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomCrop(cropsize)
        ])

    def __getitem__(self, idx):
        impth = self.imgs[idx]
        img = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', impth))
        img = img.resize((512, 512), Image.BILINEAR)
        label = Image.open(osp.join(self.rootpth, 'mask', impth[:-3] + 'png')).convert('P')
        # print(np.unique(np.array(label)))
        if self.mode == 'train':
            im_lb = dict(im=img, lb=label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']
        img = self.to_tensor(img)
        label = np.array(label).astype(np.int64)[np.newaxis, :]
        return img, label

    def __len__(self):
        return len(self.imgs)


if __name__ == "__main__":
    face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
    face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
    mask_path = '/home/zll/data/CelebAMask-HQ/mask'
    counter = 0
    total = 0
    for i in range(15):
        # files = os.listdir(osp.join(face_sep_mask, str(i)))

        atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
                'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']

        for j in range(i * 2000, (i + 1) * 2000):

            mask = np.zeros((512, 512))

            for l, att in enumerate(atts, 1):
                total += 1
                file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
                path = osp.join(face_sep_mask, str(i), file_name)

                if os.path.exists(path):
                    counter += 1
                    sep_mask = np.array(Image.open(path).convert('P'))
                    # print(np.unique(sep_mask))

                    mask[sep_mask == 225] = l
            cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
            print(j)

    print(counter, total)
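For completeness, a minimal sketch of consuming FaceMask with a DataLoader; it assumes the CelebAMask-HQ layout under rootpth (CelebA-HQ-img plus the mask folder built by the __main__ block above) and the separate transform module being importable:

from torch.utils.data import DataLoader
from face_dataset import FaceMask

ds = FaceMask(rootpth='/path/to/CelebAMask-HQ', cropsize=(448, 448), mode='train')
dl = DataLoader(ds, batch_size=16, shuffle=True, num_workers=4, drop_last=True)
for img, label in dl:
    print(img.shape, label.shape)  # (16, 3, 448, 448) and (16, 1, 448, 448) per the crop size
    break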
@@ -1,23 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import os.path as osp
import time
import sys
import logging

import torch.distributed as dist


def setup_logger(logpth):
    logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
    logfile = osp.join(logpth, logfile)
    FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
    log_level = logging.INFO
    if dist.is_initialized() and not dist.get_rank() == 0:
        log_level = logging.ERROR
    logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
    logging.root.addHandler(logging.StreamHandler())
Binary file not shown.
@@ -1,75 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class OhemCELoss(nn.Module):
    def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
        super(OhemCELoss, self).__init__()
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
        self.n_min = n_min
        self.ignore_lb = ignore_lb
        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

    def forward(self, logits, labels):
        N, C, H, W = logits.size()
        loss = self.criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        if loss[self.n_min] > self.thresh:
            loss = loss[loss > self.thresh]
        else:
            loss = loss[:self.n_min]
        return torch.mean(loss)


class SoftmaxFocalLoss(nn.Module):
    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.nll = nn.NLLLoss(ignore_index=ignore_lb)

    def forward(self, logits, labels):
        scores = F.softmax(logits, dim=1)
        factor = torch.pow(1. - scores, self.gamma)
        log_score = F.log_softmax(logits, dim=1)
        log_score = factor * log_score
        loss = self.nll(log_score, labels)
        return loss


if __name__ == '__main__':
    torch.manual_seed(15)
    criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    net1 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net1.cuda()
    net1.train()
    net2 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net2.cuda()
    net2.train()

    with torch.no_grad():
        inten = torch.randn(16, 3, 20, 20).cuda()
        lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
        lbs[1, :, :] = 255

    logits1 = net1(inten)
    logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
    logits2 = net2(inten)
    logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')

    loss1 = criteria1(logits1, lbs)
    loss2 = criteria2(logits2, lbs)
    loss = loss1 + loss2
    print(loss.detach().cpu())
    loss.backward()
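The selection rule in OhemCELoss.forward is the heart of this file, so here is the same logic as a CPU-only sketch on a random stand-in loss map (shapes are hypothetical, mirroring the n_min used in the self-test above): keep every per-pixel loss above -log(thresh), but never fewer than n_min pixels, then average.

import torch

per_pixel_loss = torch.rand(16 * 20 * 20)        # stand-in for the unreduced CE loss map
thresh = -torch.log(torch.tensor(0.7))           # same transform as in __init__
n_min = per_pixel_loss.numel() // 16             # always keep at least this many pixels

sorted_loss, _ = torch.sort(per_pixel_loss, descending=True)
if sorted_loss[n_min] > thresh:
    kept = sorted_loss[sorted_loss > thresh]     # many hard pixels: keep all of them
else:
    kept = sorted_loss[:n_min]                   # few hard pixels: keep the top n_min
print(kept.mean())                               # the value OhemCELoss would return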
@@ -1,130 +0,0 @@
import cv2
import os
import numpy as np
from skimage.filters import gaussian


def sharpen(img):
    img = img * 1.0
    gauss_out = gaussian(img, sigma=5, multichannel=True)

    alpha = 1.5
    img_out = (img - gauss_out) * alpha + img

    img_out = img_out / 255.0

    mask_1 = img_out < 0
    mask_2 = img_out > 1

    img_out = img_out * (1 - mask_1)
    img_out = img_out * (1 - mask_2) + mask_2
    img_out = np.clip(img_out, 0, 1)
    img_out = img_out * 255
    return np.array(img_out, dtype=np.uint8)


def hair(image, parsing, part=17, color=[230, 50, 20]):
    b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
    tar_color = np.zeros_like(image)
    tar_color[:, :, 0] = b
    tar_color[:, :, 1] = g
    tar_color[:, :, 2] = r

    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV)

    if part == 12 or part == 13:
        image_hsv[:, :, 0:2] = tar_hsv[:, :, 0:2]
    else:
        image_hsv[:, :, 0:1] = tar_hsv[:, :, 0:1]

    changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR)

    if part == 17:
        changed = sharpen(changed)

    changed[parsing != part] = image[parsing != part]
    # changed = cv2.resize(changed, (512, 512))
    return changed


#
# def lip(image, parsing, part=17, color=[230, 50, 20]):
#     b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
#     tar_color = np.zeros_like(image)
#     tar_color[:, :, 0] = b
#     tar_color[:, :, 1] = g
#     tar_color[:, :, 2] = r
#
#     image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
#     il, ia, ib = cv2.split(image_lab)
#
#     tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab)
#     tl, ta, tb = cv2.split(tar_lab)
#
#     image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100)
#     image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128)
#     image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128)
#
#     changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR)
#
#     if part == 17:
#         changed = sharpen(changed)
#
#     changed[parsing != part] = image[parsing != part]
#     # changed = cv2.resize(changed, (512, 512))
#     return changed


if __name__ == '__main__':
    # 1  face
    # 10 nose
    # 11 teeth
    # 12 upper lip
    # 13 lower lip
    # 17 hair
    num = 116
    table = {
        'hair': 17,
        'upper_lip': 12,
        'lower_lip': 13
    }
    image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num)
    parsing_path = 'res/test_res/{}.png'.format(num)

    image = cv2.imread(image_path)
    ori = image.copy()
    parsing = np.array(cv2.imread(parsing_path, 0))
    parsing = cv2.resize(parsing, image.shape[0:2], interpolation=cv2.INTER_NEAREST)

    parts = [table['hair'], table['upper_lip'], table['lower_lip']]
    # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]]
    colors = [[100, 200, 100]]
    # zip() stops at the shorter list, so only the first part (hair) is recolored here
    for part, color in zip(parts, colors):
        image = hair(image, parsing, part, color)
    cv2.imwrite('res/makeup/116_ori.png', cv2.resize(ori, (512, 512)))
    cv2.imwrite('res/makeup/116_2.png', cv2.resize(image, (512, 512)))

    cv2.imshow('image', cv2.resize(ori, (512, 512)))
    cv2.imshow('color', cv2.resize(image, (512, 512)))

    # cv2.imshow('image', ori)
    # cv2.imshow('color', image)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
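A hedged usage sketch for hair() outside the __main__ block: the file names are hypothetical, the single-channel label map is read the same way the __main__ above reads res/test_res/*.png, and nearest-neighbor resizing keeps the class indices intact.

import cv2
from makeup import hair

image = cv2.imread('face.jpg')                            # hypothetical input photo (BGR)
parsing = cv2.imread('face_parsing.png', 0)               # single-channel label map
parsing = cv2.resize(parsing, (image.shape[1], image.shape[0]),
                     interpolation=cv2.INTER_NEAREST)     # nearest keeps labels intact
recolored = hair(image, parsing, part=17, color=[230, 50, 20])  # BGR target color
cv2.imwrite('face_recolored.png', recolored)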
@@ -1,283 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from resnet import Resnet18
# from modules.bn import InPlaceABNSync as BatchNorm2d


class ConvBNReLU(nn.Module):
    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        self.conv = nn.Conv2d(in_chan,
                              out_chan,
                              kernel_size=ks,
                              stride=stride,
                              padding=padding,
                              bias=False)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = F.relu(self.bn(x))
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class BiSeNetOutput(nn.Module):
    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = self.conv_out(x)
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class AttentionRefinementModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv_atten(atten)
        atten = self.bn_atten(atten)
        atten = self.sigmoid_atten(atten)
        out = torch.mul(feat, atten)
        return out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class ContextPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(ContextPath, self).__init__()
        self.resnet = Resnet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        H0, W0 = x.size()[2:]
        feat8, feat16, feat32 = self.resnet(x)
        H8, W8 = feat8.size()[2:]
        H16, W16 = feat16.size()[2:]
        H32, W32 = feat32.size()[2:]

        avg = F.avg_pool2d(feat32, feat32.size()[2:])
        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat8, feat16_up, feat32_up  # x8, x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


### This is not used, since I replaced it with the resnet feature of the same size
class SpatialPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        feat = self.conv1(x)
        feat = self.conv2(feat)
        feat = self.conv3(feat)
        feat = self.conv_out(feat)
        return feat

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class FeatureFusionModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan,
                               out_chan // 4,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4,
                               out_chan,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        fcat = torch.cat([fsp, fcp], dim=1)
        feat = self.convblk(fcat)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv1(atten)
        atten = self.relu(atten)
        atten = self.conv2(atten)
        atten = self.sigmoid(atten)
        feat_atten = torch.mul(feat, atten)
        feat_out = feat_atten + feat
        return feat_out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class BiSeNet(nn.Module):
    def __init__(self, n_classes, *args, **kwargs):
        super(BiSeNet, self).__init__()
        self.cp = ContextPath()
        ## here self.sp is deleted
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
        self.init_weight()

    def forward(self, x):
        H, W = x.size()[2:]
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
        feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
        feat_fuse = self.ffm(feat_sp, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
        feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
        feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
        return feat_out, feat_out16, feat_out32

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for name, child in self.named_children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
                lr_mul_wd_params += child_wd_params
                lr_mul_nowd_params += child_nowd_params
            else:
                wd_params += child_wd_params
                nowd_params += child_nowd_params
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params


if __name__ == "__main__":
    net = BiSeNet(19)
    net.cuda()
    net.eval()
    in_ten = torch.randn(16, 3, 640, 480).cuda()
    out, out16, out32 = net(in_ten)
    print(out.shape)

    net.get_params()
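A minimal inference sketch for the BiSeNet above, assuming model.py and its resnet.py dependency (not part of this diff) are importable; Resnet18 fetches pretrained weights on first use, and CPU is used purely for illustration:

import torch
from model import BiSeNet

net = BiSeNet(n_classes=19)
net.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 512, 512)            # stand-in for one normalized RGB image
    main_out, aux16, aux32 = net(x)            # three heads, all upsampled to input size
    parsing = main_out.squeeze(0).argmax(0)    # (512, 512) map of class indices
print(parsing.shape)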
Binary file not shown.
@@ -1,5 +0,0 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule
@@ -1,130 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

try:
    from queue import Queue
except ImportError:
    from Queue import Queue

from .functions import *


class ABN(nn.Module):
    """Activated Batch Normalization

    This gathers a `BatchNorm2d` and an activation function in a single module
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if self.affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if self.affine:
            nn.init.constant_(self.weight, 1)
            nn.init.constant_(self.bias, 0)

    def forward(self, x):
        x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
                                  self.training, self.momentum, self.eps)

        if self.activation == ACT_RELU:
            return functional.relu(x, inplace=True)
        elif self.activation == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        elif self.activation == ACT_ELU:
            return functional.elu(x, inplace=True)
        else:
            return x

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)


class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization"""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an InPlace Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)

    def forward(self, x):
        return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
                           self.training, self.momentum, self.eps, self.activation, self.slope)


class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization with cross-GPU synchronization

    This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
    """

    def forward(self, x):
        return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
                                self.training, self.momentum, self.eps, self.activation, self.slope)

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)
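ABN itself is plain PyTorch, so it can stand in for a BatchNorm2d + activation pair. One caveat for the sketch below: importing modules.bn pulls in modules.functions, which compiles the CUDA extension at import time, so a working CUDA toolchain is assumed.

import torch
import torch.nn as nn
from modules.bn import ABN  # triggers the extension build via modules.functions

block = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1, bias=False),
    ABN(16, activation="leaky_relu", slope=0.01),  # BN and activation in one module
)
y = block(torch.randn(2, 3, 8, 8))
print(y.shape)  # torch.Size([2, 16, 8, 8])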
@@ -1,84 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

from models._util import try_index
from .bn import ABN


class DeeplabV3(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        self.map_convs = nn.ModuleList([
            nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
        ])
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Map convolutions
        out = torch.cat([m(x) for m in self.map_convs], dim=1)
        out = self.map_bn(out)
        out = self.red_conv(out)

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_conv(pool)
        pool = self.global_pooling_bn(pool)
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        if self.training or self.pooling_size is None:
            pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
            pool = pool.view(x.size(0), x.size(1), 1, 1)
        else:
            pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                            min(try_index(self.pooling_size, 1), x.shape[3]))
            padding = (
                (pooling_size[1] - 1) // 2,
                (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
                (pooling_size[0] - 1) // 2,
                (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
            )

            pool = functional.avg_pool2d(x, pooling_size, stride=1)
            pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool
@@ -1,42 +0,0 @@
from collections import OrderedDict

import torch
import torch.nn as nn

from .bn import ABN


class DenseModule(nn.Module):
    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        for i in range(self.layers):
            self.convs1.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(in_channels)),
                ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
            ])))
            self.convs3.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(self.growth * bottleneck_factor)),
                ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
                                   dilation=dilation))
            ])))
            in_channels += self.growth

    @property
    def out_channels(self):
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        inputs = [x]
        for i in range(self.layers):
            x = torch.cat(inputs, dim=1)
            x = self.convs1[i](x)
            x = self.convs3[i](x)
            inputs += [x]

        return torch.cat(inputs, dim=1)
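A quick stand-alone check of the channel bookkeeping in DenseModule: layer i consumes in_channels + i * growth channels and concatenates growth new ones, which is exactly what the out_channels property reports. The configuration values below are hypothetical.

in_channels, growth, layers = 64, 16, 4
widths = [in_channels + i * growth for i in range(layers + 1)]
print(widths)      # [64, 80, 96, 112, 128] -- per-layer input widths, then the output
print(widths[-1])  # 128 == in_channels + growth * layers == out_channels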
@@ -1,234 +0,0 @@
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu",
                    "inplace_abn_cuda_half.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))


def _broadcast_shape(x):
    out_size = []
    for i, s in enumerate(x.size()):
        if i != 1:
            out_size.append(1)
        else:
            out_size.append(s)
    return out_size


def _reduce(x):
    if len(x.size()) == 2:
        return x.sum(dim=0)
    else:
        n, c = x.size()[0:2]
        return x.contiguous().view((n, c, -1)).sum(2).sum(0)


def _count_samples(x):
    count = 1
    for i, s in enumerate(x.size()):
        if i != 1:
            count *= s
    return count


def _act_forward(ctx, x):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_forward(x)
    elif ctx.activation == ACT_NONE:
        pass


def _act_backward(ctx, x, dx):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_backward(x, dx)
    elif ctx.activation == ACT_NONE:
        pass


class InPlaceABN(autograd.Function):
    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        count = _count_samples(x)
        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
        else:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz * weight.sign() if ctx.affine else None
        dbias = edz if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


class InPlaceABNSync(autograd.Function):
    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

        # count = _count_samples(x)
        batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size > 1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                ctx.factor = x.shape[0] / float(batch_size.item())

                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size > 1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz_local * weight.sign() if ctx.affine else None
        dbias = edz_local if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
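The statistics merge in InPlaceABNSync.forward is the subtle part: each worker contributes a factor-weighted mean, and variances are corrected by the squared distance between local and global means (the law of total variance). A CPU sketch with two hypothetical workers, checked against the statistics of the concatenated batch:

import torch

x_a = torch.randn(8, 4, 16)                     # worker A's batch, laid out (N, C, S)
x_b = torch.randn(8, 4, 16)                     # worker B's batch
mean_a, var_a = x_a.mean(dim=(0, 2)), x_a.var(dim=(0, 2), unbiased=False)
mean_b, var_b = x_b.mean(dim=(0, 2)), x_b.var(dim=(0, 2), unbiased=False)

factor = 0.5                                    # equal batches -> 1 / world_size
mean_all = factor * (mean_a + mean_b)           # what the first all_reduce computes
var_all = factor * ((var_a + (mean_a - mean_all) ** 2) +
                    (var_b + (mean_b - mean_all) ** 2))  # the second all_reduce

x_cat = torch.cat([x_a, x_b], dim=0)            # ground truth: union of both batches
print(torch.allclose(mean_all, x_cat.mean(dim=(0, 2)), atol=1e-5),
      torch.allclose(var_all, x_cat.var(dim=(0, 2), unbiased=False), atol=1e-5))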
@@ -1,21 +0,0 @@
import torch.nn as nn
import torch
import torch.distributed as dist

class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        """Global average pooling over the input's spatial dimensions"""
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        in_size = inputs.size()
        return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)

class SingleGPU(nn.Module):
    def __init__(self, module):
        super(SingleGPU, self).__init__()
        self.module = module

    def forward(self, input):
        return self.module(input.cuda(non_blocking=True))
@@ -1,88 +0,0 @@
from collections import OrderedDict

import torch.nn as nn

from .bn import ABN


class IdentityResidualBlock(nn.Module):
    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Can either have two or three elements: if two, construct
            a residual block with two `3 x 3` convolutions; if three, construct a bottleneck block with `1 x 1`, then
            `3 x 3`, then `1 x 1` convolutions.
        stride : int
            Stride of the first `3 x 3` convolution
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.
        """
        super(IdentityResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            if dropout is not None:
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        if hasattr(self, "proj_conv"):
            bn1 = self.bn1(x)
            shortcut = self.proj_conv(bn1)
        else:
            shortcut = x.clone()
            bn1 = self.bn1(x)

        out = self.convs(bn1)
        out.add_(shortcut)

        return out
@@ -1,15 +0,0 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
@@ -1,95 +0,0 @@
#include <torch/extension.h>

#include <vector>

#include "inplace_abn.h"

std::vector<at::Tensor> mean_var(at::Tensor x) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return mean_var_cuda_h(x);
    } else {
      return mean_var_cuda(x);
    }
  } else {
    return mean_var_cpu(x);
  }
}

at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                   bool affine, float eps) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
    } else {
      return forward_cuda(x, mean, var, weight, bias, affine, eps);
    }
  } else {
    return forward_cpu(x, mean, var, weight, bias, affine, eps);
  }
}

std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                 bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
    } else {
      return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
    }
  } else {
    return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
  }
}

at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                    at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
    } else {
      return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
    }
  } else {
    return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
  }
}

void leaky_relu_forward(at::Tensor z, float slope) {
  at::leaky_relu_(z, slope);
}

void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return leaky_relu_backward_cuda_h(z, dz, slope);
    } else {
      return leaky_relu_backward_cuda(z, dz, slope);
    }
  } else {
    return leaky_relu_backward_cpu(z, dz, slope);
  }
}

void elu_forward(at::Tensor z) {
  at::elu_(z);
}

void elu_backward(at::Tensor z, at::Tensor dz) {
  if (z.is_cuda()) {
    return elu_backward_cuda(z, dz);
  } else {
    return elu_backward_cpu(z, dz);
  }
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("mean_var", &mean_var, "Mean and variance computation");
  m.def("forward", &forward, "In-place forward computation");
  m.def("edz_eydz", &edz_eydz, "First part of backward computation");
  m.def("backward", &backward, "Second part of backward computation");
  m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
  m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
  m.def("elu_forward", &elu_forward, "Elu forward computation");
  m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
}
@@ -1,88 +0,0 @@
#pragma once

#include <ATen/ATen.h>

#include <vector>

std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps);
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps);
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps);

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps);

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps);

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);

void elu_backward_cpu(at::Tensor z, at::Tensor dz);
void elu_backward_cuda(at::Tensor z, at::Tensor dz);

static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
  num = x.size(0);
  chn = x.size(1);
  sp = 1;
  for (int64_t i = 2; i < x.ndimension(); ++i)
    sp *= x.size(i);
}

/*
 * Specialized CUDA reduction functions for BN
 */
#ifdef __CUDACC__

#include "utils/cuda.cuh"

template <typename T, typename Op>
__device__ T reduce(Op op, int plane, int N, int S) {
  T sum = (T)0;
  for (int batch = 0; batch < N; ++batch) {
    for (int x = threadIdx.x; x < S; x += blockDim.x) {
      sum += op(batch, plane, x);
    }
  }

  // sum over NumThreads within a warp
  sum = warpSum(sum);

  // 'transpose', and reduce within warp again
  __shared__ T shared[32];
  __syncthreads();
  if (threadIdx.x % WARP_SIZE == 0) {
    shared[threadIdx.x / WARP_SIZE] = sum;
  }
  if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
    // zero out the other entries in shared
    shared[threadIdx.x] = (T)0;
  }
  __syncthreads();
  if (threadIdx.x / WARP_SIZE == 0) {
    sum = warpSum(shared[threadIdx.x]);
    if (threadIdx.x == 0) {
      shared[0] = sum;
    }
  }
  __syncthreads();

  // Everyone picks it up, should be broadcast into the whole gradInput
  return shared[0];
}
#endif
@ -1,119 +0,0 @@
#include <ATen/ATen.h>

#include <vector>

#include "utils/checks.h"
#include "inplace_abn.h"

at::Tensor reduce_sum(at::Tensor x) {
  if (x.ndimension() == 2) {
    return x.sum(0);
  } else {
    auto x_view = x.view({x.size(0), x.size(1), -1});
    return x_view.sum(-1).sum(0);
  }
}

at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
  if (x.ndimension() == 2) {
    return v;
  } else {
    std::vector<int64_t> broadcast_size = {1, -1};
    for (int64_t i = 2; i < x.ndimension(); ++i)
      broadcast_size.push_back(1);

    return v.view(broadcast_size);
  }
}

int64_t count(at::Tensor x) {
  int64_t count = x.size(0);
  for (int64_t i = 2; i < x.ndimension(); ++i)
    count *= x.size(i);

  return count;
}

at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
  if (affine) {
    return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
  } else {
    return z;
  }
}

std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
  auto num = count(x);
  auto mean = reduce_sum(x) / num;
  auto diff = x - broadcast_to(mean, x);
  auto var = reduce_sum(diff.pow(2)) / num;

  return {mean, var};
}

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps) {
  auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
  auto mul = at::rsqrt(var + eps) * gamma;

  x.sub_(broadcast_to(mean, x));
  x.mul_(broadcast_to(mul, x));
  if (affine) x.add_(broadcast_to(bias, x));

  return x;
}

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps) {
  auto edz = reduce_sum(dz);
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto eydz = reduce_sum(y * dz);

  return {edz, eydz};
}

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);

  auto num = count(z);
  auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
  return dx;
}

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // invert the activation to recover its input, and scale the gradient
        _z[i] *= 1 / slope;
        _dz[i] *= slope;
      }
    }
  }));
}

void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // scale the gradient by d(elu)/dx = z + 1 using the stored output z
        // *before* inverting it, matching the order in the CUDA implementation
        _dz[i] *= (_z[i] + 1.f);
        _z[i] = log1p(_z[i]);
      }
    }
  }));
}
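
The key trick of inplace ABN is visible above: backward_cpu never needs the pre-activation input, because invert_affine reconstructs the normalized value y from the stored output z. A small PyTorch sketch of that round trip, assuming an affine BN that uses |weight| + eps as the effective gamma, as in the source (illustrative only):

import torch

eps = 1e-5
x = torch.randn(4, 8, 16, 16)
weight, bias = torch.randn(8), torch.randn(8)

mean = x.mean(dim=(0, 2, 3))
var = x.var(dim=(0, 2, 3), unbiased=False)

def bc(v):  # broadcast a per-channel vector over (N, C, H, W)
    return v.view(1, -1, 1, 1)

y = (x - bc(mean)) * torch.rsqrt(bc(var) + eps)     # normalized value
z = y * bc(weight.abs() + eps) + bc(bias)           # stored output
y_rec = (z - bc(bias)) / bc(weight.abs() + eps)     # invert_affine
print(torch.allclose(y, y_rec, atol=1e-5))          # True
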
@ -1,333 +0,0 @@
#include <ATen/ATen.h>

#include <thrust/device_ptr.h>
#include <thrust/transform.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
template<typename T>
struct SumOp {
  __device__ SumOp(const T *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    return tensor[(batch * chn + plane) * sp + n];
  }
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct VarOp {
  __device__ VarOp(T m, const T *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    T val = tensor[(batch * chn + plane) * sp + n];
    return (val - mean) * (val - mean);
  }
  const T mean;
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct GradOp {
  __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
    T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
    T _dz = dz[(batch * chn + plane) * sp + n];
    return Pair<T>(_dz, _y * _dz);
  }
  const T weight;
  const T bias;
  const T *z;
  const T *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

template<typename T>
__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  T norm = T(1) / T(num * sp);

  T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options());
  auto var = at::empty({chn}, x.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
    mean_var_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        num, chn, sp);
  }));

  return {mean, var};
}

/**********
 * forward
 **********/

template<typename T>
__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
                               bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _mean = mean[plane];
  T _var = var[plane];
  T _weight = affine ? abs(weight[plane]) + eps : T(1);
  T _bias = affine ? bias[plane] : T(0);

  T mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _x = x[(batch * chn + plane) * sp + n];
      T _y = (_x - _mean) * mul + _bias;

      x[(batch * chn + plane) * sp + n] = _y;
    }
  }
}

at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
    forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return x;
}

/***********
 * edz_eydz
 ***********/

template<typename T>
__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
                                T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;

  Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options());
  auto eydz = at::empty({chn}, z.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
    edz_eydz_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return {edz, eydz};
}

/***********
 * backward
 ***********/

template<typename T>
__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
                                const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;
  T _var = var[plane];
  T _edz = edz[plane];
  T _eydz = eydz[plane];

  T _mul = _weight * rsqrt(_var + eps);
  T count = T(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _dz = dz[(batch * chn + plane) * sp + n];
      T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
    }
  }
}

at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
    backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        dx.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return dx;
}

/**************
 * activations
 **************/

template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_dz,
                       [slope] __device__ (const T& dz) { return dz * slope; },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [slope] __device__ (const T& z) { return z / slope; },
                       [] __device__ (const T& z) { return z < 0; });
}

void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
}

template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_z, th_dz,
                       [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [] __device__ (const T& z) { return log1p(z); },
                       [] __device__ (const T& z) { return z < 0; });
}

void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  // dispatch label corrected from "leaky_relu_backward_cuda" (copy-paste slip)
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}
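
As a cross-check on the reduction kernels, the two per-channel statistics gathered by edz_eydz have a direct tensor expression. A hedged PyTorch equivalent (illustrative only, not part of the source):

import torch

dz = torch.randn(4, 8, 256)       # gradient of the stored output z
y = torch.randn(4, 8, 256)        # normalized activations, recovered via invert_affine
edz = dz.sum(dim=(0, 2))          # res.v1 for each channel plane
eydz = (y * dz).sum(dim=(0, 2))   # res.v2 for each channel plane
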
@ -1,275 +0,0 @@
#include <ATen/ATen.h>

#include <cuda_fp16.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
struct SumOpH {
  __device__ SumOpH(const half *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    return __half2float(tensor[(batch * chn + plane) * sp + n]);
  }
  const half *tensor;
  const int chn;
  const int sp;
};

struct VarOpH {
  __device__ VarOpH(float m, const half *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
    return (t - mean) * (t - mean);
  }
  const float mean;
  const half *tensor;
  const int chn;
  const int sp;
};

struct GradOpH {
  __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
    float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
    float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
    return Pair<float>(_dz, _y * _dz);
  }
  const float weight;
  const float bias;
  const half *z;
  const half *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  float norm = 1.f / static_cast<float>(num * sp);

  float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options().dtype(at::kFloat));
  auto var = at::empty({chn}, x.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  mean_var_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      num, chn, sp);

  return {mean, var};
}

/**********
 * forward
 **********/

__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
                                 bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  const float _mean = mean[plane];
  const float _var = var[plane];
  const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  const float _bias = affine ? bias[plane] : 0.f;

  const float mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      half *x_ptr = x + (batch * chn + plane) * sp + n;
      float _x = __half2float(*x_ptr);
      float _y = (_x - _mean) * mul + _bias;

      *x_ptr = __float2half(_y);
    }
  }
}

at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  forward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      affine, eps, num, chn, sp);

  return x;
}

__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
                                  float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;

  Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options().dtype(at::kFloat));
  auto eydz = at::empty({chn}, z.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  edz_eydz_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      affine, eps, num, chn, sp);

  return {edz, eydz};
}

__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
                                  const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;
  float _var = var[plane];
  float _edz = edz[plane];
  float _eydz = eydz[plane];

  float _mul = _weight * rsqrt(_var + eps);
  float count = float(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
      float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
    }
  }
}

at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  backward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      reinterpret_cast<half*>(dx.data<at::Half>()),
      affine, eps, num, chn, sp);

  return dx;
}

__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
    float _z = __half2float(z[i]);
    if (_z < 0) {
      dz[i] = __float2half(__half2float(dz[i]) * slope);
      z[i] = __float2half(_z / slope);
    }
  }
}

void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();
  dim3 threads(getNumThreads(count));
  dim3 blocks = (count + threads.x - 1) / threads.x;
  auto stream = at::cuda::getCurrentCUDAStream();
  leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      slope, count);
}
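
Note the design choice in this half-precision path: tensors are stored as fp16, but every statistic (mean, var, edz, eydz) is accumulated and kept in fp32 to avoid the catastrophic rounding that summing many fp16 values would cause. A small PyTorch sketch of the same policy (illustrative only):

import torch

x_half = torch.randn(4, 8, 32 * 32, dtype=torch.float16)
# mirror of mean_var_cuda_h: read half values, accumulate and store in float32
x_f32 = x_half.float()
mean = x_f32.mean(dim=(0, 2))                  # at::kFloat outputs, as in the source
var = x_f32.var(dim=(0, 2), unbiased=False)
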
@ -1,15 +0,0 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old versions of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
@ -1,49 +0,0 @@
#pragma once

#include <ATen/ATen.h>

/*
 * Functions to share code between CPU and GPU
 */

#ifdef __CUDACC__
// CUDA versions

#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE __host__ __device__ inline
#define FLOOR(x) floor(x)

#if __CUDA_ARCH__ >= 600
// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
#define ACCUM(x,y) atomicAdd_block(&(x),(y))
#else
// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
// and use the known atomicCAS-based implementation for double
template<typename data_t>
__device__ inline data_t atomic_add(data_t *address, data_t val) {
  return atomicAdd(address, val);
}

template<>
__device__ inline double atomic_add(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}

#define ACCUM(x,y) atomic_add(&(x),(y))
#endif // #if __CUDA_ARCH__ >= 600

#else
// CPU versions

#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
#define ACCUM(x,y) (x) += (y)

#endif // #ifdef __CUDACC__
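
The double-precision fallback above emulates an atomic add with a compare-and-swap retry loop: read the cell, propose old + val, and retry whenever another thread changed the cell in between. A toy, single-threaded Python rendition of that control flow (purely illustrative):

def cas(cell, expected, new):
    # compare-and-swap: write `new` only if the cell still holds `expected`
    old = cell[0]
    if old == expected:
        cell[0] = new
    return old

def atomic_add(cell, val):
    old = cell[0]
    while True:
        assumed = old
        old = cas(cell, assumed, assumed + val)
        if assumed == old:  # nobody raced us; the add landed
            return old

cell = [1.5]
atomic_add(cell, 2.0)
print(cell[0])  # 3.5
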
@ -1,71 +0,0 @@
#pragma once

/*
 * General settings and functions
 */
const int WARP_SIZE = 32;
const int MAX_BLOCK_SIZE = 1024;

static int getNumThreads(int nElem) {
  int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
  for (int i = 0; i < 6; ++i) {
    if (nElem <= threadSizes[i]) {
      return threadSizes[i];
    }
  }
  return MAX_BLOCK_SIZE;
}

/*
 * Reduction utilities
 */
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
                                           unsigned int mask = 0xffffffff) {
#if CUDART_VERSION >= 9000
  return __shfl_xor_sync(mask, value, laneMask, width);
#else
  return __shfl_xor(value, laneMask, width);
#endif
}

__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }

template<typename T>
struct Pair {
  T v1, v2;
  __device__ Pair() {}
  __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
  __device__ Pair(T v) : v1(v), v2(v) {}
  __device__ Pair(int v) : v1(v), v2(v) {}
  __device__ Pair &operator+=(const Pair<T> &a) {
    v1 += a.v1;
    v2 += a.v2;
    return *this;
  }
};

template<typename T>
static __device__ __forceinline__ T warpSum(T val) {
#if __CUDA_ARCH__ >= 300
  for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
    val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
  }
#else
  __shared__ T values[MAX_BLOCK_SIZE];
  values[threadIdx.x] = val;
  __threadfence_block();
  const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
  for (int i = 1; i < WARP_SIZE; i++) {
    val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
  }
#endif
  return val;
}

template<typename T>
static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
  value.v1 = warpSum(value.v1);
  value.v2 = warpSum(value.v2);
  return value;
}
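
warpSum uses an XOR butterfly: on each of log2(32) = 5 rounds, every lane adds the value held by its partner at lane_id ^ (1 << i), so after five rounds all 32 lanes hold the full sum without any shared memory. A quick Python emulation of the shuffle pattern (illustrative):

vals = list(range(32))  # one value per lane
for i in range(5):      # getMSB(WARP_SIZE) == 5 rounds
    # every lane simultaneously adds its XOR partner's previous value
    vals = [v + vals[lane ^ (1 << i)] for lane, v in enumerate(vals)]
print(set(vals))        # {496} -- every lane ends up with sum(range(32))
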
@ -1,69 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
import logging

logger = logging.getLogger()

class Optimizer(object):
    def __init__(self,
            model,
            lr0,
            momentum,
            wd,
            warmup_steps,
            warmup_start_lr,
            max_iter,
            power,
            *args, **kwargs):
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.lr0 = lr0
        self.lr = self.lr0
        self.max_iter = float(max_iter)
        self.power = power
        self.it = 0
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
        param_list = [
                {'params': wd_params},
                {'params': nowd_params, 'weight_decay': 0},
                {'params': lr_mul_wd_params, 'lr_mul': True},
                {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
        self.optim = torch.optim.SGD(
                param_list,
                lr = lr0,
                momentum = momentum,
                weight_decay = wd)
        self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)

    def get_lr(self):
        if self.it <= self.warmup_steps:
            # exponential ramp from warmup_start_lr up to lr0
            lr = self.warmup_start_lr * (self.warmup_factor ** self.it)
        else:
            # polynomial ("poly") decay from lr0 down to 0 at max_iter
            factor = (1 - (self.it - self.warmup_steps) / (self.max_iter - self.warmup_steps)) ** self.power
            lr = self.lr0 * factor
        return lr

    def step(self):
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            if pg.get('lr_mul', False):
                pg['lr'] = self.lr * 10
            else:
                pg['lr'] = self.lr
        if self.optim.defaults.get('lr_mul', False):
            self.optim.defaults['lr'] = self.lr * 10
        else:
            self.optim.defaults['lr'] = self.lr
        self.it += 1
        self.optim.step()
        if self.it == self.warmup_steps + 2:
            logger.info('==> warmup done, start to implement poly lr strategy')

    def zero_grad(self):
        self.optim.zero_grad()
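
To make the two-phase schedule concrete, here is a standalone sketch of get_lr with the training script's settings (lr0=1e-2, warmup_start_lr=1e-5, warmup_steps=1000, max_iter=80000, power=0.9):

lr0, warmup_start_lr = 1e-2, 1e-5
warmup_steps, max_iter, power = 1000, 80000.0, 0.9
warmup_factor = (lr0 / warmup_start_lr) ** (1.0 / warmup_steps)

def lr_at(it):
    if it <= warmup_steps:
        return warmup_start_lr * warmup_factor ** it
    return lr0 * (1 - (it - warmup_steps) / (max_iter - warmup_steps)) ** power

for it in (0, 500, 1000, 40000, 79999):
    print(it, round(lr_at(it), 6))
# lr climbs 1e-5 -> 1e-2 during warmup, then decays polynomially toward 0
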
@ -1,38 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import os.path as osp
import os
import cv2
from transform import *
from PIL import Image

face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
mask_path = '/home/zll/data/CelebAMask-HQ/mask'
counter = 0
total = 0

atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
        'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']

for i in range(15):
    for j in range(i * 2000, (i + 1) * 2000):
        mask = np.zeros((512, 512))

        for l, att in enumerate(atts, 1):
            total += 1
            file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
            path = osp.join(face_sep_mask, str(i), file_name)

            if os.path.exists(path):
                counter += 1
                sep_mask = np.array(Image.open(path).convert('P'))
                # print(np.unique(sep_mask))

                # the per-part annotations are binary, with foreground stored as 225
                mask[sep_mask == 225] = l
        cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
        print(j)

print(counter, total)
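
One subtlety worth knowing when regenerating masks this way: later attributes overwrite earlier ones wherever the per-part masks overlap, so the order of atts acts as a z-order (hair on top of skin, and so on). A tiny NumPy illustration:

import numpy as np

mask = np.zeros((4, 4), dtype=np.uint8)
skin = np.ones((4, 4), dtype=bool)   # class 1 covers everything
hair = np.zeros((4, 4), dtype=bool)
hair[:2] = True                      # class 17 covers the top half

mask[skin] = 1
mask[hair] = 17                      # overwrites skin wherever both are present
print(mask)
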
@ -1,109 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo

# from modules.bn import InPlaceABNSync as BatchNorm2d

resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if in_chan != out_chan or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan),
                )

    def forward(self, x):
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out = shortcut + residual
        out = self.relu(out)
        return out


def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    layers = [BasicBlock(in_chan, out_chan, stride=stride)]
    for i in range(bnum - 1):
        layers.append(BasicBlock(out_chan, out_chan, stride=1))
    return nn.Sequential(*layers)


class Resnet18(nn.Module):
    def __init__(self):
        super(Resnet18, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.maxpool(x)

        x = self.layer1(x)
        feat8 = self.layer2(x)        # 1/8
        feat16 = self.layer3(feat8)   # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        state_dict = modelzoo.load_url(resnet18_url)
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            if 'fc' in k: continue
            self_state_dict.update({k: v})
        self.load_state_dict(self_state_dict)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


if __name__ == "__main__":
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    print(out[0].size())
    print(out[1].size())
    print(out[2].size())
    net.get_params()
Binary file not shown.
@ -1,100 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet

import torch

import os
import os.path as osp
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import cv2

def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    # Colors for all 20 parts
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    num_of_class = np.max(vis_parsing_anno)

    for pi in range(1, num_of_class + 1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    # print(vis_parsing_anno_color.shape, vis_im.shape)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        cv2.imwrite(save_path[:-4] + '.png', vis_parsing_anno)
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # return vis_im

def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):

    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    save_pth = osp.join('res/cp', cp)

    if torch.cuda.is_available():
        net.cuda()
        net.load_state_dict(torch.load(save_pth))
    else:
        net.load_state_dict(torch.load(save_pth, map_location=lambda storage, loc: storage))

    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    with torch.no_grad():
        for image_path in os.listdir(dspth):
            img = Image.open(osp.join(dspth, image_path))
            image = img.resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            if torch.cuda.is_available():
                img = img.cuda()
            out = net(img)[0]
            if torch.cuda.is_available():
                parsing = out.squeeze(0).cpu().numpy().argmax(0)
            else:
                parsing = out.squeeze(0).numpy().argmax(0)
            # print(parsing)
            print(np.unique(parsing))

            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))


if __name__ == "__main__":
    # note: evaluate() scans dspth with os.listdir, so dspth should be a
    # directory of images, not a single file
    evaluate(dspth='makeup/116_ori.png', cp='79999_iter.pth')
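
The per-class loop in vis_parsing_maps can also be written as a single palette lookup, which stays fast as the number of classes grows. A hedged NumPy alternative (not part of the source; the palette below is truncated for brevity):

import numpy as np

palette = np.array([[255, 255, 255],           # index 0: background stays white
                    [255, 0, 0], [255, 85, 0], [255, 170, 0]], dtype=np.uint8)
parsing = np.random.randint(0, 4, size=(512, 512))
color = palette[parsing]                       # fancy indexing gives (512, 512, 3)
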
@ -1,179 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask
from loss import OhemCELoss
from evaluate import evaluate
from optimizer import Optimizer
import cv2
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import datetime
import argparse


respth = './res'
if not osp.exists(respth):
    os.makedirs(respth)
logger = logging.getLogger()


def parse_args():
    parse = argparse.ArgumentParser()
    parse.add_argument(
            '--local_rank',
            dest = 'local_rank',
            type = int,
            default = -1,
            )
    return parse.parse_args()


def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
            backend = 'nccl',
            init_method = 'tcp://127.0.0.1:33241',
            world_size = torch.cuda.device_count(),
            rank=args.local_rank
            )
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/zll/data/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size = n_img_per_gpu,
                    shuffle = False,
                    sampler = sampler,
                    num_workers = n_workers,
                    pin_memory = True,
                    drop_last = True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net,
            device_ids = [args.local_rank, ],
            output_device = args.local_rank
            )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
            model = net.module,
            lr0 = lr_start,
            momentum = momentum,
            wd = weight_decay,
            warmup_steps = warmup_steps,
            warmup_start_lr = warmup_start_lr,
            max_iter = max_iter,
            power = power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        # print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                    'it: {it}/{max_it}',
                    'lr: {lr:4f}',
                    'loss: {loss:.4f}',
                    'eta: {eta}',
                    'time: {time:.4f}',
                ]).format(
                    it = it + 1,
                    max_it = max_iter,
                    lr = lr,
                    loss = loss_avg,
                    time = t_intv,
                    eta = eta
                )
            logger.info(msg)
            loss_avg = []
            st = ed
        # periodic checkpoint + evaluation on rank 0 only (the original
        # repeated the rank check twice; one condition is enough)
        if dist.get_rank() == 0 and (it + 1) % 5000 == 0:
            state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
            torch.save(state, './res/cp/{}_iter.pth'.format(it))
            evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='{}_iter.pth'.format(it))

    # dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))


if __name__ == "__main__":
    train()
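
OhemCELoss is imported from loss.py, which is not part of this diff. For orientation, here is a minimal sketch of online hard example mining cross-entropy as configured above (keep at least the n_min hardest pixels, plus any pixel whose loss exceeds -log(thresh)); this is an assumption about loss.py's behavior, not a copy of it:

import torch
import torch.nn.functional as F

def ohem_ce_loss(logits, labels, thresh=0.7, n_min=1024, ignore_lb=-100):
    # hypothetical stand-in for OhemCELoss; per-pixel cross entropy, flattened
    loss = F.cross_entropy(logits, labels, ignore_index=ignore_lb,
                           reduction='none').view(-1)
    loss, _ = torch.sort(loss, descending=True)
    # keep at least n_min pixels; keep more if they are harder than -log(thresh)
    thresh_loss = -torch.log(torch.tensor(thresh))
    if loss[n_min] > thresh_loss:
        loss = loss[loss > thresh_loss]
    else:
        loss = loss[:n_min]
    return loss.mean()
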
@ -1,129 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


from PIL import Image
import PIL.ImageEnhance as ImageEnhance
import random
import numpy as np

class RandomCrop(object):
    def __init__(self, size, *args, **kwargs):
        self.size = size

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        assert im.size == lb.size
        W, H = self.size
        w, h = im.size

        if (W, H) == (w, h): return dict(im=im, lb=lb)
        if w < W or h < H:
            scale = float(W) / w if w < h else float(H) / h
            w, h = int(scale * w + 1), int(scale * h + 1)
            im = im.resize((w, h), Image.BILINEAR)
            lb = lb.resize((w, h), Image.NEAREST)
        sw, sh = random.random() * (w - W), random.random() * (h - H)
        crop = int(sw), int(sh), int(sw) + W, int(sh) + H
        return dict(
                im = im.crop(crop),
                lb = lb.crop(crop)
                )


class HorizontalFlip(object):
    def __init__(self, p=0.5, *args, **kwargs):
        self.p = p

    def __call__(self, im_lb):
        if random.random() > self.p:
            return im_lb
        else:
            im = im_lb['im']
            # convert once so the label comparisons below work on array values
            # (comparing a PIL Image to an int silently yields no matches)
            lb = np.array(im_lb['lb'])

            # atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r',
            #         10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']

            # swap the left/right part labels so they stay correct after mirroring
            flip_lb = lb.copy()
            flip_lb[lb == 2] = 3
            flip_lb[lb == 3] = 2
            flip_lb[lb == 4] = 5
            flip_lb[lb == 5] = 4
            flip_lb[lb == 7] = 8
            flip_lb[lb == 8] = 7
            flip_lb = Image.fromarray(flip_lb)
            return dict(im = im.transpose(Image.FLIP_LEFT_RIGHT),
                        lb = flip_lb.transpose(Image.FLIP_LEFT_RIGHT),
                        )


class RandomScale(object):
    def __init__(self, scales=(1, ), *args, **kwargs):
        self.scales = scales

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        W, H = im.size
        scale = random.choice(self.scales)
        w, h = int(W * scale), int(H * scale)
        return dict(im = im.resize((w, h), Image.BILINEAR),
                    lb = lb.resize((w, h), Image.NEAREST),
                    )


class ColorJitter(object):
    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
        if brightness is not None and brightness > 0:
            self.brightness = [max(1 - brightness, 0), 1 + brightness]
        if contrast is not None and contrast > 0:
            self.contrast = [max(1 - contrast, 0), 1 + contrast]
        if saturation is not None and saturation > 0:
            self.saturation = [max(1 - saturation, 0), 1 + saturation]

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        r_brightness = random.uniform(self.brightness[0], self.brightness[1])
        r_contrast = random.uniform(self.contrast[0], self.contrast[1])
        r_saturation = random.uniform(self.saturation[0], self.saturation[1])
        im = ImageEnhance.Brightness(im).enhance(r_brightness)
        im = ImageEnhance.Contrast(im).enhance(r_contrast)
        im = ImageEnhance.Color(im).enhance(r_saturation)
        return dict(im = im,
                    lb = lb,
                    )


class MultiScale(object):
    def __init__(self, scales):
        self.scales = scales

    def __call__(self, img):
        W, H = img.size
        sizes = [(int(W * ratio), int(H * ratio)) for ratio in self.scales]
        return [img.resize(size, Image.BILINEAR) for size in sizes]


class Compose(object):
    def __init__(self, do_list):
        self.do_list = do_list

    def __call__(self, im_lb):
        for comp in self.do_list:
            im_lb = comp(im_lb)
        return im_lb


if __name__ == '__main__':
    flip = HorizontalFlip(p = 1)
    crop = RandomCrop((321, 321))
    rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0))
    img = Image.open('data/img.jpg')
    lb = Image.open('data/label.png')