mirror of https://github.com/kritiksoman/GIMP-ML
commit b68f8c1196 (parent 089d0245eb): simplifyUpdate
Binary file not shown.
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2019 zll

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,95 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import numpy as np
from tqdm import tqdm
import math
from PIL import Image
import torchvision.transforms as transforms
import cv2


def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    # Colors for all 20 parts
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    num_of_class = np.max(vis_parsing_anno)

    for pi in range(1, num_of_class + 1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    # print(vis_parsing_anno_color.shape, vis_im.shape)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # return vis_im


def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):

    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    save_pth = osp.join('res/cp', cp)
    net.load_state_dict(torch.load(save_pth))
    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    with torch.no_grad():
        for image_path in os.listdir(dspth):
            img = Image.open(osp.join(dspth, image_path))
            image = img.resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            img = img.cuda()
            out = net(img)[0]
            parsing = out.squeeze(0).cpu().numpy().argmax(0)

            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))


if __name__ == "__main__":
    setup_logger('./res')
    evaluate()
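A note on the `parsing` array computed above: it is a (512, 512) map of class indices, so individual parts can be masked directly without the color overlay. A tiny stand-alone sketch (the random array is a hypothetical stand-in for `out.argmax(0)`; index 17 = hair per the table in makeup.py below):

import numpy as np

parsing = np.random.randint(0, 19, (512, 512))                # stand-in for the real parsing map
hair_mask = np.where(parsing == 17, 255, 0).astype(np.uint8)  # binary mask for one part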
@@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import os.path as osp
import os
from PIL import Image
import numpy as np
import json
import cv2

from transform import *


class FaceMask(Dataset):
    def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs):
        super(FaceMask, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test')
        self.mode = mode
        self.ignore_lb = 255
        self.rootpth = rootpth

        self.imgs = os.listdir(os.path.join(self.rootpth, 'CelebA-HQ-img'))

        # pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        self.trans_train = Compose([
            ColorJitter(
                brightness=0.5,
                contrast=0.5,
                saturation=0.5),
            HorizontalFlip(),
            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomCrop(cropsize)
        ])

    def __getitem__(self, idx):
        impth = self.imgs[idx]
        img = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', impth))
        img = img.resize((512, 512), Image.BILINEAR)
        label = Image.open(osp.join(self.rootpth, 'mask', impth[:-3] + 'png')).convert('P')
        # print(np.unique(np.array(label)))
        if self.mode == 'train':
            im_lb = dict(im=img, lb=label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']
        img = self.to_tensor(img)
        label = np.array(label).astype(np.int64)[np.newaxis, :]
        return img, label

    def __len__(self):
        return len(self.imgs)


if __name__ == "__main__":
    face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
    face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
    mask_path = '/home/zll/data/CelebAMask-HQ/mask'
    counter = 0
    total = 0
    for i in range(15):
        # files = os.listdir(osp.join(face_sep_mask, str(i)))

        atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
                'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']

        for j in range(i * 2000, (i + 1) * 2000):

            mask = np.zeros((512, 512))

            for l, att in enumerate(atts, 1):
                total += 1
                file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
                path = osp.join(face_sep_mask, str(i), file_name)

                if os.path.exists(path):
                    counter += 1
                    sep_mask = np.array(Image.open(path).convert('P'))
                    # print(np.unique(sep_mask))

                    mask[sep_mask == 225] = l
            cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
            print(j)

    print(counter, total)
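For completeness, a minimal sketch of consuming FaceMask with a DataLoader; it assumes the CelebAMask-HQ layout under rootpth (CelebA-HQ-img plus the mask folder built by the __main__ block above) and the separate transform module being importable:

from torch.utils.data import DataLoader
from face_dataset import FaceMask

ds = FaceMask(rootpth='/path/to/CelebAMask-HQ', cropsize=(448, 448), mode='train')
dl = DataLoader(ds, batch_size=16, shuffle=True, num_workers=4, drop_last=True)
for img, label in dl:
    print(img.shape, label.shape)  # (16, 3, 448, 448) and (16, 1, 448, 448) per the crop size
    break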
@@ -1,23 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import os.path as osp
import time
import sys
import logging

import torch.distributed as dist


def setup_logger(logpth):
    logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
    logfile = osp.join(logpth, logfile)
    FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
    log_level = logging.INFO
    if dist.is_initialized() and not dist.get_rank() == 0:
        log_level = logging.ERROR
    logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
    logging.root.addHandler(logging.StreamHandler())
Binary file not shown.
@@ -1,75 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class OhemCELoss(nn.Module):
    def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
        super(OhemCELoss, self).__init__()
        self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda()
        self.n_min = n_min
        self.ignore_lb = ignore_lb
        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')

    def forward(self, logits, labels):
        N, C, H, W = logits.size()
        loss = self.criteria(logits, labels).view(-1)
        loss, _ = torch.sort(loss, descending=True)
        if loss[self.n_min] > self.thresh:
            loss = loss[loss > self.thresh]
        else:
            loss = loss[:self.n_min]
        return torch.mean(loss)


class SoftmaxFocalLoss(nn.Module):
    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
        super(SoftmaxFocalLoss, self).__init__()
        self.gamma = gamma
        self.nll = nn.NLLLoss(ignore_index=ignore_lb)

    def forward(self, logits, labels):
        scores = F.softmax(logits, dim=1)
        factor = torch.pow(1. - scores, self.gamma)
        log_score = F.log_softmax(logits, dim=1)
        log_score = factor * log_score
        loss = self.nll(log_score, labels)
        return loss


if __name__ == '__main__':
    torch.manual_seed(15)
    criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda()
    net1 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net1.cuda()
    net1.train()
    net2 = nn.Sequential(
        nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
    )
    net2.cuda()
    net2.train()

    with torch.no_grad():
        inten = torch.randn(16, 3, 20, 20).cuda()
        lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
        lbs[1, :, :] = 255

    logits1 = net1(inten)
    logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear')
    logits2 = net2(inten)
    logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear')

    loss1 = criteria1(logits1, lbs)
    loss2 = criteria2(logits2, lbs)
    loss = loss1 + loss2
    print(loss.detach().cpu())
    loss.backward()
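The selection rule in OhemCELoss.forward is the heart of this file, so here is the same logic as a CPU-only sketch on a random stand-in loss map (shapes are hypothetical, mirroring the n_min used in the self-test above): keep every per-pixel loss above -log(thresh), but never fewer than n_min pixels, then average.

import torch

per_pixel_loss = torch.rand(16 * 20 * 20)        # stand-in for the unreduced CE loss map
thresh = -torch.log(torch.tensor(0.7))           # same transform as in __init__
n_min = per_pixel_loss.numel() // 16             # always keep at least this many pixels

sorted_loss, _ = torch.sort(per_pixel_loss, descending=True)
if sorted_loss[n_min] > thresh:
    kept = sorted_loss[sorted_loss > thresh]     # many hard pixels: keep all of them
else:
    kept = sorted_loss[:n_min]                   # few hard pixels: keep the top n_min
print(kept.mean())                               # the value OhemCELoss would return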
@@ -1,130 +0,0 @@
import cv2
import os
import numpy as np
from skimage.filters import gaussian


def sharpen(img):
    img = img * 1.0
    gauss_out = gaussian(img, sigma=5, multichannel=True)

    alpha = 1.5
    img_out = (img - gauss_out) * alpha + img

    img_out = img_out / 255.0

    mask_1 = img_out < 0
    mask_2 = img_out > 1

    img_out = img_out * (1 - mask_1)
    img_out = img_out * (1 - mask_2) + mask_2
    img_out = np.clip(img_out, 0, 1)
    img_out = img_out * 255
    return np.array(img_out, dtype=np.uint8)


def hair(image, parsing, part=17, color=[230, 50, 20]):
    b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
    tar_color = np.zeros_like(image)
    tar_color[:, :, 0] = b
    tar_color[:, :, 1] = g
    tar_color[:, :, 2] = r

    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV)

    if part == 12 or part == 13:
        image_hsv[:, :, 0:2] = tar_hsv[:, :, 0:2]
    else:
        image_hsv[:, :, 0:1] = tar_hsv[:, :, 0:1]

    changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR)

    if part == 17:
        changed = sharpen(changed)

    changed[parsing != part] = image[parsing != part]
    # changed = cv2.resize(changed, (512, 512))
    return changed


#
# def lip(image, parsing, part=17, color=[230, 50, 20]):
#     b, g, r = color  # [10, 50, 250]  # [10, 250, 10]
#     tar_color = np.zeros_like(image)
#     tar_color[:, :, 0] = b
#     tar_color[:, :, 1] = g
#     tar_color[:, :, 2] = r
#
#     image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
#     il, ia, ib = cv2.split(image_lab)
#
#     tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab)
#     tl, ta, tb = cv2.split(tar_lab)
#
#     image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100)
#     image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128)
#     image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128)
#
#     changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR)
#
#     if part == 17:
#         changed = sharpen(changed)
#
#     changed[parsing != part] = image[parsing != part]
#     # changed = cv2.resize(changed, (512, 512))
#     return changed


if __name__ == '__main__':
    # 1  face
    # 10 nose
    # 11 teeth
    # 12 upper lip
    # 13 lower lip
    # 17 hair
    num = 116
    table = {
        'hair': 17,
        'upper_lip': 12,
        'lower_lip': 13
    }
    image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num)
    parsing_path = 'res/test_res/{}.png'.format(num)

    image = cv2.imread(image_path)
    ori = image.copy()
    parsing = np.array(cv2.imread(parsing_path, 0))
    parsing = cv2.resize(parsing, image.shape[0:2], interpolation=cv2.INTER_NEAREST)

    parts = [table['hair'], table['upper_lip'], table['lower_lip']]
    # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]]
    colors = [[100, 200, 100]]
    # zip() stops at the shorter list, so only the first part (hair) is recolored here
    for part, color in zip(parts, colors):
        image = hair(image, parsing, part, color)
    cv2.imwrite('res/makeup/116_ori.png', cv2.resize(ori, (512, 512)))
    cv2.imwrite('res/makeup/116_2.png', cv2.resize(image, (512, 512)))

    cv2.imshow('image', cv2.resize(ori, (512, 512)))
    cv2.imshow('color', cv2.resize(image, (512, 512)))

    # cv2.imshow('image', ori)
    # cv2.imshow('color', image)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
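A hedged usage sketch for hair() outside the __main__ block: the file names are hypothetical, the single-channel label map is read the same way the __main__ above reads res/test_res/*.png, and nearest-neighbor resizing keeps the class indices intact.

import cv2
from makeup import hair

image = cv2.imread('face.jpg')                            # hypothetical input photo (BGR)
parsing = cv2.imread('face_parsing.png', 0)               # single-channel label map
parsing = cv2.resize(parsing, (image.shape[1], image.shape[0]),
                     interpolation=cv2.INTER_NEAREST)     # nearest keeps labels intact
recolored = hair(image, parsing, part=17, color=[230, 50, 20])  # BGR target color
cv2.imwrite('face_recolored.png', recolored)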
@@ -1,283 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from resnet import Resnet18
# from modules.bn import InPlaceABNSync as BatchNorm2d


class ConvBNReLU(nn.Module):
    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super(ConvBNReLU, self).__init__()
        self.conv = nn.Conv2d(in_chan,
                              out_chan,
                              kernel_size=ks,
                              stride=stride,
                              padding=padding,
                              bias=False)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = F.relu(self.bn(x))
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class BiSeNetOutput(nn.Module):
    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super(BiSeNetOutput, self).__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def forward(self, x):
        x = self.conv(x)
        x = self.conv_out(x)
        return x

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class AttentionRefinementModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(AttentionRefinementModule, self).__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def forward(self, x):
        feat = self.conv(x)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv_atten(atten)
        atten = self.bn_atten(atten)
        atten = self.sigmoid_atten(atten)
        out = torch.mul(feat, atten)
        return out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)


class ContextPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(ContextPath, self).__init__()
        self.resnet = Resnet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

        self.init_weight()

    def forward(self, x):
        H0, W0 = x.size()[2:]
        feat8, feat16, feat32 = self.resnet(x)
        H8, W8 = feat8.size()[2:]
        H16, W16 = feat16.size()[2:]
        H32, W32 = feat32.size()[2:]

        avg = F.avg_pool2d(feat32, feat32.size()[2:])
        avg = self.conv_avg(avg)
        avg_up = F.interpolate(avg, (H32, W32), mode='nearest')

        feat32_arm = self.arm32(feat32)
        feat32_sum = feat32_arm + avg_up
        feat32_up = F.interpolate(feat32_sum, (H16, W16), mode='nearest')
        feat32_up = self.conv_head32(feat32_up)

        feat16_arm = self.arm16(feat16)
        feat16_sum = feat16_arm + feat32_up
        feat16_up = F.interpolate(feat16_sum, (H8, W8), mode='nearest')
        feat16_up = self.conv_head16(feat16_up)

        return feat8, feat16_up, feat32_up  # x8, x8, x16

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


### This is not used, since I replaced it with the resnet feature of the same size
class SpatialPath(nn.Module):
    def __init__(self, *args, **kwargs):
        super(SpatialPath, self).__init__()
        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
        self.init_weight()

    def forward(self, x):
        feat = self.conv1(x)
        feat = self.conv2(feat)
        feat = self.conv3(feat)
        feat = self.conv_out(feat)
        return feat

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class FeatureFusionModule(nn.Module):
    def __init__(self, in_chan, out_chan, *args, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan,
                               out_chan // 4,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4,
                               out_chan,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def forward(self, fsp, fcp):
        fcat = torch.cat([fsp, fcp], dim=1)
        feat = self.convblk(fcat)
        atten = F.avg_pool2d(feat, feat.size()[2:])
        atten = self.conv1(atten)
        atten = self.relu(atten)
        atten = self.conv2(atten)
        atten = self.sigmoid(atten)
        feat_atten = torch.mul(feat, atten)
        feat_out = feat_atten + feat
        return feat_out

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
                wd_params.append(module.weight)
                if not module.bias is None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


class BiSeNet(nn.Module):
    def __init__(self, n_classes, *args, **kwargs):
        super(BiSeNet, self).__init__()
        self.cp = ContextPath()
        ## here self.sp is deleted
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, n_classes)
        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
        self.init_weight()

    def forward(self, x):
        H, W = x.size()[2:]
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)  # here return res3b1 feature
        feat_sp = feat_res8  # use res3b1 feature to replace spatial path feature
        feat_fuse = self.ffm(feat_sp, feat_cp8)

        feat_out = self.conv_out(feat_fuse)
        feat_out16 = self.conv_out16(feat_cp8)
        feat_out32 = self.conv_out32(feat_cp16)

        feat_out = F.interpolate(feat_out, (H, W), mode='bilinear', align_corners=True)
        feat_out16 = F.interpolate(feat_out16, (H, W), mode='bilinear', align_corners=True)
        feat_out32 = F.interpolate(feat_out32, (H, W), mode='bilinear', align_corners=True)
        return feat_out, feat_out16, feat_out32

    def init_weight(self):
        for ly in self.children():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if not ly.bias is None: nn.init.constant_(ly.bias, 0)

    def get_params(self):
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
        for name, child in self.named_children():
            child_wd_params, child_nowd_params = child.get_params()
            if isinstance(child, FeatureFusionModule) or isinstance(child, BiSeNetOutput):
                lr_mul_wd_params += child_wd_params
                lr_mul_nowd_params += child_nowd_params
            else:
                wd_params += child_wd_params
                nowd_params += child_nowd_params
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params


if __name__ == "__main__":
    net = BiSeNet(19)
    net.cuda()
    net.eval()
    in_ten = torch.randn(16, 3, 640, 480).cuda()
    out, out16, out32 = net(in_ten)
    print(out.shape)

    net.get_params()
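A minimal inference sketch for the BiSeNet above, assuming model.py and its resnet.py dependency (not part of this diff) are importable; Resnet18 fetches pretrained weights on first use, and CPU is used purely for illustration:

import torch
from model import BiSeNet

net = BiSeNet(n_classes=19)
net.eval()
with torch.no_grad():
    x = torch.randn(1, 3, 512, 512)            # stand-in for one normalized RGB image
    main_out, aux16, aux32 = net(x)            # three heads, all upsampled to input size
    parsing = main_out.squeeze(0).argmax(0)    # (512, 512) map of class indices
print(parsing.shape)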
Binary file not shown.
@@ -1,5 +0,0 @@
from .bn import ABN, InPlaceABN, InPlaceABNSync
from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
from .misc import GlobalAvgPool2d, SingleGPU
from .residual import IdentityResidualBlock
from .dense import DenseModule
@@ -1,130 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

try:
    from queue import Queue
except ImportError:
    from Queue import Queue

from .functions import *


class ABN(nn.Module):
    """Activated Batch Normalization

    This gathers a `BatchNorm2d` and an activation function in a single module
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if self.affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if self.affine:
            nn.init.constant_(self.weight, 1)
            nn.init.constant_(self.bias, 0)

    def forward(self, x):
        x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
                                  self.training, self.momentum, self.eps)

        if self.activation == ACT_RELU:
            return functional.relu(x, inplace=True)
        elif self.activation == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        elif self.activation == ACT_ELU:
            return functional.elu(x, inplace=True)
        else:
            return x

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)


class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization"""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an InPlace Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied to compute running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)

    def forward(self, x):
        return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
                           self.training, self.momentum, self.eps, self.activation, self.slope)


class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization with cross-GPU synchronization

    This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
    """

    def forward(self, x):
        return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
                                self.training, self.momentum, self.eps, self.activation, self.slope)

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)
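ABN itself is plain PyTorch, so it can stand in for a BatchNorm2d + activation pair. One caveat for the sketch below: importing modules.bn pulls in modules.functions, which compiles the CUDA extension at import time, so a working CUDA toolchain is assumed.

import torch
import torch.nn as nn
from modules.bn import ABN  # triggers the extension build via modules.functions

block = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1, bias=False),
    ABN(16, activation="leaky_relu", slope=0.01),  # BN and activation in one module
)
y = block(torch.randn(2, 3, 8, 8))
print(y.shape)  # torch.Size([2, 16, 8, 8])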
@@ -1,84 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as functional

from models._util import try_index
from .bn import ABN


class DeeplabV3(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        self.map_convs = nn.ModuleList([
            nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
        ])
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Map convolutions
        out = torch.cat([m(x) for m in self.map_convs], dim=1)
        out = self.map_bn(out)
        out = self.red_conv(out)

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_conv(pool)
        pool = self.global_pooling_bn(pool)
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        if self.training or self.pooling_size is None:
            pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
            pool = pool.view(x.size(0), x.size(1), 1, 1)
        else:
            pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                            min(try_index(self.pooling_size, 1), x.shape[3]))
            padding = (
                (pooling_size[1] - 1) // 2,
                (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
                (pooling_size[0] - 1) // 2,
                (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
            )

            pool = functional.avg_pool2d(x, pooling_size, stride=1)
            pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool
@@ -1,42 +0,0 @@
from collections import OrderedDict

import torch
import torch.nn as nn

from .bn import ABN


class DenseModule(nn.Module):
    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        for i in range(self.layers):
            self.convs1.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(in_channels)),
                ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
            ])))
            self.convs3.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(self.growth * bottleneck_factor)),
                ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
                                   dilation=dilation))
            ])))
            in_channels += self.growth

    @property
    def out_channels(self):
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        inputs = [x]
        for i in range(self.layers):
            x = torch.cat(inputs, dim=1)
            x = self.convs1[i](x)
            x = self.convs3[i](x)
            inputs += [x]

        return torch.cat(inputs, dim=1)
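A quick stand-alone check of the channel bookkeeping in DenseModule: layer i consumes in_channels + i * growth channels and concatenates growth new ones, which is exactly what the out_channels property reports. The configuration values below are hypothetical.

in_channels, growth, layers = 64, 16, 4
widths = [in_channels + i * growth for i in range(layers + 1)]
print(widths)      # [64, 80, 96, 112, 128] -- per-layer input widths, then the output
print(widths[-1])  # 128 == in_channels + growth * layers == out_channels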
@@ -1,234 +0,0 @@
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu",
                    "inplace_abn_cuda_half.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))


def _broadcast_shape(x):
    out_size = []
    for i, s in enumerate(x.size()):
        if i != 1:
            out_size.append(1)
        else:
            out_size.append(s)
    return out_size


def _reduce(x):
    if len(x.size()) == 2:
        return x.sum(dim=0)
    else:
        n, c = x.size()[0:2]
        return x.contiguous().view((n, c, -1)).sum(2).sum(0)


def _count_samples(x):
    count = 1
    for i, s in enumerate(x.size()):
        if i != 1:
            count *= s
    return count


def _act_forward(ctx, x):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_forward(x)
    elif ctx.activation == ACT_NONE:
        pass


def _act_backward(ctx, x, dx):
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_backward(x, dx)
    elif ctx.activation == ACT_NONE:
        pass


class InPlaceABN(autograd.Function):
    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        count = _count_samples(x)
        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
        else:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz * weight.sign() if ctx.affine else None
        dbias = edz if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


class InPlaceABNSync(autograd.Function):
    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

        # count = _count_samples(x)
        batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size > 1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                ctx.factor = x.shape[0] / float(batch_size.item())

                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size > 1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz_local * weight.sign() if ctx.affine else None
        dbias = edz_local if ctx.affine else None

        return dx, dweight, dbias, None, None, None, None, None, None, None


inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
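The statistics merge in InPlaceABNSync.forward is the subtle part: each worker contributes a factor-weighted mean, and variances are corrected by the squared distance between local and global means (the law of total variance). A CPU sketch with two hypothetical workers, checked against the statistics of the concatenated batch:

import torch

x_a = torch.randn(8, 4, 16)                     # worker A's batch, laid out (N, C, S)
x_b = torch.randn(8, 4, 16)                     # worker B's batch
mean_a, var_a = x_a.mean(dim=(0, 2)), x_a.var(dim=(0, 2), unbiased=False)
mean_b, var_b = x_b.mean(dim=(0, 2)), x_b.var(dim=(0, 2), unbiased=False)

factor = 0.5                                    # equal batches -> 1 / world_size
mean_all = factor * (mean_a + mean_b)           # what the first all_reduce computes
var_all = factor * ((var_a + (mean_a - mean_all) ** 2) +
                    (var_b + (mean_b - mean_all) ** 2))  # the second all_reduce

x_cat = torch.cat([x_a, x_b], dim=0)            # ground truth: union of both batches
print(torch.allclose(mean_all, x_cat.mean(dim=(0, 2)), atol=1e-5),
      torch.allclose(var_all, x_cat.var(dim=(0, 2), unbiased=False), atol=1e-5))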
@@ -1,21 +0,0 @@
import torch.nn as nn
import torch
import torch.distributed as dist

class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        """Global average pooling over the input's spatial dimensions"""
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        in_size = inputs.size()
        return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)

class SingleGPU(nn.Module):
    def __init__(self, module):
        super(SingleGPU, self).__init__()
        self.module = module

    def forward(self, input):
        return self.module(input.cuda(non_blocking=True))
@@ -1,88 +0,0 @@
from collections import OrderedDict

import torch.nn as nn

from .bn import ABN


class IdentityResidualBlock(nn.Module):
    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Can either have two or three elements: if two, construct
            a residual block with two `3 x 3` convolutions; if three, construct a bottleneck block with `1 x 1`, then
            `3 x 3`, then `1 x 1` convolutions.
        stride : int
            Stride of the first `3 x 3` convolution
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.
        """
        super(IdentityResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) != 2 and len(channels) != 3:
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            if dropout is not None:
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        if hasattr(self, "proj_conv"):
            bn1 = self.bn1(x)
            shortcut = self.proj_conv(bn1)
        else:
            shortcut = x.clone()
            bn1 = self.bn1(x)

        out = self.convs(bn1)
        out.add_(shortcut)

        return out
@@ -1,15 +0,0 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
@@ -1,95 +0,0 @@
#include <torch/extension.h>

#include <vector>

#include "inplace_abn.h"

std::vector<at::Tensor> mean_var(at::Tensor x) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return mean_var_cuda_h(x);
    } else {
      return mean_var_cuda(x);
    }
  } else {
    return mean_var_cpu(x);
  }
}

at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                   bool affine, float eps) {
  if (x.is_cuda()) {
    if (x.type().scalarType() == at::ScalarType::Half) {
      return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
    } else {
      return forward_cuda(x, mean, var, weight, bias, affine, eps);
    }
  } else {
    return forward_cpu(x, mean, var, weight, bias, affine, eps);
  }
}

std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                 bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
    } else {
      return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
    }
  } else {
    return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
  }
}

at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                    at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
    } else {
      return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
    }
  } else {
    return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
  }
}

void leaky_relu_forward(at::Tensor z, float slope) {
  at::leaky_relu_(z, slope);
}

void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
  if (z.is_cuda()) {
    if (z.type().scalarType() == at::ScalarType::Half) {
      return leaky_relu_backward_cuda_h(z, dz, slope);
    } else {
      return leaky_relu_backward_cuda(z, dz, slope);
    }
  } else {
    return leaky_relu_backward_cpu(z, dz, slope);
  }
}

void elu_forward(at::Tensor z) {
  at::elu_(z);
}

void elu_backward(at::Tensor z, at::Tensor dz) {
  if (z.is_cuda()) {
    return elu_backward_cuda(z, dz);
  } else {
    return elu_backward_cpu(z, dz);
  }
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("mean_var", &mean_var, "Mean and variance computation");
  m.def("forward", &forward, "In-place forward computation");
  m.def("edz_eydz", &edz_eydz, "First part of backward computation");
  m.def("backward", &backward, "Second part of backward computation");
  m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
  m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
  m.def("elu_forward", &elu_forward, "Elu forward computation");
  m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
}
@@ -1,88 +0,0 @@
#pragma once

#include <ATen/ATen.h>

#include <vector>

std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x);

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps);
at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps);
at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps);

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps);
std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps);

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps);
at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps);

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);

void elu_backward_cpu(at::Tensor z, at::Tensor dz);
void elu_backward_cuda(at::Tensor z, at::Tensor dz);

static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
  num = x.size(0);
  chn = x.size(1);
  sp = 1;
  for (int64_t i = 2; i < x.ndimension(); ++i)
    sp *= x.size(i);
}

/*
 * Specialized CUDA reduction functions for BN
 */
#ifdef __CUDACC__

#include "utils/cuda.cuh"

template <typename T, typename Op>
__device__ T reduce(Op op, int plane, int N, int S) {
  T sum = (T)0;
  for (int batch = 0; batch < N; ++batch) {
    for (int x = threadIdx.x; x < S; x += blockDim.x) {
      sum += op(batch, plane, x);
    }
  }

  // sum over NumThreads within a warp
  sum = warpSum(sum);

  // 'transpose', and reduce within warp again
  __shared__ T shared[32];
  __syncthreads();
  if (threadIdx.x % WARP_SIZE == 0) {
    shared[threadIdx.x / WARP_SIZE] = sum;
  }
  if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
    // zero out the other entries in shared
    shared[threadIdx.x] = (T)0;
  }
  __syncthreads();
  if (threadIdx.x / WARP_SIZE == 0) {
    sum = warpSum(shared[threadIdx.x]);
    if (threadIdx.x == 0) {
      shared[0] = sum;
    }
  }
  __syncthreads();

  // Everyone picks it up, should be broadcast into the whole gradInput
  return shared[0];
}
#endif
@ -1,119 +0,0 @@
#include <ATen/ATen.h>

#include <vector>

#include "utils/checks.h"
#include "inplace_abn.h"

at::Tensor reduce_sum(at::Tensor x) {
  if (x.ndimension() == 2) {
    return x.sum(0);
  } else {
    auto x_view = x.view({x.size(0), x.size(1), -1});
    return x_view.sum(-1).sum(0);
  }
}

at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
  if (x.ndimension() == 2) {
    return v;
  } else {
    std::vector<int64_t> broadcast_size = {1, -1};
    for (int64_t i = 2; i < x.ndimension(); ++i)
      broadcast_size.push_back(1);

    return v.view(broadcast_size);
  }
}

int64_t count(at::Tensor x) {
  int64_t count = x.size(0);
  for (int64_t i = 2; i < x.ndimension(); ++i)
    count *= x.size(i);

  return count;
}

at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
  if (affine) {
    return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
  } else {
    return z;
  }
}

std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
  auto num = count(x);
  auto mean = reduce_sum(x) / num;
  auto diff = x - broadcast_to(mean, x);
  auto var = reduce_sum(diff.pow(2)) / num;

  return {mean, var};
}

at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                       bool affine, float eps) {
  auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
  auto mul = at::rsqrt(var + eps) * gamma;

  x.sub_(broadcast_to(mean, x));
  x.mul_(broadcast_to(mul, x));
  if (affine) x.add_(broadcast_to(bias, x));

  return x;
}

std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                     bool affine, float eps) {
  auto edz = reduce_sum(dz);
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto eydz = reduce_sum(y * dz);

  return {edz, eydz};
}

at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  auto y = invert_affine(z, weight, bias, affine, eps);
  auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);

  auto num = count(z);
  auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
  return dx;
}

void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // invert the activation to recover its input, and scale the gradient
        _z[i] *= 1 / slope;
        _dz[i] *= slope;
      }
    }
  }));
}

void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
  CHECK_CPU_INPUT(z);
  CHECK_CPU_INPUT(dz);

  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
    int64_t count = z.numel();
    auto *_z = z.data<scalar_t>();
    auto *_dz = dz.data<scalar_t>();

    for (int64_t i = 0; i < count; ++i) {
      if (_z[i] < 0) {
        // scale the gradient by d(elu)/dx = z + 1 using the stored output z
        // *before* inverting it, matching the order in the CUDA implementation
        _dz[i] *= (_z[i] + 1.f);
        _z[i] = log1p(_z[i]);
      }
    }
  }));
}
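
The key trick of inplace ABN is visible above: backward_cpu never needs the pre-activation input, because invert_affine reconstructs the normalized value y from the stored output z. A small PyTorch sketch of that round trip, assuming an affine BN that uses |weight| + eps as the effective gamma, as in the source (illustrative only):

import torch

eps = 1e-5
x = torch.randn(4, 8, 16, 16)
weight, bias = torch.randn(8), torch.randn(8)

mean = x.mean(dim=(0, 2, 3))
var = x.var(dim=(0, 2, 3), unbiased=False)

def bc(v):  # broadcast a per-channel vector over (N, C, H, W)
    return v.view(1, -1, 1, 1)

y = (x - bc(mean)) * torch.rsqrt(bc(var) + eps)     # normalized value
z = y * bc(weight.abs() + eps) + bc(bias)           # stored output
y_rec = (z - bc(bias)) / bc(weight.abs() + eps)     # invert_affine
print(torch.allclose(y, y_rec, atol=1e-5))          # True
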
@ -1,333 +0,0 @@
#include <ATen/ATen.h>

#include <thrust/device_ptr.h>
#include <thrust/transform.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
template<typename T>
struct SumOp {
  __device__ SumOp(const T *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    return tensor[(batch * chn + plane) * sp + n];
  }
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct VarOp {
  __device__ VarOp(T m, const T *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ T operator()(int batch, int plane, int n) {
    T val = tensor[(batch * chn + plane) * sp + n];
    return (val - mean) * (val - mean);
  }
  const T mean;
  const T *tensor;
  const int chn;
  const int sp;
};

template<typename T>
struct GradOp {
  __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
    T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
    T _dz = dz[(batch * chn + plane) * sp + n];
    return Pair<T>(_dz, _y * _dz);
  }
  const T weight;
  const T bias;
  const T *z;
  const T *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

template<typename T>
__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  T norm = T(1) / T(num * sp);

  T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options());
  auto var = at::empty({chn}, x.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
    mean_var_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        num, chn, sp);
  }));

  return {mean, var};
}

/**********
 * forward
 **********/

template<typename T>
__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
                               bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _mean = mean[plane];
  T _var = var[plane];
  T _weight = affine ? abs(weight[plane]) + eps : T(1);
  T _bias = affine ? bias[plane] : T(0);

  T mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _x = x[(batch * chn + plane) * sp + n];
      T _y = (_x - _mean) * mul + _bias;

      x[(batch * chn + plane) * sp + n] = _y;
    }
  }
}

at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                        bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
    forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        x.data<scalar_t>(),
        mean.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return x;
}

/***********
 * edz_eydz
 ***********/

template<typename T>
__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
                                T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;

  Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                      bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options());
  auto eydz = at::empty({chn}, z.options());

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
    edz_eydz_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return {edz, eydz};
}

/***********
 * backward
 ***********/

template<typename T>
__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
                                const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  T _weight = affine ? abs(weight[plane]) + eps : 1.f;
  T _bias = affine ? bias[plane] : 0.f;
  T _var = var[plane];
  T _edz = edz[plane];
  T _eydz = eydz[plane];

  T _mul = _weight * rsqrt(_var + eps);
  T count = T(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      T _dz = dz[(batch * chn + plane) * sp + n];
      T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
    }
  }
}

at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                         at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
    backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
        z.data<scalar_t>(),
        dz.data<scalar_t>(),
        var.data<scalar_t>(),
        weight.data<scalar_t>(),
        bias.data<scalar_t>(),
        edz.data<scalar_t>(),
        eydz.data<scalar_t>(),
        dx.data<scalar_t>(),
        affine, eps, num, chn, sp);
  }));

  return dx;
}

/**************
 * activations
 **************/

template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_dz,
                       [slope] __device__ (const T& dz) { return dz * slope; },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [slope] __device__ (const T& z) { return z / slope; },
                       [] __device__ (const T& z) { return z < 0; });
}

void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
}

template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_z, th_dz,
                       [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [] __device__ (const T& z) { return log1p(z); },
                       [] __device__ (const T& z) { return z < 0; });
}

void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  // dispatch label corrected from "leaky_relu_backward_cuda" (copy-paste slip)
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}
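
As a cross-check on the reduction kernels, the two per-channel statistics gathered by edz_eydz have a direct tensor expression. A hedged PyTorch equivalent (illustrative only, not part of the source):

import torch

dz = torch.randn(4, 8, 256)       # gradient of the stored output z
y = torch.randn(4, 8, 256)        # normalized activations, recovered via invert_affine
edz = dz.sum(dim=(0, 2))          # res.v1 for each channel plane
eydz = (y * dz).sum(dim=(0, 2))   # res.v2 for each channel plane
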
@ -1,275 +0,0 @@
#include <ATen/ATen.h>

#include <cuda_fp16.h>

#include <vector>

#include "utils/checks.h"
#include "utils/cuda.cuh"
#include "inplace_abn.h"

#include <ATen/cuda/CUDAContext.h>

// Operations for reduce
struct SumOpH {
  __device__ SumOpH(const half *t, int c, int s)
      : tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    return __half2float(tensor[(batch * chn + plane) * sp + n]);
  }
  const half *tensor;
  const int chn;
  const int sp;
};

struct VarOpH {
  __device__ VarOpH(float m, const half *t, int c, int s)
      : mean(m), tensor(t), chn(c), sp(s) {}
  __device__ __forceinline__ float operator()(int batch, int plane, int n) {
    const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);
    return (t - mean) * (t - mean);
  }
  const float mean;
  const half *tensor;
  const int chn;
  const int sp;
};

struct GradOpH {
  __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
      : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
  __device__ __forceinline__ Pair<float> operator()(int batch, int plane, int n) {
    float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;
    float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
    return Pair<float>(_dz, _y * _dz);
  }
  const float weight;
  const float bias;
  const half *z;
  const half *dz;
  const int chn;
  const int sp;
};

/***********
 * mean_var
 ***********/

__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {
  int plane = blockIdx.x;
  float norm = 1.f / static_cast<float>(num * sp);

  float _mean = reduce<float, SumOpH>(SumOpH(x, chn, sp), plane, num, sp) * norm;
  __syncthreads();
  float _var = reduce<float, VarOpH>(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;

  if (threadIdx.x == 0) {
    mean[plane] = _mean;
    var[plane] = _var;
  }
}

std::vector<at::Tensor> mean_var_cuda_h(at::Tensor x) {
  CHECK_CUDA_INPUT(x);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Prepare output tensors
  auto mean = at::empty({chn}, x.options().dtype(at::kFloat));
  auto var = at::empty({chn}, x.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  mean_var_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      num, chn, sp);

  return {mean, var};
}

/**********
 * forward
 **********/

__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,
                                 bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  const float _mean = mean[plane];
  const float _var = var[plane];
  const float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  const float _bias = affine ? bias[plane] : 0.f;

  const float mul = rsqrt(_var + eps) * _weight;

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      half *x_ptr = x + (batch * chn + plane) * sp + n;
      float _x = __half2float(*x_ptr);
      float _y = (_x - _mean) * mul + _bias;

      *x_ptr = __float2half(_y);
    }
  }
}

at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
                          bool affine, float eps) {
  CHECK_CUDA_INPUT(x);
  CHECK_CUDA_INPUT(mean);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(x, num, chn, sp);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  forward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(x.data<at::Half>()),
      mean.data<float>(),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      affine, eps, num, chn, sp);

  return x;
}

__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,
                                  float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;

  Pair<float> res = reduce<Pair<float>, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
  __syncthreads();

  if (threadIdx.x == 0) {
    edz[plane] = res.v1;
    eydz[plane] = res.v2;
  }
}

std::vector<at::Tensor> edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
                                        bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto edz = at::empty({chn}, z.options().dtype(at::kFloat));
  auto eydz = at::empty({chn}, z.options().dtype(at::kFloat));

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  edz_eydz_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      affine, eps, num, chn, sp);

  return {edz, eydz};
}

__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,
                                  const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
  int plane = blockIdx.x;

  float _weight = affine ? abs(weight[plane]) + eps : 1.f;
  float _bias = affine ? bias[plane] : 0.f;
  float _var = var[plane];
  float _edz = edz[plane];
  float _eydz = eydz[plane];

  float _mul = _weight * rsqrt(_var + eps);
  float count = float(num * sp);

  for (int batch = 0; batch < num; ++batch) {
    for (int n = threadIdx.x; n < sp; n += blockDim.x) {
      float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
      float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;

      dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
    }
  }
}

at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
                           at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);
  CHECK_CUDA_INPUT(var);
  CHECK_CUDA_INPUT(weight);
  CHECK_CUDA_INPUT(bias);
  CHECK_CUDA_INPUT(edz);
  CHECK_CUDA_INPUT(eydz);

  // Extract dimensions
  int64_t num, chn, sp;
  get_dims(z, num, chn, sp);

  auto dx = at::zeros_like(z);

  // Run kernel
  dim3 blocks(chn);
  dim3 threads(getNumThreads(sp));
  auto stream = at::cuda::getCurrentCUDAStream();
  backward_kernel_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      var.data<float>(),
      weight.data<float>(),
      bias.data<float>(),
      edz.data<float>(),
      eydz.data<float>(),
      reinterpret_cast<half*>(dx.data<at::Half>()),
      affine, eps, num, chn, sp);

  return dx;
}

__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x) {
    float _z = __half2float(z[i]);
    if (_z < 0) {
      dz[i] = __float2half(__half2float(dz[i]) * slope);
      z[i] = __float2half(_z / slope);
    }
  }
}

void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();
  dim3 threads(getNumThreads(count));
  dim3 blocks = (count + threads.x - 1) / threads.x;
  auto stream = at::cuda::getCurrentCUDAStream();
  leaky_relu_backward_impl_h<<<blocks, threads, 0, stream>>>(
      reinterpret_cast<half*>(z.data<at::Half>()),
      reinterpret_cast<half*>(dz.data<at::Half>()),
      slope, count);
}
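
Note the design choice in this half-precision path: tensors are stored as fp16, but every statistic (mean, var, edz, eydz) is accumulated and kept in fp32 to avoid the catastrophic rounding that summing many fp16 values would cause. A small PyTorch sketch of the same policy (illustrative only):

import torch

x_half = torch.randn(4, 8, 32 * 32, dtype=torch.float16)
# mirror of mean_var_cuda_h: read half values, accumulate and store in float32
x_f32 = x_half.float()
mean = x_f32.mean(dim=(0, 2))                  # at::kFloat outputs, as in the source
var = x_f32.var(dim=(0, 2), unbiased=False)
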
@ -1,15 +0,0 @@
#pragma once

#include <ATen/ATen.h>

// Define AT_CHECK for old versions of ATen where the same function was called AT_ASSERT
#ifndef AT_CHECK
#define AT_CHECK AT_ASSERT
#endif

#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")

#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
@ -1,49 +0,0 @@
#pragma once

#include <ATen/ATen.h>

/*
 * Functions to share code between CPU and GPU
 */

#ifdef __CUDACC__
// CUDA versions

#define HOST_DEVICE __host__ __device__
#define INLINE_HOST_DEVICE __host__ __device__ inline
#define FLOOR(x) floor(x)

#if __CUDA_ARCH__ >= 600
// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
#define ACCUM(x,y) atomicAdd_block(&(x),(y))
#else
// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
// and use the known atomicCAS-based implementation for double
template<typename data_t>
__device__ inline data_t atomic_add(data_t *address, data_t val) {
  return atomicAdd(address, val);
}

template<>
__device__ inline double atomic_add(double *address, double val) {
  unsigned long long int* address_as_ull = (unsigned long long int*)address;
  unsigned long long int old = *address_as_ull, assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
  return __longlong_as_double(old);
}

#define ACCUM(x,y) atomic_add(&(x),(y))
#endif // #if __CUDA_ARCH__ >= 600

#else
// CPU versions

#define HOST_DEVICE
#define INLINE_HOST_DEVICE inline
#define FLOOR(x) std::floor(x)
#define ACCUM(x,y) (x) += (y)

#endif // #ifdef __CUDACC__
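
The double-precision fallback above emulates an atomic add with a compare-and-swap retry loop: read the cell, propose old + val, and retry whenever another thread changed the cell in between. A toy, single-threaded Python rendition of that control flow (purely illustrative):

def cas(cell, expected, new):
    # compare-and-swap: write `new` only if the cell still holds `expected`
    old = cell[0]
    if old == expected:
        cell[0] = new
    return old

def atomic_add(cell, val):
    old = cell[0]
    while True:
        assumed = old
        old = cas(cell, assumed, assumed + val)
        if assumed == old:  # nobody raced us; the add landed
            return old

cell = [1.5]
atomic_add(cell, 2.0)
print(cell[0])  # 3.5
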
@ -1,71 +0,0 @@
#pragma once

/*
 * General settings and functions
 */
const int WARP_SIZE = 32;
const int MAX_BLOCK_SIZE = 1024;

static int getNumThreads(int nElem) {
  int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
  for (int i = 0; i < 6; ++i) {
    if (nElem <= threadSizes[i]) {
      return threadSizes[i];
    }
  }
  return MAX_BLOCK_SIZE;
}

/*
 * Reduction utilities
 */
template <typename T>
__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
                                           unsigned int mask = 0xffffffff) {
#if CUDART_VERSION >= 9000
  return __shfl_xor_sync(mask, value, laneMask, width);
#else
  return __shfl_xor(value, laneMask, width);
#endif
}

__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }

template<typename T>
struct Pair {
  T v1, v2;
  __device__ Pair() {}
  __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
  __device__ Pair(T v) : v1(v), v2(v) {}
  __device__ Pair(int v) : v1(v), v2(v) {}
  __device__ Pair &operator+=(const Pair<T> &a) {
    v1 += a.v1;
    v2 += a.v2;
    return *this;
  }
};

template<typename T>
static __device__ __forceinline__ T warpSum(T val) {
#if __CUDA_ARCH__ >= 300
  for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
    val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
  }
#else
  __shared__ T values[MAX_BLOCK_SIZE];
  values[threadIdx.x] = val;
  __threadfence_block();
  const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
  for (int i = 1; i < WARP_SIZE; i++) {
    val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
  }
#endif
  return val;
}

template<typename T>
static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
  value.v1 = warpSum(value.v1);
  value.v2 = warpSum(value.v2);
  return value;
}
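
warpSum uses an XOR butterfly: on each of log2(32) = 5 rounds, every lane adds the value held by its partner at lane_id ^ (1 << i), so after five rounds all 32 lanes hold the full sum without any shared memory. A quick Python emulation of the shuffle pattern (illustrative):

vals = list(range(32))  # one value per lane
for i in range(5):      # getMSB(WARP_SIZE) == 5 rounds
    # every lane simultaneously adds its XOR partner's previous value
    vals = [v + vals[lane ^ (1 << i)] for lane, v in enumerate(vals)]
print(set(vals))        # {496} -- every lane ends up with sum(range(32))
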
@ -1,69 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


import torch
import logging

logger = logging.getLogger()

class Optimizer(object):
    def __init__(self,
            model,
            lr0,
            momentum,
            wd,
            warmup_steps,
            warmup_start_lr,
            max_iter,
            power,
            *args, **kwargs):
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.lr0 = lr0
        self.lr = self.lr0
        self.max_iter = float(max_iter)
        self.power = power
        self.it = 0
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
        param_list = [
                {'params': wd_params},
                {'params': nowd_params, 'weight_decay': 0},
                {'params': lr_mul_wd_params, 'lr_mul': True},
                {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
        self.optim = torch.optim.SGD(
                param_list,
                lr = lr0,
                momentum = momentum,
                weight_decay = wd)
        self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)

    def get_lr(self):
        if self.it <= self.warmup_steps:
            # exponential ramp from warmup_start_lr up to lr0
            lr = self.warmup_start_lr * (self.warmup_factor ** self.it)
        else:
            # polynomial ("poly") decay from lr0 down to 0 at max_iter
            factor = (1 - (self.it - self.warmup_steps) / (self.max_iter - self.warmup_steps)) ** self.power
            lr = self.lr0 * factor
        return lr

    def step(self):
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            if pg.get('lr_mul', False):
                pg['lr'] = self.lr * 10
            else:
                pg['lr'] = self.lr
        if self.optim.defaults.get('lr_mul', False):
            self.optim.defaults['lr'] = self.lr * 10
        else:
            self.optim.defaults['lr'] = self.lr
        self.it += 1
        self.optim.step()
        if self.it == self.warmup_steps + 2:
            logger.info('==> warmup done, start to implement poly lr strategy')

    def zero_grad(self):
        self.optim.zero_grad()
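
To make the two-phase schedule concrete, here is a standalone sketch of get_lr with the training script's settings (lr0=1e-2, warmup_start_lr=1e-5, warmup_steps=1000, max_iter=80000, power=0.9):

lr0, warmup_start_lr = 1e-2, 1e-5
warmup_steps, max_iter, power = 1000, 80000.0, 0.9
warmup_factor = (lr0 / warmup_start_lr) ** (1.0 / warmup_steps)

def lr_at(it):
    if it <= warmup_steps:
        return warmup_start_lr * warmup_factor ** it
    return lr0 * (1 - (it - warmup_steps) / (max_iter - warmup_steps)) ** power

for it in (0, 500, 1000, 40000, 79999):
    print(it, round(lr_at(it), 6))
# lr climbs 1e-5 -> 1e-2 during warmup, then decays polynomially toward 0
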
@ -1,38 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import os.path as osp
import os
import cv2
from transform import *
from PIL import Image

face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
mask_path = '/home/zll/data/CelebAMask-HQ/mask'
counter = 0
total = 0

atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
        'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']

for i in range(15):
    for j in range(i * 2000, (i + 1) * 2000):
        mask = np.zeros((512, 512))

        for l, att in enumerate(atts, 1):
            total += 1
            file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
            path = osp.join(face_sep_mask, str(i), file_name)

            if os.path.exists(path):
                counter += 1
                sep_mask = np.array(Image.open(path).convert('P'))
                # print(np.unique(sep_mask))

                # the per-part annotations are binary, with foreground stored as 225
                mask[sep_mask == 225] = l
        cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
        print(j)

print(counter, total)
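
One subtlety worth knowing when regenerating masks this way: later attributes overwrite earlier ones wherever the per-part masks overlap, so the order of atts acts as a z-order (hair on top of skin, and so on). A tiny NumPy illustration:

import numpy as np

mask = np.zeros((4, 4), dtype=np.uint8)
skin = np.ones((4, 4), dtype=bool)   # class 1 covers everything
hair = np.zeros((4, 4), dtype=bool)
hair[:2] = True                      # class 17 covers the top half

mask[skin] = 1
mask[hair] = 17                      # overwrites skin wherever both are present
print(mask)
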
@ -1,109 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo

# from modules.bn import InPlaceABNSync as BatchNorm2d

resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    def __init__(self, in_chan, out_chan, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        if in_chan != out_chan or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_chan),
                )

    def forward(self, x):
        residual = self.conv1(x)
        residual = F.relu(self.bn1(residual))
        residual = self.conv2(residual)
        residual = self.bn2(residual)

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        out = shortcut + residual
        out = self.relu(out)
        return out


def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    layers = [BasicBlock(in_chan, out_chan, stride=stride)]
    for i in range(bnum - 1):
        layers.append(BasicBlock(out_chan, out_chan, stride=1))
    return nn.Sequential(*layers)


class Resnet18(nn.Module):
    def __init__(self):
        super(Resnet18, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.bn1(x))
        x = self.maxpool(x)

        x = self.layer1(x)
        feat8 = self.layer2(x)        # 1/8
        feat16 = self.layer3(feat8)   # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        state_dict = modelzoo.load_url(resnet18_url)
        self_state_dict = self.state_dict()
        for k, v in state_dict.items():
            if 'fc' in k: continue
            self_state_dict.update({k: v})
        self.load_state_dict(self_state_dict)

    def get_params(self):
        wd_params, nowd_params = [], []
        for name, module in self.named_modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                wd_params.append(module.weight)
                if module.bias is not None:
                    nowd_params.append(module.bias)
            elif isinstance(module, nn.BatchNorm2d):
                nowd_params += list(module.parameters())
        return wd_params, nowd_params


if __name__ == "__main__":
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    print(out[0].size())
    print(out[1].size())
    print(out[2].size())
    net.get_params()
Binary file not shown.
@ -1,100 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet

import torch

import os
import os.path as osp
import numpy as np
from PIL import Image
import torchvision.transforms as transforms
import cv2

def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
    # Colors for all 20 parts
    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
                   [255, 0, 85], [255, 0, 170],
                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
                   [0, 255, 85], [0, 255, 170],
                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
                   [0, 85, 255], [0, 170, 255],
                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]

    im = np.array(im)
    vis_im = im.copy().astype(np.uint8)
    vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
    vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
    vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255

    num_of_class = np.max(vis_parsing_anno)

    for pi in range(1, num_of_class + 1):
        index = np.where(vis_parsing_anno == pi)
        vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]

    vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
    # print(vis_parsing_anno_color.shape, vis_im.shape)
    vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)

    # Save result or not
    if save_im:
        cv2.imwrite(save_path[:-4] + '.png', vis_parsing_anno)
        cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # return vis_im

def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):

    if not os.path.exists(respth):
        os.makedirs(respth)

    n_classes = 19
    net = BiSeNet(n_classes=n_classes)
    save_pth = osp.join('res/cp', cp)

    if torch.cuda.is_available():
        net.cuda()
        net.load_state_dict(torch.load(save_pth))
    else:
        net.load_state_dict(torch.load(save_pth, map_location=lambda storage, loc: storage))

    net.eval()

    to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    with torch.no_grad():
        for image_path in os.listdir(dspth):
            img = Image.open(osp.join(dspth, image_path))
            image = img.resize((512, 512), Image.BILINEAR)
            img = to_tensor(image)
            img = torch.unsqueeze(img, 0)
            if torch.cuda.is_available():
                img = img.cuda()
            out = net(img)[0]
            if torch.cuda.is_available():
                parsing = out.squeeze(0).cpu().numpy().argmax(0)
            else:
                parsing = out.squeeze(0).numpy().argmax(0)
            # print(parsing)
            print(np.unique(parsing))

            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))


if __name__ == "__main__":
    # note: evaluate() scans dspth with os.listdir, so dspth should be a
    # directory of images, not a single file
    evaluate(dspth='makeup/116_ori.png', cp='79999_iter.pth')
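
The per-class loop in vis_parsing_maps can also be written as a single palette lookup, which stays fast as the number of classes grows. A hedged NumPy alternative (not part of the source; the palette below is truncated for brevity):

import numpy as np

palette = np.array([[255, 255, 255],           # index 0: background stays white
                    [255, 0, 0], [255, 85, 0], [255, 170, 0]], dtype=np.uint8)
parsing = np.random.randint(0, 4, size=(512, 512))
color = palette[parsing]                       # fancy indexing gives (512, 512, 3)
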
@ -1,179 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-

from logger import setup_logger
from model import BiSeNet
from face_dataset import FaceMask
from loss import OhemCELoss
from evaluate import evaluate
from optimizer import Optimizer
import cv2
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.distributed as dist

import os
import os.path as osp
import logging
import time
import datetime
import argparse


respth = './res'
if not osp.exists(respth):
    os.makedirs(respth)
logger = logging.getLogger()


def parse_args():
    parse = argparse.ArgumentParser()
    parse.add_argument(
            '--local_rank',
            dest = 'local_rank',
            type = int,
            default = -1,
            )
    return parse.parse_args()


def train():
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
            backend = 'nccl',
            init_method = 'tcp://127.0.0.1:33241',
            world_size = torch.cuda.device_count(),
            rank=args.local_rank
            )
    setup_logger(respth)

    # dataset
    n_classes = 19
    n_img_per_gpu = 16
    n_workers = 8
    cropsize = [448, 448]
    data_root = '/home/zll/data/CelebAMask-HQ/'

    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(ds,
                    batch_size = n_img_per_gpu,
                    shuffle = False,
                    sampler = sampler,
                    num_workers = n_workers,
                    pin_memory = True,
                    drop_last = True)

    # model
    ignore_idx = -100
    net = BiSeNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(net,
            device_ids = [args.local_rank, ],
            output_device = args.local_rank
            )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
            model = net.module,
            lr0 = lr_start,
            momentum = momentum,
            wd = weight_decay,
            warmup_steps = warmup_steps,
            warmup_start_lr = warmup_start_lr,
            max_iter = max_iter,
            power = power)

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        H, W = im.size()[2:]
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())

        # print training log message
        if (it + 1) % msg_iter == 0:
            loss_avg = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - it) * (glob_t_intv / it))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                    'it: {it}/{max_it}',
                    'lr: {lr:4f}',
                    'loss: {loss:.4f}',
                    'eta: {eta}',
                    'time: {time:.4f}',
                ]).format(
                    it = it + 1,
                    max_it = max_iter,
                    lr = lr,
                    loss = loss_avg,
                    time = t_intv,
                    eta = eta
                )
            logger.info(msg)
            loss_avg = []
            st = ed
        # periodic checkpoint + evaluation on rank 0 only (the original
        # repeated the rank check twice; one condition is enough)
        if dist.get_rank() == 0 and (it + 1) % 5000 == 0:
            state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
            torch.save(state, './res/cp/{}_iter.pth'.format(it))
            evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp='{}_iter.pth'.format(it))

    # dump the final model
    save_pth = osp.join(respth, 'model_final_diss.pth')
    # net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))


if __name__ == "__main__":
    train()
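
OhemCELoss is imported from loss.py, which is not part of this diff. For orientation, here is a minimal sketch of online hard example mining cross-entropy as configured above (keep at least the n_min hardest pixels, plus any pixel whose loss exceeds -log(thresh)); this is an assumption about loss.py's behavior, not a copy of it:

import torch
import torch.nn.functional as F

def ohem_ce_loss(logits, labels, thresh=0.7, n_min=1024, ignore_lb=-100):
    # hypothetical stand-in for OhemCELoss; per-pixel cross entropy, flattened
    loss = F.cross_entropy(logits, labels, ignore_index=ignore_lb,
                           reduction='none').view(-1)
    loss, _ = torch.sort(loss, descending=True)
    # keep at least n_min pixels; keep more if they are harder than -log(thresh)
    thresh_loss = -torch.log(torch.tensor(thresh))
    if loss[n_min] > thresh_loss:
        loss = loss[loss > thresh_loss]
    else:
        loss = loss[:n_min]
    return loss.mean()
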
@ -1,129 +0,0 @@
#!/usr/bin/python
# -*- encoding: utf-8 -*-


from PIL import Image
import PIL.ImageEnhance as ImageEnhance
import random
import numpy as np

class RandomCrop(object):
    def __init__(self, size, *args, **kwargs):
        self.size = size

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        assert im.size == lb.size
        W, H = self.size
        w, h = im.size

        if (W, H) == (w, h): return dict(im=im, lb=lb)
        if w < W or h < H:
            scale = float(W) / w if w < h else float(H) / h
            w, h = int(scale * w + 1), int(scale * h + 1)
            im = im.resize((w, h), Image.BILINEAR)
            lb = lb.resize((w, h), Image.NEAREST)
        sw, sh = random.random() * (w - W), random.random() * (h - H)
        crop = int(sw), int(sh), int(sw) + W, int(sh) + H
        return dict(
                im = im.crop(crop),
                lb = lb.crop(crop)
                )


class HorizontalFlip(object):
    def __init__(self, p=0.5, *args, **kwargs):
        self.p = p

    def __call__(self, im_lb):
        if random.random() > self.p:
            return im_lb
        else:
            im = im_lb['im']
            # convert once so the label comparisons below work on array values
            # (comparing a PIL Image to an int silently yields no matches)
            lb = np.array(im_lb['lb'])

            # atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r',
            #         10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']

            # swap the left/right part labels so they stay correct after mirroring
            flip_lb = lb.copy()
            flip_lb[lb == 2] = 3
            flip_lb[lb == 3] = 2
            flip_lb[lb == 4] = 5
            flip_lb[lb == 5] = 4
            flip_lb[lb == 7] = 8
            flip_lb[lb == 8] = 7
            flip_lb = Image.fromarray(flip_lb)
            return dict(im = im.transpose(Image.FLIP_LEFT_RIGHT),
                        lb = flip_lb.transpose(Image.FLIP_LEFT_RIGHT),
                        )


class RandomScale(object):
    def __init__(self, scales=(1, ), *args, **kwargs):
        self.scales = scales

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        W, H = im.size
        scale = random.choice(self.scales)
        w, h = int(W * scale), int(H * scale)
        return dict(im = im.resize((w, h), Image.BILINEAR),
                    lb = lb.resize((w, h), Image.NEAREST),
                    )


class ColorJitter(object):
    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
        if brightness is not None and brightness > 0:
            self.brightness = [max(1 - brightness, 0), 1 + brightness]
        if contrast is not None and contrast > 0:
            self.contrast = [max(1 - contrast, 0), 1 + contrast]
        if saturation is not None and saturation > 0:
            self.saturation = [max(1 - saturation, 0), 1 + saturation]

    def __call__(self, im_lb):
        im = im_lb['im']
        lb = im_lb['lb']
        r_brightness = random.uniform(self.brightness[0], self.brightness[1])
        r_contrast = random.uniform(self.contrast[0], self.contrast[1])
        r_saturation = random.uniform(self.saturation[0], self.saturation[1])
        im = ImageEnhance.Brightness(im).enhance(r_brightness)
        im = ImageEnhance.Contrast(im).enhance(r_contrast)
        im = ImageEnhance.Color(im).enhance(r_saturation)
        return dict(im = im,
                    lb = lb,
                    )


class MultiScale(object):
    def __init__(self, scales):
        self.scales = scales

    def __call__(self, img):
        W, H = img.size
        sizes = [(int(W * ratio), int(H * ratio)) for ratio in self.scales]
        return [img.resize(size, Image.BILINEAR) for size in sizes]


class Compose(object):
    def __init__(self, do_list):
        self.do_list = do_list

    def __call__(self, im_lb):
        for comp in self.do_list:
            im_lb = comp(im_lb)
        return im_lb


if __name__ == '__main__':
    flip = HorizontalFlip(p = 1)
    crop = RandomCrop((321, 321))
    rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0))
    img = Image.open('data/img.jpg')
    lb = Image.open('data/label.png')