diff --git a/configs/cityscapes_pointrend_deeplabv3_plus.yaml b/configs/cityscapes_pointrend_deeplabv3_plus.yaml deleted file mode 100644 index a818a6b..0000000 --- a/configs/cityscapes_pointrend_deeplabv3_plus.yaml +++ /dev/null @@ -1,23 +0,0 @@ -DATASET: - NAME: "cityscape" - MEAN: [0.5, 0.5, 0.5] - STD: [0.5, 0.5, 0.5] -TRAIN: - EPOCHS: 400 - BATCH_SIZE: 2 - CROP_SIZE: 768 -TEST: - BATCH_SIZE: 2 - CROP_SIZE: (1024, 2048) -# TEST_MODEL_PATH: trained_models/deeplabv3_plus_xception_segmentron.pth - -SOLVER: - LR: 0.01 - -MODEL: - MODEL_NAME: "PointRend" - BACKBONE: "xception65" - BN_EPS_FOR_ENCODER: 1e-3 - DEEPLABV3_PLUS: - ENABLE_DECODER: False - diff --git a/segmentron/config/config.py b/segmentron/config/config.py index a78e3b4..0fa905c 100644 --- a/segmentron/config/config.py +++ b/segmentron/config/config.py @@ -80,15 +80,11 @@ def remove_irrelevant_cfg(self): from ..models.model_zoo import MODEL_REGISTRY model_list = MODEL_REGISTRY.get_list() model_list_lower = [x.lower() for x in model_list] - + # print('model_list:', model_list) assert model_name.lower() in model_list_lower, "Expected model name in {}, but received {}"\ .format(model_list, model_name) pop_keys = [] for key in self.MODEL.keys(): - if key.lower() in model_list_lower: - if model_name.lower() == 'pointrend' and \ - key.lower() == self.MODEL.POINTREND.BASEMODEL.lower(): - continue if key.lower() in model_list_lower and key.lower() != model_name.lower(): pop_keys.append(key) for key in pop_keys: diff --git a/segmentron/config/settings.py b/segmentron/config/settings.py index 46769a9..4a9030f 100644 --- a/segmentron/config/settings.py +++ b/segmentron/config/settings.py @@ -174,9 +174,6 @@ cfg.MODEL.CGNET.STAGE2_BLOCK_NUM = 3 cfg.MODEL.CGNET.STAGE3_BLOCK_NUM = 21 -########################## PointRend config ################################## -cfg.MODEL.POINTREND.BASEMODEL = 'DeepLabV3_Plus' - ########################## hrnet config ###################################### cfg.MODEL.HRNET.PRETRAINED_LAYERS = ['*'] cfg.MODEL.HRNET.STEM_INPLANES = 64 diff --git a/segmentron/data/dataloader/pascal_aug.py b/segmentron/data/dataloader/pascal_aug.py index 71bc2e6..69509fe 100644 --- a/segmentron/data/dataloader/pascal_aug.py +++ b/segmentron/data/dataloader/pascal_aug.py @@ -73,8 +73,6 @@ def __getitem__(self, index): img, target = self._sync_transform(img, target) elif self.mode == 'val': img, target = self._val_sync_transform(img, target) - elif self.mode == 'testval': - img, target = self._val_sync_transform(img, target) else: raise RuntimeError('unknown mode for dataloader: {}'.format(self.mode)) # general resize, normalize and toTensor diff --git a/segmentron/data/dataloader/pascal_voc.py b/segmentron/data/dataloader/pascal_voc.py index c26b2e9..c184e4d 100644 --- a/segmentron/data/dataloader/pascal_voc.py +++ b/segmentron/data/dataloader/pascal_voc.py @@ -1,7 +1,6 @@ """Pascal VOC Semantic Segmentation Dataset.""" import os import torch -import logging import numpy as np from PIL import Image @@ -85,7 +84,6 @@ def __getitem__(self, index): img, mask = self._val_sync_transform(img, mask) else: assert self.mode == 'testval' - logging.warn("Use mode of testval, you should set batch size=1") img, mask = self._img_transform(img), self._mask_transform(mask) # general resize, normalize and toTensor if self.transform is not None: diff --git a/segmentron/models/__init__.py b/segmentron/models/__init__.py index 1361794..7c5166d 100644 --- a/segmentron/models/__init__.py +++ b/segmentron/models/__init__.py @@ -25,4 +25,3 @@ from .espnetv2 import ESPNetV2 from .enet import ENet from .edanet import EDANet -from .pointrend import PointRend diff --git a/segmentron/models/backbones/build.py b/segmentron/models/backbones/build.py index 4723572..0d60a40 100644 --- a/segmentron/models/backbones/build.py +++ b/segmentron/models/backbones/build.py @@ -3,7 +3,6 @@ import logging import torch.utils.model_zoo as model_zoo -from ...utils.download import download from ...utils.registry import Registry from ...config import cfg @@ -43,14 +42,7 @@ def load_backbone_pretrained(model, backbone): return else: logging.info('load backbone pretrained model from url..') - try: - msg = model.load_state_dict(model_zoo.load_url(model_urls[backbone]), strict=False) - except Exception as e: - logging.warning(e) - logging.info('Use torch download failed, try custom method!') - - msg = model.load_state_dict(torch.load(download(model_urls[backbone], - path=os.path.join(torch.hub._get_torch_home(), 'checkpoints'))), strict=False) + msg = model.load_state_dict(model_zoo.load_url(model_urls[backbone]), strict=False) logging.info(msg) diff --git a/segmentron/models/pointrend.py b/segmentron/models/pointrend.py deleted file mode 100644 index f57b5a3..0000000 --- a/segmentron/models/pointrend.py +++ /dev/null @@ -1,166 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from torchvision.models._utils import IntermediateLayerGetter -from .model_zoo import MODEL_REGISTRY -from .segbase import SegBaseModel -from ..config import cfg - - -@MODEL_REGISTRY.register(name='PointRend') -class PointRend(SegBaseModel): - def __init__(self): - super(PointRend, self).__init__(need_backbone=False) - model_name = cfg.MODEL.POINTREND.BASEMODEL - self.backbone = MODEL_REGISTRY.get(model_name)() - - self.head = PointHead(num_classes=self.nclass) - - def forward(self, x): - c1, _, _, c4 = self.backbone.encoder(x) - - out = self.backbone.head(c4, c1) - - result = {'res2': c1, 'coarse': out} - result.update(self.head(x, result["res2"], result["coarse"])) - if not self.training: - return (result['fine'],) - return result - - -class PointHead(nn.Module): - def __init__(self, in_c=275, num_classes=19, k=3, beta=0.75): - super().__init__() - self.mlp = nn.Conv1d(in_c, num_classes, 1) - self.k = k - self.beta = beta - - def forward(self, x, res2, out): - """ - 1. Fine-grained features are interpolated from res2 for DeeplabV3 - 2. During training we sample as many points as there are on a stride 16 feature map of the input - 3. To measure prediction uncertainty - we use the same strategy during training and inference: the difference between the most - confident and second most confident class probabilities. - """ - if not self.training: - return self.inference(x, res2, out) - - points = sampling_points(out, x.shape[-1] // 16, self.k, self.beta) - - coarse = point_sample(out, points, align_corners=False) - fine = point_sample(res2, points, align_corners=False) - - feature_representation = torch.cat([coarse, fine], dim=1) - - rend = self.mlp(feature_representation) - - return {"rend": rend, "points": points} - - @torch.no_grad() - def inference(self, x, res2, out): - """ - During inference, subdivision uses N=8096 - (i.e., the number of points in the stride 16 map of a 1024×2048 image) - """ - num_points = 8096 - - while out.shape[-1] != x.shape[-1]: - out = F.interpolate(out, scale_factor=2, mode="bilinear", align_corners=True) - - points_idx, points = sampling_points(out, num_points, training=self.training) - - coarse = point_sample(out, points, align_corners=False) - fine = point_sample(res2, points, align_corners=False) - - feature_representation = torch.cat([coarse, fine], dim=1) - - rend = self.mlp(feature_representation) - - B, C, H, W = out.shape - points_idx = points_idx.unsqueeze(1).expand(-1, C, -1) - out = (out.reshape(B, C, -1) - .scatter_(2, points_idx, rend) - .view(B, C, H, W)) - - return {"fine": out} - - -def point_sample(input, point_coords, **kwargs): - """ - From Detectron2, point_features.py#19 - A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. - Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside - [0, 1] x [0, 1] square. - Args: - input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. - point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains - [0, 1] x [0, 1] normalized point coordinates. - Returns: - output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains - features for points in `point_coords`. The features are obtained via bilinear - interplation from `input` the same way as :function:`torch.nn.functional.grid_sample`. - """ - add_dim = False - if point_coords.dim() == 3: - add_dim = True - point_coords = point_coords.unsqueeze(2) - output = F.grid_sample(input, 2.0 * point_coords - 1.0)#, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -@torch.no_grad() -def sampling_points(mask, N, k=3, beta=0.75, training=True): - """ - Follows 3.1. Point Selection for Inference and Training - In Train:, `The sampling strategy selects N points on a feature map to train on.` - In Inference, `then selects the N most uncertain points` - Args: - mask(Tensor): [B, C, H, W] - N(int): `During training we sample as many points as there are on a stride 16 feature map of the input` - k(int): Over generation multiplier - beta(float): ratio of importance points - training(bool): flag - Return: - selected_point(Tensor) : flattened indexing points [B, num_points, 2] - """ - assert mask.dim() == 4, "Dim must be N(Batch)CHW" - device = mask.device - B, _, H, W = mask.shape - mask, _ = mask.sort(1, descending=True) - - if not training: - H_step, W_step = 1 / H, 1 / W - N = min(H * W, N) - uncertainty_map = -1 * (mask[:, 0] - mask[:, 1]) - _, idx = uncertainty_map.view(B, -1).topk(N, dim=1) - - points = torch.zeros(B, N, 2, dtype=torch.float, device=device) - points[:, :, 0] = W_step / 2.0 + (idx % W).to(torch.float) * W_step - points[:, :, 1] = H_step / 2.0 + (idx // W).to(torch.float) * H_step - return idx, points - - # Official Comment : point_features.py#92 - # It is crucial to calculate uncertanty based on the sampled prediction value for the points. - # Calculating uncertainties of the coarse predictions first and sampling them for points leads - # to worse results. To illustrate the difference: a sampled point between two coarse predictions - # with -1 and 1 logits has 0 logit prediction and therefore 0 uncertainty value, however, if one - # calculates uncertainties for the coarse predictions first (-1 and -1) and sampe it for the - # center point, they will get -1 unceratinty. - - over_generation = torch.rand(B, k * N, 2, device=device) - over_generation_map = point_sample(mask, over_generation, align_corners=False) - - uncertainty_map = -1 * (over_generation_map[:, 0] - over_generation_map[:, 1]) - _, idx = uncertainty_map.topk(int(beta * N), -1) - - shift = (k * N) * torch.arange(B, dtype=torch.long, device=device) - - idx += shift[:, None] - - importance = over_generation.view(-1, 2)[idx.view(-1), :].view(B, int(beta * N), 2) - coverage = torch.rand(B, N - int(beta * N), 2, device=device) - return torch.cat([importance, coverage], 1).to(device) \ No newline at end of file diff --git a/segmentron/solver/loss.py b/segmentron/solver/loss.py index c1c5029..9da2ee4 100644 --- a/segmentron/solver/loss.py +++ b/segmentron/solver/loss.py @@ -6,7 +6,6 @@ from torch.autograd import Variable from .lovasz_losses import lovasz_softmax -from ..models.pointrend import point_sample from ..data.dataloader import datasets from ..config import cfg @@ -361,32 +360,6 @@ def forward(self, *inputs): return dict(loss=self._aux_forward(*inputs)) -class PointRendLoss(nn.CrossEntropyLoss): - def __init__(self, aux=True, aux_weight=0.2, ignore_index=-1, **kwargs): - super(PointRendLoss, self).__init__(ignore_index=ignore_index) - self.aux = aux - self.aux_weight = aux_weight - self.ignore_index = ignore_index - - def forward(self, *inputs, **kwargs): - result, gt = tuple(inputs) - - pred = F.interpolate(result["coarse"], gt.shape[-2:], mode="bilinear", align_corners=True) - seg_loss = F.cross_entropy(pred, gt, ignore_index=self.ignore_index) - - gt_points = point_sample( - gt.float().unsqueeze(1), - result["points"], - mode="nearest", - align_corners=False - ).squeeze_(1).long() - points_loss = F.cross_entropy(result["rend"], gt_points, ignore_index=self.ignore_index) - - loss = seg_loss + points_loss - - return dict(loss=loss) - - def get_segmentation_loss(model, use_ohem=False, **kwargs): if use_ohem: return MixSoftmaxCrossEntropyOHEMLoss(**kwargs) @@ -400,13 +373,11 @@ def get_segmentation_loss(model, use_ohem=False, **kwargs): logging.info('Use dice loss!') return DiceLoss(**kwargs) + model = model.lower() if model == 'icnet': return ICNetLoss(**kwargs) elif model == 'encnet': return EncNetLoss(**kwargs) - elif model == 'pointrend': - logging.info('Use pointrend loss!') - return PointRendLoss(**kwargs) else: return MixSoftmaxCrossEntropyLoss(**kwargs) diff --git a/segmentron/utils/score.py b/segmentron/utils/score.py index a1f145b..09cf09d 100644 --- a/segmentron/utils/score.py +++ b/segmentron/utils/score.py @@ -30,7 +30,10 @@ def update(self, preds, labels): """ def reduce_tensor(tensor): - rt = tensor.clone() + if isinstance(tensor, torch.Tensor): + rt = tensor.clone() + else: + rt = copy.deepcopy(tensor) dist.all_reduce(rt, op=dist.ReduceOp.SUM) return rt diff --git a/tools/eval.py b/tools/eval.py index 85d1b14..bff9b50 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -47,10 +47,9 @@ def __init__(self, args): # create network self.model = get_segmentation_model().to(self.device) - if hasattr(self.model, 'encoder') and hasattr(self.model.encoder, 'named_modules') and \ - cfg.MODEL.BN_EPS_FOR_ENCODER: - logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER)) - self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps', cfg.MODEL.BN_EPS_FOR_ENCODER) + if hasattr(self.model, 'encoder') and cfg.MODEL.BN_EPS_FOR_ENCODER: + logging.info('set bn custom eps for bn in encoder: {}'.format(cfg.MODEL.BN_EPS_FOR_ENCODER)) + self.set_batch_norm_attr(self.model.encoder.named_modules(), 'eps', cfg.MODEL.BN_EPS_FOR_ENCODER) if args.distributed: self.model = nn.parallel.DistributedDataParallel(self.model,