-
Notifications
You must be signed in to change notification settings - Fork 212
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support MaskDINO COCO instance/panoptic segmentation (#154)
* add maskdino * delete useless op * add MaskDINO coco panoptic * add README for dino and bound to v0.2.1 Co-authored-by: hao zhang <[email protected]>
- Loading branch information
1 parent
57d7527
commit 579a240
Showing
56 changed files
with
9,512 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# coding=utf-8 | ||
# Copyright 2022 The IDEA Authors. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from .coco_instance_new_baseline_dataset_mapper import build_transform_gen as coco_instance_transform_gen | ||
from .coco_panoptic_new_baseline_dataset_mapper import build_transform_gen as coco_panoptic_transform_gen | ||
from .coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper | ||
from .coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper | ||
from .mask_former_instance_dataset_mapper import MaskFormerInstanceDatasetMapper | ||
from .mask_former_panoptic_dataset_mapper import MaskFormerPanopticDatasetMapper | ||
from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper |
179 changes: 179 additions & 0 deletions
179
detrex/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
# coding=utf-8 | ||
# Copyright 2022 The IDEA Authors. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ------------------------------------------------------------------------------------------------ | ||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved | ||
# ------------------------------------------------------------------------------------------------ | ||
# COCO Instance Segmentation with LSJ Augmentation | ||
# Modified from: | ||
# https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py | ||
# ------------------------------------------------------------------------------------------------ | ||
|
||
import copy | ||
import logging | ||
import numpy as np | ||
import torch | ||
|
||
from detectron2.data import detection_utils as utils | ||
from detectron2.data import transforms as T | ||
|
||
from pycocotools import mask as coco_mask | ||
|
||
|
||
def convert_coco_poly_to_mask(segmentations, height, width):
    """Rasterize per-instance COCO polygon lists into one stacked mask tensor.

    Args:
        segmentations: iterable with one entry per instance; each entry is
            handed unchanged to ``coco_mask.frPyObjects``.
        height: height passed to the rasterizer and used for the empty result.
        width: width passed to the rasterizer and used for the empty result.

    Returns:
        torch.Tensor: the per-instance masks stacked along dim 0. For an empty
        ``segmentations`` an empty ``(0, height, width)`` ``torch.uint8``
        tensor is returned instead.
    """
    per_instance = []
    for polys in segmentations:
        encoded = coco_mask.frPyObjects(polys, height, width)
        bitmap = coco_mask.decode(encoded)
        if bitmap.ndim < 3:
            # Guarantee a trailing axis so the reduction below always applies.
            bitmap = bitmap[..., None]
        # Merge along the trailing axis: a pixel is set if any slice sets it.
        per_instance.append(torch.as_tensor(bitmap, dtype=torch.uint8).any(dim=2))

    if not per_instance:
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(per_instance, dim=0)
|
||
|
||
def build_transform_gen(
    image_size,
    min_scale,
    max_scale,
    random_flip: str = "horizontal",
    is_train: bool = True,
):
    """
    Build the training augmentation list: optional random flip, then a
    random-scale resize and a fixed-size crop.

    Args:
        image_size: side length used for both the resize target and the crop.
        min_scale: lower bound forwarded to ``T.ResizeScale``.
        max_scale: upper bound forwarded to ``T.ResizeScale``.
        random_flip: one of "none", "horizontal" or "vertical".
        is_train: must be True; only training augmentation is supported.

    Returns:
        list[Augmentation]
    """
    assert is_train, "Only support training augmentation."
    assert random_flip in ("none", "horizontal", "vertical"), f"Only support none/horizontal/vertical flip, but got {random_flip}"

    # The flip (when enabled) comes first so it precedes resizing and cropping.
    flip_ops = []
    if random_flip != "none":
        flip_along_x = random_flip == "horizontal"
        flip_along_y = random_flip == "vertical"
        flip_ops = [T.RandomFlip(horizontal=flip_along_x, vertical=flip_along_y)]

    resize_op = T.ResizeScale(
        min_scale=min_scale,
        max_scale=max_scale,
        target_height=image_size,
        target_width=image_size,
    )
    crop_op = T.FixedSizeCrop(crop_size=(image_size, image_size))

    return flip_ops + [resize_op, crop_op]
|
||
|
||
class COCOInstanceNewBaselineDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into the format used for COCO instance segmentation training.

    The callable currently does the following:
    1. Read the image from "file_name"
    2. Apply the configured augmentation to the image and to a padding mask
    3. Apply the same transforms to the non-crowd annotations
    4. Convert image, padding mask and annotations to Tensors / ``Instances``
    """

    def __init__(
        self,
        is_train=True,
        *,
        augmentation,
        image_format,
    ):
        """
        Args:
            is_train: when False, annotations are dropped and only the image
                and padding mask are produced.
            augmentation: list of augmentations handed to
                ``T.apply_transform_gens``.
            image_format: image format handed to ``utils.read_image``.
        """
        self.augmentation = augmentation
        logging.getLogger(__name__).info(
            "[COCO_Instance_LSJ_Augment_Dataset_Mapper] Full TransformGens used in training: %s",
            self.augmentation,
        )

        self.img_format = image_format
        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a deep copy of the input with "image" and "padding_mask"
            tensors added. In training mode "annotations" (if present) is
            replaced by an "instances" entry; otherwise it is removed.
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # Send an all-ones mask through the same transforms as the image, then
        # invert it, so True is meant to mark pixels not covered by the image.
        # NOTE(review): this relies on the transforms filling padded
        # segmentation pixels with 0 - confirm against the pad fill value of
        # the installed detectron2 FixedSizeCrop.
        padding_mask = np.ones(image.shape[:2])
        image, transforms = T.apply_transform_gens(self.augmentation, image)
        padding_mask = transforms.apply_segmentation(padding_mask)
        padding_mask = ~padding_mask.astype(bool)

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            dataset_dict["instances"] = self._build_instances(
                dataset_dict.pop("annotations"), transforms, image_shape
            )

        return dataset_dict

    @staticmethod
    def _build_instances(annotations, transforms, image_shape):
        """Turn raw annotations into transformed, filtered ``Instances``."""
        annos = []
        for anno in annotations:
            anno.pop("keypoints", None)
            # Crowd regions are not used as training targets.
            if anno.get("iscrowd", 0) == 0:
                annos.append(
                    utils.transform_instance_annotations(anno, transforms, image_shape)
                )

        # NOTE: does not support BitMask due to augmentation
        # Current BitMask cannot handle empty objects
        instances = utils.annotations_to_instances(annos, image_shape)

        # "gt_masks" is absent when no annotation was kept or the annotations
        # carry no "segmentation"; reading it unconditionally would raise
        # AttributeError, so every mask-dependent step is guarded.
        if hasattr(instances, "gt_masks"):
            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        # Need to filter empty instances first (due to augmentation)
        instances = utils.filter_empty_instances(instances)

        if hasattr(instances, "gt_masks"):
            # Generate masks from polygon
            h, w = instances.image_size
            instances.gt_masks = convert_coco_poly_to_mask(instances.gt_masks.polygons, h, w)

        return instances
|
173 changes: 173 additions & 0 deletions
173
detrex/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
# coding=utf-8 | ||
# Copyright 2022 The IDEA Authors. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ------------------------------------------------------------------------------------------------ | ||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved | ||
# ------------------------------------------------------------------------------------------------ | ||
# COCO Panoptic Segmentation with LSJ Augmentation | ||
# Modified from: | ||
# https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py | ||
# ------------------------------------------------------------------------------------------------ | ||
|
||
import copy | ||
import logging | ||
|
||
import numpy as np | ||
import torch | ||
|
||
from detectron2.config import configurable | ||
from detectron2.data import detection_utils as utils | ||
from detectron2.data import transforms as T | ||
from detectron2.data.transforms import TransformGen | ||
from detectron2.structures import BitMasks, Boxes, Instances | ||
|
||
# Names exported by a star-import of this module; only the mapper class is
# listed, so build_transform_gen below has to be imported by name.
__all__ = ["COCOPanopticNewBaselineDatasetMapper"] | ||
|
||
|
||
def build_transform_gen(
    image_size,
    min_scale,
    max_scale,
    random_flip: str = "horizontal",
    is_train: bool = True,
):
    """
    Create the list of training :class:`Augmentation`: optional random flip,
    then a random-scale resize and a fixed-size crop.

    Args:
        image_size: side length used for both the resize target and the crop.
        min_scale: lower bound forwarded to ``T.ResizeScale``.
        max_scale: upper bound forwarded to ``T.ResizeScale``.
        random_flip: one of "none", "horizontal" or "vertical".
        is_train: must be True; only training augmentation is supported.

    Returns:
        list[Augmentation]

    Raises:
        AssertionError: if ``is_train`` is False.
        ValueError: if ``random_flip`` is not a supported value.
    """
    assert is_train, "Only support training augmentation"

    # Reject unsupported values up front. Otherwise a typo would reach
    # T.RandomFlip with both directions disabled and fail there with a
    # message that never mentions the offending argument.
    if random_flip not in ("none", "horizontal", "vertical"):
        raise ValueError(
            f"Only support none/horizontal/vertical flip, but got {random_flip}"
        )

    augmentation = []

    if random_flip != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=random_flip == "horizontal",
                vertical=random_flip == "vertical",
            )
        )

    augmentation.extend([
        T.ResizeScale(
            min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
        ),
        T.FixedSizeCrop(crop_size=(image_size, image_size)),
    ])

    return augmentation
|
||
|
||
# This is specifically designed for the COCO dataset. | ||
class COCOPanopticNewBaselineDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into the format used for COCO panoptic segmentation training.

    The callable currently does the following:
    1. Read the image from "file_name"
    2. Apply the configured augmentation to the image
    3. Apply the same transforms to the panoptic ground truth
    4. Convert image and per-segment masks to Tensors / ``Instances``
    """

    def __init__(
        self,
        is_train=True,
        *,
        augmentation,
        image_format,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            is_train: for training or inference
            augmentation: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
        """
        self.augmentation = augmentation
        logging.getLogger(__name__).info(
            "[COCOPanopticNewBaselineDatasetMapper] Full TransformGens used in training: %s",
            self.augmentation,
        )

        self.img_format = image_format
        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a deep copy of the input with an "image" tensor added. In
            training mode, when "pan_seg_file_name" is present, an "instances"
            entry built from the non-crowd segments is added as well.
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.augmentation, image)
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "pan_seg_file_name" in dataset_dict:
            pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
            segments_info = dataset_dict["segments_info"]

            # apply the same transformation to panoptic segmentation
            pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)

            # Imported lazily so panopticapi is only needed when panoptic
            # ground truth is actually processed.
            from panopticapi.utils import rgb2id

            pan_seg_gt = rgb2id(pan_seg_gt)

            # A segment without an "iscrowd" key is treated as non-crowd,
            # matching the instance mapper, instead of raising KeyError.
            kept_segments = [
                segment_info
                for segment_info in segments_info
                if not segment_info.get("iscrowd", 0)
            ]
            classes = [segment_info["category_id"] for segment_info in kept_segments]
            masks = [pan_seg_gt == segment_info["id"] for segment_info in kept_segments]

            instances = Instances(image_shape)
            instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
            if not masks:
                # Some image does not have annotation (all ignored)
                # NOTE(review): this empty tensor uses torch's default dtype
                # while the branch below yields the BitMasks tensor dtype -
                # confirm downstream code tolerates the mismatch.
                instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
                instances.gt_boxes = Boxes(torch.zeros((0, 4)))
            else:
                masks = BitMasks(
                    torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
                )
                instances.gt_masks = masks.tensor
                instances.gt_boxes = masks.get_bounding_boxes()

            dataset_dict["instances"] = instances

        return dataset_dict
|
Oops, something went wrong.