Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trainers: add Instance Segmentation Task #2513

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions tests/trainers/test_instancesegmentation.py
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's rename to test_instance_segmentation.py to match the other filename

Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import torch
import pytorch_lightning as pl
from pytorch_lightning import LightningModule

Check failure on line 3 in tests/trainers/test_instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

tests/trainers/test_instancesegmentation.py:3:31: F401 `pytorch_lightning.LightningModule` imported but unused
from torch.utils.data import DataLoader
from torchgeo.datasets import VHR10
from torchgeo.trainers import InstanceSegmentationTask


# Custom collate function for DataLoader (required for Mask R-CNN models)

Check failure on line 9 in tests/trainers/test_instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

tests/trainers/test_instancesegmentation.py:1:1: I001 Import block is un-sorted or un-formatted
def collate_fn(batch):

Check failure on line 10 in tests/trainers/test_instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (ANN201)

tests/trainers/test_instancesegmentation.py:10:5: ANN201 Missing return type annotation for public function `collate_fn`

Check failure on line 10 in tests/trainers/test_instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (ANN001)

tests/trainers/test_instancesegmentation.py:10:16: ANN001 Missing type annotation for function argument `batch`
return tuple(zip(*batch))

# Initialize the VHR10 dataset.
# NOTE(review): these statements run at import time, so collecting this file
# with pytest downloads the dataset and starts a 10-epoch training run.
train_dataset = VHR10(root="data", split="positive", transforms=None, download=True)
# NOTE(review): validation reuses the same "positive" split as training, so the
# reported metrics are not measured on held-out data.
val_dataset = VHR10(root="data", split="positive", transforms=None)

# Create DataLoaders; the custom collate function keeps per-image samples
# separate instead of stacking them.
train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn
)
val_loader = DataLoader(
    val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn
)

# Initialize the InstanceSegmentationTask
task = InstanceSegmentationTask(
    model="mask_rcnn",  # Use Mask R-CNN as the model
    backbone="resnet50",  # ResNet-50 as the backbone
    weights=True,  # Use pretrained weights
    num_classes=11,  # 10 object classes in VHR10 + 1 background class
    lr=1e-3,  # Learning rate
    freeze_backbone=False,  # Allow training the backbone
)

# Set up PyTorch Lightning Trainer
trainer = pl.Trainer(
    max_epochs=10,
    accelerator="gpu" if torch.cuda.is_available() else "cpu",
    devices=1,
)

# Train the model
trainer.fit(task, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Evaluate the model
trainer.test(task, dataloaders=val_loader)

# Example inference
test_sample = train_dataset[0]
test_image = test_sample["image"].unsqueeze(0)  # Add batch dimension
# predict_step is called by hand here (not through trainer.predict), so nothing
# switches the module to inference mode for us. torchvision detection models
# require targets while in training mode, hence the explicit eval() call;
# no_grad() avoids building an autograd graph for a forward-only pass.
task.eval()
with torch.no_grad():
    predictions = task.predict_step({"image": test_image}, batch_idx=0)
print(predictions)
174 changes: 174 additions & 0 deletions torchgeo/trainers/instancesegmentation.py
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
from typing import Any

Check failure on line 1 in torchgeo/trainers/instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (D100)

torchgeo/trainers/instancesegmentation.py:1:1: D100 Missing docstring in public module
import torch.nn as nn
import torch

Check failure on line 3 in torchgeo/trainers/instancesegmentation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

torchgeo/trainers/instancesegmentation.py:3:8: F401 `torch` imported but unused
from torch import Tensor
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchvision.models.detection import maskrcnn_resnet50_fpn
from ultralytics import YOLO
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved
from .base import BaseTask

class InstanceSegmentationTask(BaseTask):
    """Task for training and evaluating instance segmentation models.

    Supports torchvision's Mask R-CNN (ResNet-50 FPN) and Ultralytics YOLO
    segmentation models and takes care of:

    * model configuration
    * loss computation
    * metric computation (mean average precision over masks)
    * training, validation, testing, and prediction steps
    """

    def __init__(
        self,
        model: str = 'mask_rcnn',
        backbone: str = 'resnet50',
        weights: str | bool | None = None,
        num_classes: int = 2,
        lr: float = 1e-3,
        patience: int = 10,
        freeze_backbone: bool = False,
    ) -> None:
        """Initialize a new InstanceSegmentationTask instance.

        Args:
            model: Model type, ``'mask_rcnn'`` or ``'yolo'`` (case-insensitive).
            backbone: Backbone name. Recorded as a hyperparameter only; the
                Mask R-CNN branch always builds a ResNet-50 FPN backbone and
                YOLO ignores it.
            weights: ``True`` to start from pretrained weights. For
                ``'yolo'``, a non-empty string is used as the checkpoint to
                load. For ``'mask_rcnn'``, only ``True`` has an effect.
            num_classes: Number of classes, including background.
            lr: Learning rate. Stored as a hyperparameter; it is not read in
                this class (presumably consumed by the base class optimizer
                setup -- TODO confirm).
            patience: Patience for the learning rate scheduler. Stored as a
                hyperparameter; not read in this class (TODO confirm where
                it is consumed).
            freeze_backbone: If ``True``, the Mask R-CNN backbone parameters
                are excluded from gradient updates.

        Raises:
            ValueError: If *model* is neither ``'mask_rcnn'`` nor ``'yolo'``.
        """
        # Assigned before super().__init__() so that configure_models() can
        # read it even if the base class initializer triggers configuration.
        # NOTE(review): if BaseTask.__init__ already calls
        # save_hyperparameters/configure_models/configure_metrics, the explicit
        # calls below build the model twice -- confirm against BaseTask.
        self.weights = weights
        super().__init__()
        self.save_hyperparameters()
        self.model: nn.Module | None = None
        # Per-batch (outputs, targets) pairs, kept until the end of the epoch.
        self.validation_outputs: list[tuple[Any, Any]] = []
        self.test_outputs: list[tuple[Any, Any]] = []
        self.configure_models()
        self.configure_metrics()

    def configure_models(self) -> None:
        """Build the model selected by the ``model`` hyperparameter.

        For Mask R-CNN the box and mask prediction heads are replaced so that
        they output ``num_classes`` classes, and the backbone is optionally
        frozen. For YOLO a segmentation checkpoint is loaded.

        Raises:
            ValueError: If the model type is neither ``'mask_rcnn'`` nor
                ``'yolo'``.
        """
        # Imported here so the method brings its own torchvision helpers into
        # scope without touching the module-level import block.
        from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
        from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

        model = self.hparams['model'].lower()
        num_classes = self.hparams['num_classes']

        if model == 'mask_rcnn':
            # `pretrained=` is deprecated in torchvision; `weights=` replaces
            # it. Only `weights is True` requests the pretrained checkpoint.
            self.model = maskrcnn_resnet50_fpn(
                weights='DEFAULT' if self.weights is True else None
            )

            # The RoI heads unpack (class_logits, box_regression) from the box
            # predictor, so it must be a FastRCNNPredictor: a bare linear
            # layer returns a single tensor and breaks the forward pass.
            box_in_features = self.model.roi_heads.box_predictor.cls_score.in_features
            self.model.roi_heads.box_predictor = FastRCNNPredictor(
                box_in_features, num_classes
            )

            # Rebuild the mask head with the same channel widths as the
            # original one, changing only the number of output classes.
            old_mask_head = self.model.roi_heads.mask_predictor
            self.model.roi_heads.mask_predictor = MaskRCNNPredictor(
                old_mask_head.conv5_mask.in_channels,
                old_mask_head.conv5_mask.out_channels,
                num_classes,
            )

        elif model == 'yolo':
            # Load the requested checkpoint first and only then override the
            # class count, so the override is not discarded by a reload.
            # `weights=True` falls back to the default checkpoint instead of
            # being passed to YOLO() as if it were a path.
            if isinstance(self.weights, str) and self.weights:
                checkpoint = self.weights
            else:
                checkpoint = 'yolov8n-seg'
            self.model = YOLO(checkpoint)
            # NOTE(review): this only records `nc` in the model args; whether
            # Ultralytics rebuilds the head from it is not visible here.
            self.model.model.args['nc'] = num_classes

        else:
            raise ValueError(
                f"Invalid model type '{model}'. Supported models: 'mask_rcnn', 'yolo'."
            )

        # Freezing is only implemented for Mask R-CNN.
        if self.hparams['freeze_backbone'] and model == 'mask_rcnn':
            for param in self.model.backbone.parameters():
                param.requires_grad = False

    def configure_metrics(self) -> None:
        """Set up mean average precision (mAP) computed over masks."""
        self.metrics = MeanAveragePrecision(iou_type='segm')

    @staticmethod
    def _binarize_masks(outputs: Any, threshold: float = 0.5) -> Any:
        """Return *outputs* with soft masks converted to binary masks.

        Dictionary predictions holding a floating point ``'masks'`` tensor
        get a thresholded ``uint8`` copy with a singleton channel dimension
        removed, which is the layout the segmentation mAP metric consumes.
        Any other prediction is passed through untouched. The input
        predictions are not modified.
        """
        prepared = []
        for pred in outputs:
            if isinstance(pred, dict):
                masks = pred.get('masks')
                if isinstance(masks, Tensor) and masks.is_floating_point():
                    if masks.ndim == 4:
                        # No-op unless dimension 1 has size one.
                        masks = masks.squeeze(1)
                    pred = {**pred, 'masks': (masks > threshold).to(torch.uint8)}
            prepared.append(pred)
        return prepared

    def _evaluate_batch(self, batch: Any, store: list[tuple[Any, Any]]) -> None:
        """Run inference on *batch*, update the metric, and record the outputs."""
        images, targets = batch['image'], batch['target']
        outputs = self.model(images)
        self.metrics.update(self._binarize_masks(outputs), targets)
        # NOTE: every batch's raw outputs stay referenced until the epoch-end
        # hook clears the list.
        store.append((outputs, targets))

    def _log_and_reset_metrics(self) -> None:
        """Compute the accumulated metric, log its scalar entries, and reset it."""
        computed = dict(self.metrics.compute())
        # `classes` lists the observed class ids and is not a scalar in
        # general; log_dict only accepts single-element values.
        computed.pop('classes', None)
        scalars = {
            name: value
            for name, value in computed.items()
            if not isinstance(value, Tensor) or value.numel() == 1
        }
        self.log_dict(scalars)
        self.metrics.reset()

    def training_step(self, batch: Any, batch_idx: int) -> Tensor:
        """Compute the training loss for one batch.

        Args:
            batch: Mapping with ``'image'`` and ``'target'`` entries.
            batch_idx: Index of the current batch (unused).

        Returns:
            The sum of all losses returned by the model for this batch.
        """
        images, targets = batch['image'], batch['target']
        # Called with targets, the model returns a mapping of named losses.
        loss_dict = self.model(images, targets)
        loss = sum(loss_dict.values())
        self.log('train_loss', loss, batch_size=len(images))
        return loss

    def validation_step(self, batch: Any, batch_idx: int) -> None:
        """Run inference on a validation batch and update the metric.

        Args:
            batch: Mapping with ``'image'`` and ``'target'`` entries.
            batch_idx: Index of the current batch (unused).
        """
        self._evaluate_batch(batch, self.validation_outputs)

    def on_validation_epoch_end(self) -> None:
        """Log the validation metrics, then reset the metric and stored outputs."""
        self._log_and_reset_metrics()
        self.validation_outputs.clear()

    def test_step(self, batch: Any, batch_idx: int) -> None:
        """Run inference on a test batch and update the metric.

        Args:
            batch: Mapping with ``'image'`` and ``'target'`` entries.
            batch_idx: Index of the current batch (unused).
        """
        self._evaluate_batch(batch, self.test_outputs)

    def on_test_epoch_end(self) -> None:
        """Log the test metrics, then reset the metric and stored outputs."""
        self._log_and_reset_metrics()
        self.test_outputs.clear()

    def predict_step(self, batch: Any, batch_idx: int) -> Any:
        """Run inference on a batch of images.

        Args:
            batch: Mapping with an ``'image'`` entry.
            batch_idx: Index of the current batch (unused).

        Returns:
            Whatever the underlying model returns for the images. The caller
            is responsible for putting the module in evaluation mode when
            invoking this method outside of a trainer.
        """
        return self.model(batch['image'])
Loading