Skip to content

Commit

Permalink
Merge pull request #117 from streeve/model_kwargs
Browse files Browse the repository at this point in the history
Simplify model parameters
  • Loading branch information
streeve authored Mar 17, 2022
2 parents e89523e + 89fb9a0 commit 025b39d
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 183 deletions.
8 changes: 4 additions & 4 deletions hydragnn/models/Base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def __init__(
hidden_dim: int,
output_dim: list,
output_type: list,
config_heads: {},
ilossweights_hyperp: int,
loss_weights: list,
ilossweights_nll: int,
config_heads: dict,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, use the scalar uncertainty as weights, as in the paper: https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
freeze_conv=False,
dropout: float = 0.25,
num_conv_layers: int = 16,
Expand Down
31 changes: 10 additions & 21 deletions hydragnn/models/CGCNNStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,26 @@
class CGCNNStack(Base):
def __init__(
self,
input_dim: int,
output_dim: list,
output_type: list,
num_nodes: int,
config_heads: {},
freeze_conv=False,
edge_dim: int = 0,
dropout: float = 0.25,
num_conv_layers: int = 16,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, using the scalar uncertainty as weights, as in paper
# https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
edge_dim: int,
input_dim,
output_dim,
output_type,
config_heads,
**kwargs,
):
self.edge_dim = edge_dim

# CGCNN does not change embedding dimensions
# We use input dimension (first argument of constructor) also as hidden dimension (second argument of constructor)
# We use input dimension (first argument of base constructor)
# also as hidden dimension (second argument of base constructor)
# We therefore pass all required args explicitly.
super().__init__(
input_dim,
input_dim,
output_dim,
output_type,
config_heads,
ilossweights_hyperp,
loss_weights,
ilossweights_nll,
freeze_conv,
dropout,
num_conv_layers,
num_nodes,
**kwargs,
)

def get_conv(self, input_dim, _):
Expand Down
33 changes: 5 additions & 28 deletions hydragnn/models/GATStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,16 @@
class GATStack(Base):
def __init__(
self,
input_dim: int,
output_dim: list,
output_type: list,
num_nodes: int,
hidden_dim: int,
config_heads: {},
freeze_conv=False,
heads: int = 6,
negative_slope: float = 0.05,
dropout: float = 0.25,
num_conv_layers: int = 16,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, use the scalar uncertainty as weights, as in the paper: https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
heads: int,
negative_slope: float,
*args,
**kwargs,
):
# note that self.heads is a parameter in GATConv, not the num_heads in the output part
self.heads = heads
self.negative_slope = negative_slope

super().__init__(
input_dim,
hidden_dim,
output_dim,
output_type,
config_heads,
ilossweights_hyperp,
loss_weights,
ilossweights_nll,
freeze_conv,
dropout,
num_conv_layers,
num_nodes,
)
super().__init__(*args, **kwargs)

def _init_conv(self):
"""Here this function overwrites _init_conv() in Base since it has different implementation
Expand Down
32 changes: 2 additions & 30 deletions hydragnn/models/GINStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,8 @@


class GINStack(Base):
def __init__(
self,
input_dim: int,
output_dim: list,
output_type: list,
num_nodes: int,
hidden_dim: int,
config_heads: {},
freeze_conv=False,
dropout: float = 0.25,
num_conv_layers: int = 16,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, using the scalar uncertainty as weights, as in paper
# https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
):
super().__init__(
input_dim,
hidden_dim,
output_dim,
output_type,
config_heads,
ilossweights_hyperp,
loss_weights,
ilossweights_nll,
freeze_conv,
dropout,
num_conv_layers,
num_nodes,
)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def get_conv(self, input_dim, output_dim):
return GINConv(
Expand Down
30 changes: 3 additions & 27 deletions hydragnn/models/MFCStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,37 +21,13 @@
class MFCStack(Base):
def __init__(
self,
input_dim: int,
output_dim: list,
output_type: list,
num_nodes: int,
max_degree: int,
hidden_dim: int,
config_heads: {},
freeze_conv=False,
dropout: float = 0.25,
num_conv_layers: int = 16,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, using the scalar uncertainty as weights, as in paper
# https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
*args,
**kwargs,
):
self.max_degree = max_degree

super().__init__(
input_dim,
hidden_dim,
output_dim,
output_type,
config_heads,
ilossweights_hyperp,
loss_weights,
ilossweights_nll,
freeze_conv,
dropout,
num_conv_layers,
num_nodes,
)
super().__init__(*args, **kwargs)

def get_conv(self, input_dim, output_dim):
return MFConv(
Expand Down
32 changes: 4 additions & 28 deletions hydragnn/models/PNAStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,9 @@ class PNAStack(Base):
def __init__(
self,
deg: list,
input_dim: int,
output_dim: list,
output_type: list,
num_nodes: int,
hidden_dim: int,
config_heads: {},
freeze_conv=False,
edge_dim: int = None,
dropout: float = 0.25,
num_conv_layers: int = 16,
ilossweights_hyperp: int = 1, # if =1, consider weighted losses for different tasks and treat the weights as hyperparameters
loss_weights: list = [1.0, 1.0, 1.0], # weights for losses of different tasks
ilossweights_nll: int = 0, # if =1, using the scalar uncertainty as weights, as in paper
# https://openaccess.thecvf.com/content_cvpr_2018/papers/Kendall_Multi-Task_Learning_Using_CVPR_2018_paper.pdf
edge_dim: int,
*args,
**kwargs,
):

self.aggregators = ["mean", "min", "max", "std"]
Expand All @@ -46,20 +35,7 @@ def __init__(
self.deg = torch.Tensor(deg)
self.edge_dim = edge_dim

super().__init__(
input_dim,
hidden_dim,
output_dim,
output_type,
config_heads,
ilossweights_hyperp,
loss_weights,
ilossweights_nll,
freeze_conv,
dropout,
num_conv_layers,
num_nodes,
)
super().__init__(*args, **kwargs)

def get_conv(self, input_dim, output_dim):
return PNAConv(
Expand Down
97 changes: 52 additions & 45 deletions hydragnn/models/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,34 +33,35 @@ def create_model_config(
return create_model(
config["model_type"],
config["input_dim"],
config["output_dim"],
config["hidden_dim"],
config["num_conv_layers"],
config["output_dim"],
config["output_type"],
config["output_heads"],
config["task_weights"],
config["num_conv_layers"],
config["freeze_conv_layers"],
config["max_neighbours"],
config["num_nodes"],
config["max_neighbours"],
config["edge_dim"],
config["pna_deg"],
verbosity,
use_gpu,
)


# FIXME: interface does not include ilossweights_hyperp, ilossweights_nll, dropout
def create_model(
model_type: str,
input_dim: int,
output_dim: int,
hidden_dim: int,
num_conv_layers: int,
output_type: str,
output_dim: list,
output_type: list,
output_heads: dict,
task_weights: list,
num_conv_layers: int,
freeze_conv: bool = False,
max_neighbours: int = None,
num_nodes: int = None,
max_neighbours: int = None,
edge_dim: int = None,
pna_deg: torch.tensor = None,
verbosity: int = 0,
Expand All @@ -72,74 +73,80 @@ def create_model(

device = get_device(use_gpu, verbosity_level=verbosity)

# Note: model-specific inputs must come first.
if model_type == "GIN":
model = GINStack(
input_dim=input_dim,
output_dim=output_dim,
hidden_dim=hidden_dim,
num_nodes=num_nodes,
num_conv_layers=num_conv_layers,
output_type=output_type,
config_heads=output_heads,
input_dim,
hidden_dim,
output_dim,
output_type,
output_heads,
loss_weights=task_weights,
freeze_conv=freeze_conv,
num_conv_layers=num_conv_layers,
num_nodes=num_nodes,
)

elif model_type == "PNA":
assert pna_deg is not None, "PNA requires degree input."
model = PNAStack(
deg=pna_deg,
input_dim=input_dim,
output_dim=output_dim,
num_nodes=num_nodes,
hidden_dim=hidden_dim,
num_conv_layers=num_conv_layers,
output_type=output_type,
config_heads=output_heads,
pna_deg,
edge_dim,
input_dim,
hidden_dim,
output_dim,
output_type,
output_heads,
loss_weights=task_weights,
edge_dim=edge_dim,
freeze_conv=freeze_conv,
num_conv_layers=num_conv_layers,
num_nodes=num_nodes,
)

elif model_type == "GAT":
# FIXME: expose options to users
heads = 6
negative_slope = 0.05
model = GATStack(
input_dim=input_dim,
output_dim=output_dim,
hidden_dim=hidden_dim,
num_nodes=num_nodes,
num_conv_layers=num_conv_layers,
output_type=output_type,
config_heads=output_heads,
heads,
negative_slope,
input_dim,
hidden_dim,
output_dim,
output_type,
output_heads,
loss_weights=task_weights,
freeze_conv=freeze_conv,
num_conv_layers=num_conv_layers,
num_nodes=num_nodes,
)

elif model_type == "MFC":
assert max_neighbours is not None, "MFC requires max_neighbours input."
model = MFCStack(
input_dim=input_dim,
output_dim=output_dim,
num_nodes=num_nodes,
hidden_dim=hidden_dim,
max_degree=max_neighbours,
num_conv_layers=num_conv_layers,
output_type=output_type,
config_heads=output_heads,
max_neighbours,
input_dim,
hidden_dim,
output_dim,
output_type,
output_heads,
loss_weights=task_weights,
freeze_conv=freeze_conv,
num_conv_layers=num_conv_layers,
num_nodes=num_nodes,
)

elif model_type == "CGCNN":
model = CGCNNStack(
input_dim=input_dim,
output_dim=output_dim,
num_nodes=num_nodes,
num_conv_layers=num_conv_layers,
output_type=output_type,
config_heads=output_heads,
edge_dim,
input_dim,
output_dim,
output_type,
output_heads,
loss_weights=task_weights,
edge_dim=edge_dim,
freeze_conv=freeze_conv,
num_conv_layers=num_conv_layers,
num_nodes=num_nodes,
)

else:
Expand Down

0 comments on commit 025b39d

Please sign in to comment.