diff --git a/configs.example/model/default.yaml b/configs.example/model/default.yaml
index 0c05838..c79e0a0 100644
--- a/configs.example/model/default.yaml
+++ b/configs.example/model/default.yaml
@@ -1,4 +1,4 @@
-_target_: pvnet_summation.models.model.Model
+_target_: pvnet_summation.models.flat_model.FlatModel
 
 output_quantiles: null
 
@@ -12,11 +12,11 @@ model_version: "898630f3f8cd4e8506525d813dd61c6d8de86144"
 output_network:
   _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
   _partial_: True
-output_network_kwargs:
   fc_hidden_features: 128
   n_res_blocks: 2
   res_block_layers: 2
   dropout_frac: 0.0
+
 predict_difference_from_sum: False
 
 # ----------------------------------------------
diff --git a/pvnet_summation/data/datamodule.py b/pvnet_summation/data/datamodule.py
index 14d85ca..498ab0c 100644
--- a/pvnet_summation/data/datamodule.py
+++ b/pvnet_summation/data/datamodule.py
@@ -145,7 +145,6 @@ def __init__(
         self.batch_dir = batch_dir
 
         self._common_dataloader_kwargs = dict(
-            shuffle=False,  # shuffled in datapipe step
             batch_size=None,  # batched in datapipe step
             sampler=None,
             batch_sampler=None,
@@ -164,7 +163,7 @@ def _get_premade_batches_datapipe(self, subdir, shuffle=False, add_filename=Fals
         file_pipeline = FileLister(f"{self.batch_dir}/{subdir}", masks="*.pt", recursive=False)
 
         if shuffle:
-            file_pipeline = file_pipeline.shuffle(buffer_size=1000)
+            file_pipeline = file_pipeline.shuffle(buffer_size=10_000)
 
         file_pipeline = file_pipeline.sharding_filter()
 
@@ -228,14 +227,14 @@ def train_dataloader(self, shuffle=True, add_filename=False):
         datapipe = self._get_premade_batches_datapipe(
             "train", shuffle=shuffle, add_filename=add_filename
         )
-        return DataLoader(datapipe, **self._common_dataloader_kwargs)
+        return DataLoader(datapipe, shuffle=shuffle, **self._common_dataloader_kwargs)
 
     def val_dataloader(self, shuffle=False, add_filename=False):
         """Construct val dataloader"""
         datapipe = self._get_premade_batches_datapipe(
             "val", shuffle=shuffle, add_filename=add_filename
         )
-        return DataLoader(datapipe, **self._common_dataloader_kwargs)
+        return DataLoader(datapipe, shuffle=shuffle, **self._common_dataloader_kwargs)
 
     def test_dataloader(self):
         """Construct test dataloader"""
@@ -265,7 +264,6 @@ def __init__(
         self.batch_dir = batch_dir
 
         self._common_dataloader_kwargs = dict(
-            shuffle=False,  # shuffled in datapipe step
             batch_size=None,  # batched in datapipe step
             sampler=None,
             batch_sampler=None,
@@ -284,7 +282,7 @@ def _get_premade_batches_datapipe(self, subdir, shuffle=False):
         file_pipeline = FileLister(f"{self.batch_dir}/{subdir}", masks="*.pt", recursive=False)
 
         if shuffle:
-            file_pipeline = file_pipeline.shuffle(buffer_size=1000)
+            file_pipeline = file_pipeline.shuffle(buffer_size=10_000)
 
         sample_pipeline = file_pipeline.sharding_filter().map(torch.load)
 
@@ -300,21 +298,21 @@ def _get_premade_batches_datapipe(self, subdir, shuffle=False):
 
         return batch_pipeline
 
-    def train_dataloader(self, shuffle=True):
+    def train_dataloader(self):
         """Construct train dataloader"""
         datapipe = self._get_premade_batches_datapipe(
             "train",
-            shuffle=shuffle,
+            shuffle=True,
         )
-        return DataLoader(datapipe, **self._common_dataloader_kwargs)
+        return DataLoader(datapipe, shuffle=True, **self._common_dataloader_kwargs)
 
-    def val_dataloader(self, shuffle=False):
+    def val_dataloader(self):
         """Construct val dataloader"""
         datapipe = self._get_premade_batches_datapipe(
             "val",
-            shuffle=shuffle,
+            shuffle=False,
         )
-        return DataLoader(datapipe, **self._common_dataloader_kwargs)
+        return DataLoader(datapipe, shuffle=False, **self._common_dataloader_kwargs)
 
     def test_dataloader(self):
         """Construct test dataloader"""
diff --git a/pvnet_summation/models/model.py b/pvnet_summation/models/flat_model.py
similarity index 75%
rename from pvnet_summation/models/model.py
rename to pvnet_summation/models/flat_model.py
index 96528a5..1996f0b 100644
--- a/pvnet_summation/models/model.py
+++ b/pvnet_summation/models/flat_model.py
@@ -7,41 +7,39 @@
 import torch
 import torch.nn.functional as F
 from pvnet.models.multimodal.linear_networks.basic_blocks import AbstractLinearNetwork
-from pvnet.models.multimodal.linear_networks.networks import DefaultFCNet
 from pvnet.optimizers import AbstractOptimizer
 from torch import nn
 
 from pvnet_summation.models.base_model import BaseModel
 
-_default_optimizer = pvnet.optimizers.Adam()
 
+class FlatModel(BaseModel):
+    """Neural network which combines GSP predictions from PVNet naively
 
-class Model(BaseModel):
-    """Neural network which combines GSP predictions from PVNet"""
+    This model flattens all the features into a 1D vector before feeding them into the sub-network.
+    """
 
-    name = "pvnet_summation_model"
+    name = "FlatModel"
 
     def __init__(
         self,
+        output_network: AbstractLinearNetwork,
         model_name: str,
-        model_version: Optional[str],
+        model_version: Optional[str] = None,
         output_quantiles: Optional[list[float]] = None,
-        output_network: AbstractLinearNetwork = DefaultFCNet,
-        output_network_kwargs: Optional[dict] = None,
         relative_scale_pvnet_outputs: bool = False,
         predict_difference_from_sum: bool = False,
-        optimizer: AbstractOptimizer = _default_optimizer,
+        optimizer: AbstractOptimizer = pvnet.optimizers.Adam(),
     ):
-        """Neural network which combines GSP predictions from PVNet
+        """Neural network which combines GSP predictions from PVNet naively
 
         Args:
             model_name: Model path either locally or on huggingface.
             model_version: Model version if using huggingface. Set to None if using local.
             output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to
                 None the output is a single value.
-            output_network: Pytorch Module class used to combine the 1D features to produce the
-                forecast.
-            output_network_kwargs: Dictionary of optional kwargs for the `output_network` module.
+            output_network: A partially instantiated pytorch Module class used to combine the 1D
+                features to produce the forecast.
             relative_scale_pvnet_outputs: If true, the PVNet predictions are scaled by a factor
                 which is proportional to their capacities.
             predict_difference_from_sum: Whether to use the sum of GSPs as an estimate for the
@@ -55,13 +53,9 @@ def __init__(
         self.relative_scale_pvnet_outputs = relative_scale_pvnet_outputs
         self.predict_difference_from_sum = predict_difference_from_sum
 
-        if output_network_kwargs is None:
-            output_network_kwargs = dict()
-
         self.model = output_network(
             in_features=np.prod(self.pvnet_output_shape),
             out_features=self.num_output_features,
-            **output_network_kwargs,
         )
 
         # Add linear layer if predicting difference from sum
@@ -85,9 +79,10 @@ def forward(self, x):
             else:
                 eff_cap = x["effective_capacity"]
 
-            # Multiply by (effective capacity / 100) since the capacities are roughly of magnitude
-            # of 100 MW. We still want the inputs to the network to be order of magnitude 1.
-            x_in = x["pvnet_outputs"] * (eff_cap / 100)
+            # The effective capacities are relative fractions of the national capacity. They sum
+            # to 1 and are quite small values; for the largest GSP the capacity is around 0.03.
+            # Therefore we multiply by 100 to make the input values a more sensible size.
+            x_in = x["pvnet_outputs"] * eff_cap * 100
         else:
             x_in = x["pvnet_outputs"]
 
diff --git a/tests/conftest.py b/tests/conftest.py
index edabfc8..951fd85 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,8 +8,8 @@
 import math
 import glob
 import tempfile
-from pvnet_summation.models.model import Model
-
+import hydra
+from pvnet_summation.models.flat_model import FlatModel
 
 from ocf_datapipes.batch import BatchKey
 from datetime import timedelta
@@ -134,22 +134,31 @@ def sample_batch(sample_datamodule):
 
 
 @pytest.fixture()
-def model_kwargs():
-    # These kwargs define the pvnet model which the summation model uses
+def flat_model_kwargs():
     kwargs = dict(
+        # These kwargs define the pvnet model which the summation model uses
         model_name="openclimatefix/pvnet_v2",
         model_version="4203e12e719efd93da641c43d2e38527648f4915",
+        # These kwargs define the structure of the summation model
+        output_network=dict(
+            _target_="pvnet.models.multimodal.linear_networks.networks.ResFCNet2",
+            _partial_=True,
+            fc_hidden_features=128,
+            n_res_blocks=2,
+            res_block_layers=2,
+            dropout_frac=0.0,
+        ),
     )
-    return kwargs
+    return hydra.utils.instantiate(kwargs)
 
 
 @pytest.fixture()
-def model(model_kwargs):
-    model = Model(**model_kwargs)
+def model(flat_model_kwargs):
+    model = FlatModel(**flat_model_kwargs)
     return model
 
 
 @pytest.fixture()
-def quantile_model(model_kwargs):
-    model = Model(output_quantiles=[0.1, 0.5, 0.9], **model_kwargs)
+def quantile_model(flat_model_kwargs):
+    model = FlatModel(output_quantiles=[0.1, 0.5, 0.9], **flat_model_kwargs)
     return model
diff --git a/tests/models/test_model.py b/tests/models/test_flat_model.py
similarity index 100%
rename from tests/models/test_model.py
rename to tests/models/test_flat_model.py