Added sigma dora, minor fixes and energy usage
OniDaito committed Oct 1, 2021
1 parent 8f5ff2c commit c3b49e8
Showing 8 changed files with 63 additions and 137 deletions.
20 changes: 10 additions & 10 deletions README.markdown
@@ -45,6 +45,11 @@ If you want some fancy formatted results or if you are running the tl;dr script,
* [Imagemagick](https://imagemagick.org/index.php)
* [ffmpeg](https://ffmpeg.org/)

Finally, in order to view the model the network has derived, you'll need a program capable of displaying obj or ply files. These are pretty numerous, but here are a few we use:

* [meshlab](https://www.meshlab.net/)
* [blender](https://www.blender.org/)

Chances are, if you are running a Linux distribution, you will have these already; if not, they'll be available from your package manager (such as apt or pacman).

All of the python requirements are listed in the requirements.txt file (there aren't too many).
@@ -175,6 +180,10 @@ The data used in the paper comes from [Suliana Manley's research group](https://

... filling the missing steps with the other sigma levels. This takes quite a while, even with only 10 augmentations. The pre-rendered data can be found on [Zenodo](https://zenodo.org/record/4751057), complete with full instructions for generating it.

### Energy Usage

The estimated energy use for training a simulated model is 2.1 kWh, based on a measurement of 623.4 kWh over 166 days, during which 298 models were trained and evaluated. This figure was confirmed by cross-checking against the wattage of the GPU and the time taken to generate an average model.
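
A quick sanity check of the per-model figure, which is simply the measured total divided by the number of models trained in that period:

```python
# Rough arithmetic check of the per-model energy figure quoted above.
total_energy_kwh = 623.4   # metered energy over the 166-day period
models_trained = 298       # models trained and evaluated in that period

energy_per_model_kwh = total_energy_kwh / models_trained
print(f"{energy_per_model_kwh:.2f} kWh per model")  # ~2.09 kWh, i.e. roughly 2.1 kWh
```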

## Outputs

Once you have a trained network, you can generate the final outputs using the *generate_stats.sh* script found in the *run* directory.
@@ -232,12 +241,6 @@ When running train.py, there are a number of options one can choose.
--lr
Learning rate (default: 0.0004).

--mask-thresh
Threshold for what we consider in the loss (default: 0.05)

--plr
Learning rate for points (default: same as the learning rate).

--spawn-rate
Probability of spawning a point (default: 1.0).

@@ -268,9 +271,6 @@ When running train.py, there are a number of options one can choose.
--normalise-basic
Normalise with torch basic intensity divide.

--scheduler
Use a scheduler on the loss.

--seed
Random seed (default: 1).

@@ -295,7 +295,7 @@ When running train.py, there are a number of options one can choose.
--load
A checkpoint file to load in order to continue training.

"--savename
--savename
The name for checkpoint save file.

--savedir
42 changes: 13 additions & 29 deletions eval.py
@@ -31,7 +31,7 @@
from util.math import VecRotTen, VecRot, TransTen, PointsTen


def angle_eval(args, model, points, batch_size, device):
def angle_eval(args, model, points, prev_args, device):
"""For every angle, save the in and out so we can assess where the
network is failing."""
xt = 0.0
@@ -90,15 +90,15 @@ def angle_eval(args, model, points, batch_size, device):
save_image(result, args.savedir + "/" + "eval_in_" + str(idx).zfill(3) + ".jpg")

target = result.reshape(1, 128, 128)
target = target.repeat(batch_size, 1, 1, 1)
target = target.repeat(prev_args.batch_size, 1, 1, 1)
target = target.to(device)
target = normaliser.normalise(target)

# We use tpoints because otherwise we can't update points
# and keep working out the gradient cos pytorch weirdness
model.set_sigma(args.sigma)
output = model.forward(target, points)
output = normaliser.normalise(output.reshape(batch_size, 1, 128, 128))
output = normaliser.normalise(output.reshape(prev_args.batch_size, 1, 128, 128))
loss = F.l1_loss(output, target)

output = torch.squeeze(output.cpu()[0])
@@ -109,7 +109,7 @@ def angle_eval(args, model, points, batch_size, device):
S.write_immediate(loss, "eval_loss", 0, 0, idx)


def basic_eval(args, model, points, batch_size, device):
def basic_eval(args, model, points, prev_args, device):
""" Our basic evaluation step. """
xr = 0.0
yr = 0.0
@@ -135,7 +135,7 @@ def basic_eval(args, model, points, batch_size, device):
t = TransTen(xt, yt)

normaliser = NormaliseNull()
if args.normalise_basic:
if prev_args.normalise_basic:
normaliser = NormaliseTorch()

# Setup our splatting pipeline which is added to both dataloader
@@ -154,15 +154,15 @@ def basic_eval(args, model, points, batch_size, device):
save_image(result.clone().cpu(), args.savedir + "/" + "eval_single_in.jpg")

target = result.reshape(1, 128, 128)
target = target.repeat(batch_size, 1, 1, 1)
target = target.repeat(prev_args.batch_size, 1, 1, 1)
target = target.to(device)
target = normaliser.normalise(target)

# We use tpoints because otherwise we can't update points
# and keep working out the gradient cos pytorch weirdness
model.set_sigma(args.sigma)
output = model.forward(target, points)
output = normaliser.normalise(output.reshape(batch_size, 1, 128, 128))
output = normaliser.normalise(output.reshape(prev_args.batch_size, 1, 128, 128))
loss = F.l1_loss(output, target)
print("Loss :", loss)
print("Rotations returned:", model.get_rots())
@@ -205,28 +205,24 @@ def evaluate(args, device, animate=False):
S.on(args.savedir)
model = None
points = None
model = load_model(args.savedir + "/model.tar", device)

if os.path.isfile(args.savedir + "/" + args.savename):
(model, points) = load_checkpoint(
args.savedir, args.savename, device, evaluation=True
(model, points, _, _, _, _, prev_args) = load_checkpoint(
model, args.savedir, args.savename, device
)
model = load_model(args.savedir + "/model.tar", device)
model.to(device)
print("Loaded model", model)
else:
print("Error - need to pass in a model")
return

batch_size = model._final.size()[0]
print("Batch Size :", batch_size)

model.eval()
random.seed()
if args.predict_sigma:
model.predict_sigma = True
basic_eval(args, model, points, batch_size, device)
basic_eval(args, model, points, prev_args, device)

if animate:
angle_eval(args, model, points, batch_size, device)
angle_eval(args, model, points, prev_args, device)
S.close()


@@ -237,18 +233,6 @@ def evaluate(args, device, animate=False):
parser.add_argument(
"--no-cuda", action="store_true", default=False, help="disables CUDA training"
)
parser.add_argument(
"--predict-sigma",
action="store_true",
default=False,
help="Does this model predict sigma",
)
parser.add_argument(
"--normalise-basic",
action="store_true",
default=False,
help="Use a basic normaliser",
)
parser.add_argument(
"--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
)
4 changes: 2 additions & 2 deletions net/net.py
@@ -277,9 +277,9 @@ def forward(self, target: torch.Tensor, points: PointsTen):
nx = 2

if self.predict_translate:
tx = (torch.tanh(rot[nx+1]) * 2.0) * self.max_shift
ty = (torch.tanh(rot[nx+2]) * 2.0) * self.max_shift
nx += 2
tx = (torch.tanh(rot[3]) * 2.0) * self.max_shift
ty = (torch.tanh(rot[4]) * 2.0) * self.max_shift

sp = nn.Softplus(threshold=12)
final_sigma = self.sigma
7 changes: 4 additions & 3 deletions run.py
@@ -129,11 +129,12 @@ def image_test(model, points, device, sigma, input_image):
if args.load and os.path.isfile(args.load + "/checkpoint.pth.tar"):
# (savedir, savename) = os.path.split(args.load)
# print(savedir, savename)
(model, points) = load_checkpoint(
args.load, "checkpoint.pth.tar", device, evaluation=True
)
model = load_model(args.load + "/model.tar")
(model, points, _, _, _, _, prev_args) = load_checkpoint(
args.load, "checkpoint.pth.tar", device
)
model = model.to(device)
model.eval()
else:
print("--load must point to a run directory.")
sys.exit(0)
2 changes: 1 addition & 1 deletion run/run.conf.example
@@ -5,4 +5,4 @@
# Execute the following commands to get the code associated with this run:
#git clone [email protected]:oni/shaper.git
#git reset --hard d94277d56121287a7417153c5ce579d7be7e525
python3 ../train.py --savedir ../../runs/temp --save-interval 400 --train-size 100 --test-size 40 --valid-size 10 --objpath ../objs/teapot.obj --no-cuda --buffer-size 200 --epochs 20 --batch-size 2 --log-interval 100 --num-points 50 --lr 0.0004 --sigma-file sigma.csv
python ../train.py --savedir ../../runs/temp --save-interval 400 --train-size 100 --test-size 40 --valid-size 10 --objpath ../objs/teapot_large.obj --no-cuda --buffer-size 200 --epochs 20 --batch-size 2 --log-interval 100 --num-points 50 --lr 0.0004 --sigma-file sigma.csv
1 change: 1 addition & 0 deletions run/sigma_dora.csv
@@ -0,0 +1 @@
2.8,2.8,2.52,2.52,2.27,2.27,2.0,2.0,1.8,1.8,1.6,1.6,1.2,1.2,1.0,1.0,0.0,0.0,0.0,0.0
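
The schedule above appears to supply one sigma value per epoch (20 values, matching `--epochs 20` in *run/run.conf.example*). The snippet below is only an illustrative sketch of reading and stepping through such a file; the helper name `load_sigma_schedule` is ours rather than the repository's, and the `model.set_sigma` call mirrors the one visible in *eval.py*.

```python
# Illustrative sketch only: parse a one-line sigma schedule such as
# run/sigma_dora.csv and step through it, one value per epoch.
import csv
from typing import List

def load_sigma_schedule(path: str) -> List[float]:
    with open(path, newline="") as f:
        row = next(csv.reader(f))      # the file is a single comma-separated row
    return [float(v) for v in row]

if __name__ == "__main__":
    sigmas = load_sigma_schedule("run/sigma_dora.csv")
    for epoch, sigma in enumerate(sigmas):
        # model.set_sigma(sigma)       # as done in eval.py / train.py
        print(f"epoch {epoch}: sigma = {sigma}")
```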
62 changes: 5 additions & 57 deletions train.py
@@ -18,7 +18,6 @@
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import math
import random
@@ -39,14 +38,12 @@
from util.math import PointsTen


def calculate_loss(args, target: torch.Tensor, output: torch.Tensor):
def calculate_loss(target: torch.Tensor, output: torch.Tensor):
"""
Our loss function, used in train and test functions.
Parameters
----------
args : dict
The arguments object created in __main__
target : torch.Tensor
The target, properly shaped.
@@ -60,17 +57,7 @@ def calculate_loss(args, target: torch.Tensor, output: torch.Tensor):
A loss object
"""

# Removed the masked loss and stuck with the basic one as it gives a cleaner
# final model and didn't really fix the double headed problem.
#target_masked = (target > args.mask_thresh).float()
#target = torch.mul(target, target_masked)

#output = output.reshape(args.batch_size, 1, args.image_size, args.image_size)

#masked_output = torch.mul(output, target_masked)
#loss = F.l1_loss(masked_output, target, reduction="sum")
loss = F.l1_loss(output, target, reduction="sum")

return loss


@@ -232,7 +219,6 @@ def train(
buffer_test,
data_loader,
optimiser,
use_scheduler=False,
):
"""
Now we've had some setup, lets do the actual training.
Expand Down Expand Up @@ -262,14 +248,6 @@ def train(
"""

model.train()
scheduler = ReduceLROnPlateau(
optimiser,
mode="min",
factor=0.1,
patience=10,
threshold=0.0001,
threshold_mode="abs",
)

# Which normalisation are we using?
normaliser = NormaliseNull()
@@ -304,7 +282,7 @@

output = normaliser.normalise(model(target_shaped, points))

loss = calculate_loss(args, target_shaped, output)
loss = calculate_loss(target_shaped, output)
loss.backward()
lossy = loss.item()
optimiser.step()
@@ -354,14 +332,13 @@
epoch,
batch_idx,
loss,
sigma,
args,
args.savedir,
args.savename,
)

buffer_train.set.shuffle()
# TODO - This loss should be on the validation set but for now...
if use_scheduler:
scheduler.step(loss)

# Save a final points file once training is complete
S.save_points(points, args.savedir, epoch, batch_idx)
@@ -511,12 +488,8 @@ def init(args, device):
set_train.save(args.savedir + "/train_set.pickle")
data_loader.save(args.savedir + "/train_data.pickle")

plr = args.lr
if args.plr is not None:
plr = args.plr
variables = []
variables.append({"params": model.parameters()})
variables.append({"params": points.data, "lr": plr})
optimiser = optim.AdamW(variables, lr=args.lr)
print("Starting new model")

@@ -530,8 +503,7 @@ def init(args, device):
buffer_train,
buffer_test,
data_loader,
optimiser,
use_scheduler=args.scheduler
optimiser
)

save_model(model, args.savedir + "/model.tar")
@@ -545,38 +517,21 @@ def init(args, device):
"--batch-size",
type=int,
default=20,
metavar="N",
help="input batch size for training \
(default: 20)",
)
parser.add_argument(
"--epochs",
type=int,
default=10,
metavar="N",
help="number of epochs to train (default: 10)",
)
parser.add_argument(
"--lr",
type=float,
default=0.0004,
metavar="LR",
help="learning rate (default: 0.0004)",
)
parser.add_argument(
"--mask-thresh",
type=float,
default=0.05,
metavar="LR",
help="Threshold for what we consider in the loss \
(default: 0.05)",
)
parser.add_argument(
"--plr",
type=float,
default=None,
help="Learning rate for points (default: The same as the learning rate).",
)
parser.add_argument(
"--spawn-rate",
type=float,
@@ -646,12 +601,6 @@ def init(args, device):
default=False,
help="Normalise with torch basic intensity divide",
)
parser.add_argument(
"--scheduler",
action="store_true",
default=False,
help="Use a scheduler on the loss.",
)
parser.add_argument(
"--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
)
@@ -700,7 +649,6 @@ def init(args, device):
"--save-interval",
type=int,
default=1000,
metavar="N",
help="how many batches to wait before saving.",
)
parser.add_argument(