Merge pull request #108 from spoonsso/as_logging
Fixes #86
data-hound authored May 12, 2022
2 parents 45293f2 + 984842d commit 9bcb3e3
Showing 14 changed files with 336 additions and 118 deletions.
32 changes: 21 additions & 11 deletions cluster/grid.py
@@ -5,19 +5,15 @@
import yaml
import argparse
import ast
from dannce.engine.io import load_sync, load_com
from dannce.engine.processing import prepare_save_metadata
from dannce import (
_param_defaults_shared,
_param_defaults_dannce,
_param_defaults_com,
)
from typing import Text, List, Tuple
from multi_gpu import build_params_from_config_and_batch


import subprocess
import time
import logging

FILE_PATH = "dance.cluster.grid"

class GridHandler:
def __init__(
@@ -91,10 +87,13 @@ def submit_jobs(self, batch_params: List, cmd: Text):
batch_params (List): List of batch training parameters.
cmd (Text): System command to be issued.
"""
# Prefix for log messages from this method
prepend_log_msg = FILE_PATH + ".GridHandler.submit_jobs "

if self.verbose:
for batch_param in batch_params:
print(batch_param)
print("Command issued: ", cmd)
logging.info(prepend_log_msg + str(batch_param))
logging.info(prepend_log_msg + "Command issued: %s", cmd)
if not self.test:
if isinstance(cmd, list):
for i in range(len(cmd)):
@@ -136,6 +135,10 @@ def submit_dannce_train_grid(self) -> Tuple[List, Text]:
"""
batch_params = self.generate_batch_params_dannce()

# Set up logging before submitting the array job
grid_params = self.load_params(self.config)
logging.basicConfig(filename=grid_params["log_dest"], level=grid_params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

slurm_config = self.load_params(self.load_params(self.config)["slurm_config"])
cmd = (
'sbatch --wait --array=0-%d %s --wrap="%s dannce-train-single-batch %s %s"'
@@ -163,13 +166,20 @@ def dannce_train_single_batch():
handler = GridHandler(config, grid_config)
batch_params = handler.load_batch_params()
task_id = int(os.getenv("SLURM_ARRAY_TASK_ID"))
print("Task ID = ", task_id)
batch_param = batch_params[task_id]
print(batch_param)


# Build final parameter dictionary
params = build_params_from_config_and_batch(config, batch_param)

# Set up logging for dannce_train_single_batch
logging.basicConfig(filename=params["log_dest"], level=params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
prepend_log_msg = FILE_PATH + ".dannce_train_single_batch "

logging.info(prepend_log_msg + "Task ID = %d", task_id)
logging.info(prepend_log_msg + str(batch_param))

# Train
dannce_train(params)

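The logging calls added in this file use the standard library's lazy %-style formatting: placeholders must appear in the message string, with the values passed as separate arguments. A minimal sketch of the pattern, with an illustrative log file and command string:

    import logging

    FILE_PATH = "dannce.cluster.grid"

    logging.basicConfig(
        filename="example.log",  # illustrative destination
        level=logging.INFO,
        format="%(asctime)s %(levelname)s:%(message)s",
        datefmt="%m/%d/%Y %I:%M:%S %p",
    )

    prepend_log_msg = FILE_PATH + ".GridHandler.submit_jobs "
    cmd = "sbatch --wait --array=0-3 ..."  # illustrative command

    # The %s placeholder is filled only if the record is actually emitted,
    # so the argument is never formatted for suppressed log levels.
    logging.info(prepend_log_msg + "Command issued: %s", cmd)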
47 changes: 41 additions & 6 deletions cluster/multi_gpu.py
@@ -14,9 +14,11 @@
)
import scipy.io as spio
from typing import Dict, List, Text
import logging

DANNCE_BASE_NAME = "save_data_AVG"
COM_BASE_NAME = "com3d"
FILE_PATH = "dannce.cluster.multi_gpu"


def loadmat(filename: Text) -> Dict:
@@ -111,6 +113,26 @@ def __init__(
self.dannce_file = self.load_dannce_file()
else:
self.dannce_file = dannce_file

self.setup_logging()

def load_params(self, param_path: Text) -> Dict:
"""Load a params file
Args:
param_path (Text): Path to parameters file
Returns:
Dict: Parameters dictionary
"""
with open(param_path, "rb") as file:
params = yaml.safe_load(file)
return params

def setup_logging(self):
params = self.load_params(self.config)
logging.basicConfig(filename=params["log_dest"], level=params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

def save_batch_params(self, batch_params: List):
"""Save the batch_param dictionary to the batch_param file.
@@ -408,11 +430,12 @@ def submit_jobs(self, batch_params: List, cmd: str):
batch_params (List): Batch parameters list
cmd (str): System command
"""
prepend_log_msg = FILE_PATH + ".MultiGpuHandler.submit_jobs "
if self.verbose:
for batch_param in batch_params:
print("Start sample:", batch_param["start_sample"])
print("End sample:", batch_param["max_num_samples"])
print("Command issued: ", cmd)
logging.debug("Start sample:", batch_param["start_sample"])
logging.debug("End sample:", batch_param["max_num_samples"])
logging.info(prepend_log_msg + "Command issued: ", cmd)
if not self.test:
return os.WEXITSTATUS(os.system(cmd))

@@ -447,8 +470,10 @@ def submit_com_predict_multi_gpu(self):
Divide project into equal chunks of n_samples_per_gpu samples. Submit an array job
that predicts over each chunk in parallel.
"""
prepend_log_msg = FILE_PATH + ".MultiGpuHandler.submit_com_predict_multi_gpu "

n_samples = self.get_n_samples(self.dannce_file, use_com=False)
print(n_samples)
logging.info(prepend_log_msg + str(n_samples))
batch_params = self.generate_batch_params_com(n_samples)
slurm_config = load_params(load_params(self.config)["slurm_config"])
cmd = (
@@ -633,13 +658,18 @@ def dannce_predict_single_batch():
"""CLI entrypoint to predict a single batch."""
from dannce.interface import dannce_predict

prepend_log_msg = FILE_PATH + "dannce_predict_single_batch"

# Load in parameters to modify
config = sys.argv[1]
handler = MultiGpuHandler(config)
batch_params = handler.load_batch_params()
task_id = int(os.getenv("SLURM_ARRAY_TASK_ID"))
batch_param = batch_params[task_id]
print(batch_param)
handler_params = handler.load_params(handler.config)
logging.basicConfig(filename=handler_params["log_dest"], level=handler_params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logging.info(prepend_log_msg + str(batch_param))

# Build final parameter dictionary
params = build_params_from_config_and_batch(config, batch_param)
@@ -652,14 +682,19 @@ def com_predict_single_batch():
"""CLI entrypoint to predict a single batch."""
from dannce.interface import com_predict

prepend_log_msg = FILE_PATH + "com_predict_single_batch"

# Load in parameters to modify
config = sys.argv[1]
handler = MultiGpuHandler(config)
batch_params = handler.load_batch_params()
task_id = int(os.getenv("SLURM_ARRAY_TASK_ID"))
# task_id = 0
batch_param = batch_params[task_id]
print(batch_param)
handler_params = handler.load_params(handler.config)
logging.basicConfig(filename=handler_params["log_dest"], level=handler_params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logging.info(prepend_log_msg + str(batch_param))

# Build final parameter dictionary
params = build_params_from_config_and_batch(config, batch_param, dannce_net=False)
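Each CLI entrypoint in this file re-reads log_dest and log_level from the same YAML parameters before calling logging.basicConfig. A standalone sketch of that pattern, assuming a config file containing those two keys (the path is illustrative):

    import logging
    import yaml

    def setup_logging(config_path):
        # Pull the log destination and level out of the experiment config.
        with open(config_path, "rb") as file:
            params = yaml.safe_load(file)
        logging.basicConfig(
            filename=params["log_dest"],
            level=params["log_level"],  # a level name such as "INFO" works
            format="%(asctime)s %(levelname)s:%(message)s",
            datefmt="%m/%d/%Y %I:%M:%S %p",
        )

    setup_logging("configs/dannce_mouse_config.yaml")  # illustrative path

Note that basicConfig only configures the root logger on its first effective call, so repeated calls across entrypoints are harmless.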
5 changes: 4 additions & 1 deletion configs/dannce_mouse_config.yaml
@@ -33,4 +33,7 @@ nvox: 64
max_num_samples: 1000

# By default, will load in the first hdf5 file at this location for fine-tuning. If training from scratch, set to None
dannce_finetune_weights: ./DANNCE/weights/weights.rat.MAX/
dannce_finetune_weights: ./DANNCE/weights/weights.rat.MAX/

log_level: DEBUG
# log_dest: ../logs/dannce-04-15-22.log
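log_level is given here as a level name; the logging module resolves registered names to their numeric values, so the YAML string can be handed straight to basicConfig. A quick illustration:

    import logging

    # Registered level names map to the numeric logging constants.
    assert logging.getLevelName("DEBUG") == logging.DEBUG  # 10
    logging.basicConfig(level="DEBUG")  # string levels are accepted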
3 changes: 3 additions & 0 deletions dannce/__init__.py
@@ -1,3 +1,4 @@
"""Dannce module and default parameters"""
from datetime import datetime
# Default parameters, which can be superseded by CL arguments or
# config files
@@ -46,6 +47,8 @@
"valid_exp": None,
"norm_method":"layer",
"slurm_config": None,
"log_level": "INFO",
"log_dest": "../../logs/dannce_"+datetime.now().strftime("%b%d_%Y")+ ".log",
}
_param_defaults_dannce = {
"metric": ["euclidean_distance_3D"],
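The new log_dest default embeds the current date via strftime. A small check of what that expression produces:

    from datetime import datetime

    # "%b%d_%Y" renders abbreviated month, day, and year, e.g. "May12_2022".
    log_dest = "../../logs/dannce_" + datetime.now().strftime("%b%d_%Y") + ".log"
    print(log_dest)  # e.g. ../../logs/dannce_May12_2022.log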
21 changes: 19 additions & 2 deletions dannce/cli.py
@@ -18,6 +18,7 @@
import argparse
import yaml
from typing import Dict, Text
import logging


def load_params(param_path: Text) -> Dict:
@@ -255,6 +256,18 @@ def add_shared_args(
help="Normalization method to use, can be 'batch', 'instance', or 'layer'.",
)

parser.add_argument(
"--log-level",
dest="log_level",
help="Level of logging to use, can be 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'. Default is 'INFO'.",
)

parser.add_argument(
"--log-dest",
dest="log_dest",
help="Path of the file logs are written to. Defaults to a dated file under ../../logs/.",
)

return parser


@@ -844,7 +857,11 @@ def combine(base_params: Dict, clargs: argparse.Namespace, dannce_net: bool) ->
base_params[k] = v
elif v is not None:
base_params[k] = v


log_dir = os.path.dirname(base_params["log_dest"])
if log_dir:
os.makedirs(log_dir, exist_ok=True)
logging.basicConfig(filename=base_params["log_dest"], level=base_params["log_level"],
format='%(asctime)s %(levelname)s:%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
for k, v in base_params.items():
print("{} set to: {}".format(k, v))
logging.info("{} set to: {}".format(k, v))
return base_params
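A minimal sketch of how the two new flags flow into the logging setup performed in combine(). The parser below is a standalone stand-in for the dannce CLI, not the real entrypoint:

    import argparse
    import logging

    parser = argparse.ArgumentParser()
    parser.add_argument("--log-level", dest="log_level", default="INFO")
    parser.add_argument("--log-dest", dest="log_dest", default="dannce.log")
    args = parser.parse_args(["--log-level", "DEBUG", "--log-dest", "run.log"])

    logging.basicConfig(filename=args.log_dest, level=args.log_level,
                        format="%(asctime)s %(levelname)s:%(message)s",
                        datefmt="%m/%d/%Y %I:%M:%S %p")
    logging.info("log_level set to: %s", args.log_level)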
20 changes: 11 additions & 9 deletions dannce/engine/generator.py
@@ -11,6 +11,7 @@
import time
import scipy.ndimage.interpolation
import tensorflow as tf
import logging

# from tensorflow_graphics.geometry.transformation.axis_angle import rotate
from multiprocessing.dummy import Pool as ThreadPool
@@ -24,10 +25,11 @@
)

TF_GPU_MEMORY_FRACTION = 0.9
FILE_PATH = "dannce.engine.generator"


class DataGenerator(keras.utils.Sequence):
"""Generate data for Keras.
"""Generate data for Keras. The object creating instance of this class should have logging enabled.
Attributes:
batch_size (int): Batch size to generate
@@ -186,7 +188,7 @@ def random_rotate(self, X: np.ndarray, y_3d: np.ndarray, log: bool = False):


class DataGenerator_3Dconv(DataGenerator):
"""Update generator class to handle multiple experiments.
"""Update generator class to handle multiple experiments. The object creator should have logging enabled.
Attributes:
camera_params (Dict): Camera parameters dictionary.
@@ -330,7 +332,7 @@ def __init__(
self.interp = interp
self.depth = depth
self.channel_combo = channel_combo
print(self.channel_combo)
logging.info(FILE_PATH + ".DataGenerator_3Dconv.__init__ " + str(self.channel_combo))
self.mode = mode
self.immode = immode
self.tifdirs = tifdirs
@@ -912,7 +914,7 @@ def __init__(
self.interp = interp
self.depth = depth
self.channel_combo = channel_combo
print(self.channel_combo)
logging.info(FILE_PATH + ".DataGenerator_3Dconv_torch.__init__" + self.channel_combo if channel_combo is not None else "None")
self.gpu_id = gpu_id
self.mode = mode
self.immode = immode
@@ -941,7 +943,7 @@
config.gpu_options.per_process_gpu_memory_fraction = TF_GPU_MEMORY_FRACTION
config.gpu_options.allow_growth = True
self.session = tf.compat.v1.Session(config=config, graph=tf.Graph())
print("Executing eagerly: ", tf.executing_eagerly(), flush=True)
logging.info(FILE_PATH + ".DataGenerator_3Dconv_torch.__init__" + "Executing eagerly: " + str(tf.executing_eagerly()))#, flush=True)
for i, ID in enumerate(list_IDs):
experimentID = int(ID.split("_")[0])
for camname in self.camnames[experimentID]:
@@ -954,7 +956,7 @@
)
self.camera_params[experimentID][camname]["M"] = M

print("Init took {} sec.".format(time.time() - ts))
logging.info(FILE_PATH + ".DataGenerator_3Dconv_torch.__init__" + "Init took {} sec.".format(time.time() - ts))

def __getitem__(self, index: int):
"""Generate one batch of data.
@@ -1543,7 +1545,7 @@ def __init__(
self.interp = interp
self.depth = depth
self.channel_combo = channel_combo
print(self.channel_combo)
logging.info(FILE_PATH + ".DataGenerator_3Dconv_tf.__init__ " + self.channel_combo if channel_combo is not None else "None")
self.gpu_id = gpu_id
self.mode = mode
self.immode = immode
@@ -1583,7 +1585,7 @@
ops.camera_matrix(K, R, t), dtype="float32"
)

print("Init took {} sec.".format(time.time() - ts))
logging.info(FILE_PATH + ".DataGenerator_3Dconv_tf.__init__ " + "Init took {} sec.".format(time.time() - ts))

def __getitem__(self, index):
"""Generate one batch of data.
@@ -2689,7 +2691,7 @@ def on_epoch_end(self):
"""Update indexes after each epoch."""
self.indexes = np.arange(len(self.list_IDs))
if self.shuffle == True:
print("SHUFFLING DATA INDICES")
logging.info(FILE_PATH + ".DataGenerator_3Dconv_npy.on_epoch_end " + "SHUFFLING DATA INDICES")
np.random.shuffle(self.indexes)

def rot90(self, X):
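The parentheses in the channel_combo log calls matter: a conditional expression binds more loosely than +, so without them the prefix itself becomes part of the conditional. A short demonstration:

    prefix = "dannce.engine.generator.DataGenerator_3Dconv_torch.__init__ "
    channel_combo = None

    # Parses as (prefix + channel_combo) if channel_combo is not None else "None",
    # so the prefix is dropped whenever channel_combo is None.
    without_parens = prefix + channel_combo if channel_combo is not None else "None"

    # The parenthesized form keeps the prefix in both branches.
    with_parens = prefix + (channel_combo if channel_combo is not None else "None")

    print(without_parens)  # None
    print(with_parens)     # dannce.engine.generator.DataGenerator_3Dconv_torch.__init__ None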
(Diffs for the remaining 8 changed files are not shown.)