Skip to content

Commit

Permalink
fix: indentation
Browse files Browse the repository at this point in the history
  • Loading branch information
sehoffmann committed Mar 28, 2024
1 parent 235c563 commit e142bd3
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 37 deletions.
17 changes: 9 additions & 8 deletions dmlcloud/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def register_model(

if verbose:
msg = f'Model "{name}":\n'
msg += f' - Parameters: {sum(p.numel() for p in model.parameters())/1e6:.1f} kk\n'
msg += f' - DDP: {use_ddp}\n'
msg += f' - {model}'
msg += f' - Parameters: {sum(p.numel() for p in model.parameters())/1e6:.1f} kk\n'
msg += f' - DDP: {use_ddp}\n'
msg += f' - {model}'
self.logger.info(msg)

def register_optimizer(self, name: str, optimizer, scheduler=None):
Expand All @@ -91,11 +91,11 @@ def register_dataset(self, name: str, dataset: Union[DataLoader, Dataset, Sequen
msg = f'Dataset "{name}":\n'
try:
length = len(dataset)
msg += f' - Batches (Total): ~{length * dist.get_world_size()}\n'
msg += f' - Batches (/Worker): {length}\n'
msg += f' - Batches (Total): ~{length * dist.get_world_size()}\n'
msg += f' - Batches (/Worker): {length}\n'
except TypeError: # __len__ not implemented
msg += ' - Batches (Total): N/A\n'
msg += ' - Batches (/Worker): N/A\n'
msg += ' - Batches (Total): N/A\n'
msg += ' - Batches (/Worker): N/A\n'
self.logger.info(msg)

def append_stage(self, stage: Stage, max_epochs: Optional[int] = None, name: Optional[str] = None):
Expand Down Expand Up @@ -231,7 +231,8 @@ def _pre_run(self):
self._resume_run()

diagnostics = general_diagnostics()
diagnostics += '\n* CONFIG:\n' + OmegaConf.to_yaml(self.config)
diagnostics += '\n* CONFIG:\n'
diagnostics += '\n'.join(f' {line}' for line in OmegaConf.to_yaml(self.config, resolve=True).splitlines())
self.logger.info(diagnostics)

self.pre_run()
Expand Down
58 changes: 29 additions & 29 deletions dmlcloud/util/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,58 +112,58 @@ def experiment_header(

def general_diagnostics() -> str:
msg = '* GENERAL:\n'
msg += f' - argv: {sys.argv}\n'
msg += f' - cwd: {Path.cwd()}\n'
msg += f' - argv: {sys.argv}\n'
msg += f' - cwd: {Path.cwd()}\n'

msg += f' - host (root): {os.environ.get("HOSTNAME")}\n'
msg += f' - user: {os.environ.get("USER")}\n'
msg += f' - git-hash: {git_hash()}\n'
msg += f' - conda-env: {os.environ.get("CONDA_DEFAULT_ENV", "N/A")}\n'
msg += f' - sys-prefix: {sys.prefix}\n'
msg += f' - backend: {dist.get_backend()}\n'
msg += f' - cuda: {torch.cuda.is_available()}\n'
msg += f' - host (root): {os.environ.get("HOSTNAME")}\n'
msg += f' - user: {os.environ.get("USER")}\n'
msg += f' - git-hash: {git_hash()}\n'
msg += f' - conda-env: {os.environ.get("CONDA_DEFAULT_ENV", "N/A")}\n'
msg += f' - sys-prefix: {sys.prefix}\n'
msg += f' - backend: {dist.get_backend()}\n'
msg += f' - cuda: {torch.cuda.is_available()}\n'

if torch.cuda.is_available():
msg += '* GPUs (root):\n'
nvsmi = subprocess.run(['nvidia-smi', '-L'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout.decode()
for line in nvsmi.splitlines():
msg += f' - {line}\n'
msg += f' - {line}\n'

msg += '* VERSIONS:\n'
msg += f' - python: {sys.version}\n'
msg += f' - dmlcloud: {dmlcloud.__version__}\n'
msg += f' - cuda: {torch.version.cuda}\n'
msg += f' - python: {sys.version}\n'
msg += f' - dmlcloud: {dmlcloud.__version__}\n'
msg += f' - cuda: {torch.version.cuda}\n'
try:
msg += ' - ' + Path('/proc/driver/nvidia/version').read_text().splitlines()[0] + '\n'
except (FileNotFoundError, IndexError):
pass

msg += f' - torch: {torch.__version__}\n'
msg += f' - torch: {torch.__version__}\n'
if try_get_version('torchvision'):
msg += f' - torchvision: {try_get_version("torchvision")}\n'
msg += f' - torchvision: {try_get_version("torchvision")}\n'
if try_get_version('torchtext'):
msg += f' - torchtext: {try_get_version("torchtext")}\n'
msg += f' - torchtext: {try_get_version("torchtext")}\n'
if try_get_version('torchaudio'):
msg += f' - torchaudio: {try_get_version("torchaudio")}\n'
msg += f' - torchaudio: {try_get_version("torchaudio")}\n'
if try_get_version('einops'):
msg += f' - einops: {try_get_version("einops")}\n'
msg += f' - einops: {try_get_version("einops")}\n'
if try_get_version('numpy'):
msg += f' - numpy: {try_get_version("numpy")}\n'
msg += f' - numpy: {try_get_version("numpy")}\n'
if try_get_version('pandas'):
msg += f' - pandas: {try_get_version("pandas")}\n'
msg += f' - pandas: {try_get_version("pandas")}\n'
if try_get_version('xarray'):
msg += f' - xarray: {try_get_version("xarray")}\n'
msg += f' - xarray: {try_get_version("xarray")}\n'
if try_get_version('sklearn'):
msg += f' - sklearn: {try_get_version("sklearn")}\n'
msg += f' - sklearn: {try_get_version("sklearn")}\n'

if 'SLURM_JOB_ID' in os.environ:
msg += '* SLURM:\n'
msg += f' - SLURM_JOB_ID = {slurm.slurm_job_id()}\n'
msg += f' - SLURM_STEP_ID = {slurm.slurm_step_id()}\n'
msg += f' - SLURM_STEP_NODELIST = {os.environ.get("SLURM_STEP_NODELIST")}\n'
msg += f' - SLURM_TASKS_PER_NODE = {os.environ.get("SLURM_TASKS_PER_NODE")}\n'
msg += f' - SLURM_STEP_GPUS = {os.environ.get("SLURM_STEP_GPUS")}\n'
msg += f' - SLURM_GPUS_ON_NODE = {os.environ.get("SLURM_GPUS_ON_NODE")}\n'
msg += f' - SLURM_CPUS_PER_TASK = {os.environ.get("SLURM_CPUS_PER_TASK")}'
msg += f' - SLURM_JOB_ID = {slurm.slurm_job_id()}\n'
msg += f' - SLURM_STEP_ID = {slurm.slurm_step_id()}\n'
msg += f' - SLURM_STEP_NODELIST = {os.environ.get("SLURM_STEP_NODELIST")}\n'
msg += f' - SLURM_TASKS_PER_NODE = {os.environ.get("SLURM_TASKS_PER_NODE")}\n'
msg += f' - SLURM_STEP_GPUS = {os.environ.get("SLURM_STEP_GPUS")}\n'
msg += f' - SLURM_GPUS_ON_NODE = {os.environ.get("SLURM_GPUS_ON_NODE")}\n'
msg += f' - SLURM_CPUS_PER_TASK = {os.environ.get("SLURM_CPUS_PER_TASK")}'

return msg

0 comments on commit e142bd3

Please sign in to comment.