Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] Display latest histogram by default #2834

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Changelog

## 3.17.5 Jun 2, 2023

- Fix gpu stat collection when driver is not loaded (mihran113)
- Fix issue with overflowing box content in full-view mode in Base Explorers (KaroMourad)
- Resolve tags list visibility issue in tags page (arsengit)
- Fix issue on git stat collection (mihran113)
- Import `Image` and `Audio` for `TensorboardFolderTracker` (alansaul)
- Extend `aim.ext.tensorboard_tracker.run.Run` to allow stdout logging and system stats and parameter logging (alansaul)
- Add the ability for `TensorboardFolderTracker` to track `Histogram`s as Aim `Distribution`s (alansaul)
- Convert NaNs and Infs in responses to strings (n-gao)
- Add activeloop deeplake plugin (drahnreb)

## 3.17.4 May 4, 2023

- Resolve run messages duplication issue for in progress runs (roubkar)
Expand Down
2 changes: 1 addition & 1 deletion aim/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.17.4
3.17.5
2 changes: 1 addition & 1 deletion aim/ext/resource/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def get_stats(self):
pass
gpus.append(gpu_info)
nvml.nvmlShutdown()
except (nvml.NVMLError_LibraryNotFound, nvml.NVMLError_NotSupported):
except nvml.NVMLError:
pass

return system, gpus
27 changes: 18 additions & 9 deletions aim/ext/tensorboard_tracker/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional, Union

from aim.sdk.run import BasicRun
from aim.sdk.run import Run as SdkRun
from aim.ext.tensorboard_tracker.tracker import TensorboardTracker

from typing import TYPE_CHECKING
Expand All @@ -9,14 +9,23 @@
from aim.sdk.repo import Repo


class Run(BasicRun):
def __init__(self, run_hash: Optional[str] = None, *,
sync_tensorboard_log_dir: str,
repo: Optional[Union[str, 'Repo']] = None,
experiment: Optional[str] = None,
force_resume: Optional[bool] = False,
):
super().__init__(run_hash, repo=repo, read_only=False, experiment=experiment, force_resume=force_resume)
class Run(SdkRun):
def __init__(
self, run_hash: Optional[str] = None, *,
sync_tensorboard_log_dir: str,
repo: Optional[Union[str, 'Repo']] = None,
experiment: Optional[str] = None,
force_resume: Optional[bool] = False,
system_tracking_interval: Optional[Union[int, float]] = None,
log_system_params: Optional[bool] = False,
capture_terminal_logs: Optional[bool] = False,
):
super().__init__(
run_hash, repo=repo, read_only=False, experiment=experiment, force_resume=force_resume,
system_tracking_interval=system_tracking_interval, log_system_params=log_system_params,
capture_terminal_logs=capture_terminal_logs
)

self['tb_log_directory'] = sync_tensorboard_log_dir
self._tensorboard_tracker = TensorboardTracker(self._tracker, sync_tensorboard_log_dir)
self._tensorboard_tracker.start()
Expand Down
65 changes: 61 additions & 4 deletions aim/ext/tensorboard_tracker/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,63 @@
import weakref
import queue

from typing import TYPE_CHECKING, Any
from typing import Any
from aim import Audio, Image, Distribution

if TYPE_CHECKING:
from aim import Audio, Image

def _decode_histogram(value):
    """
    Build an aim ``Distribution`` from the raw tensorflow histogram proto (not the plugin form).

    :param value: summary value carrying a ``histo`` field (``bucket`` counts and ``bucket_limit`` edges)
    :return: aim ``Distribution``, or ``None`` when the histogram is too small/degenerate to convert
    """
    counts = list(value.histo.bucket)
    limits = list(value.histo.bucket_limit)

    # Reject histograms that are too small or have a zero-width range.
    degenerate = (
        len(counts) <= 2
        or len(limits) < 2
        or limits[0] == limits[-1]
    )
    if degenerate:
        return None

    # This is a bit weird but it seems the histogram counts is usually padded by 0 as tensorboard
    # only stores the right limits?
    # See https://github.com/pytorch/pytorch/blob/7d2a18da0b3427fcbe44b461a0aa508194535885/torch/utils/tensorboard/summary.py#L390 # noqa
    counts = counts[1:]

    return Distribution(hist=counts, bin_range=(limits[0], limits[-1]))


def _decode_histogram_from_plugin(value):
    """
    Convert from tensorflow histogram plugin representation of the data as a tensor back into
    an `aim` `Distribution`.

    Representation of histogram given by tf summary is obtained from here:
    https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/histogram/summary_v2.py

    :param value: value with a tensor that contains three columns: left_edge, right_edge,
        bin_values
    :return: aim Distribution, or None when the tensor is missing, empty, or spans a zero-width range
    """
    left_right_bins = tensor_util.make_ndarray(value.tensor)
    if left_right_bins is None:
        return None

    # Guard against an empty histogram BEFORE indexing: with shape (0, 3) the
    # column slices are empty arrays and `left_edge[0]` would raise IndexError.
    if left_right_bins.shape[0] == 0:
        return None

    left_edge = left_right_bins[:, 0]
    right_edge = left_right_bins[:, 1]
    bin_counts = left_right_bins[:, 2]

    bin_range = (left_edge[0], right_edge[-1])
    # A zero-width range means every bucket collapses onto one point — nothing to plot.
    if bin_range[0] == bin_range[1]:
        return None

    return Distribution(hist=bin_counts, bin_range=bin_range)


class TensorboardTracker:
Expand Down Expand Up @@ -70,7 +123,7 @@ def close(self):
class TensorboardFolderTracker:
def __init__(self, tensorboard_event_folder: str, queue: queue.Queue) -> None:
self.queue = queue
self.supported_plugins = ("images", "scalars")
self.supported_plugins = ("images", "scalars", "histograms")
self.unsupported_plugin_noticed = False
self.folder_name = os.path.basename(tensorboard_event_folder)
self._thread = threading.Thread(target=self._process_event)
Expand Down Expand Up @@ -131,6 +184,8 @@ def create_ndarray(tensor):
track_val = [Image(tf.image.decode_image(t).numpy()) for t in tensor]
if len(track_val) == 1:
track_val = track_val[0]
elif plugin_name == "histograms":
track_val = _decode_histogram_from_plugin(value)
elif plugin_name == "scalars" or plugin_name == "":
track_val = create_ndarray(value.tensor)
else:
Expand All @@ -142,6 +197,8 @@ def create_ndarray(tensor):
elif value.HasField("audio"):
tf_audio, sample_rate = tf.audio.decode_wav(value.audio.encoded_audio_string)
track_val = Audio(tf_audio.numpy(), rate=sample_rate)
elif value.HasField("histo"):
track_val = _decode_histogram(value)

except RuntimeError as exc:
# catch all the nasty failures
Expand Down
2 changes: 1 addition & 1 deletion aim/ext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def get_git_info():

try:
commit_hash, commit_timestamp, commit_author = results.get('commit').split('/')
except ValueError:
except (ValueError, AttributeError):
commit_hash = commit_timestamp = commit_author = None

git_info.update({
Expand Down
141 changes: 141 additions & 0 deletions aim/sdk/objects/plugins/deeplake_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from aim.storage.object import CustomObject
import deeplake
import warnings
import logging
from deeplake.util.exceptions import ReadOnlyModeError

logger = logging.getLogger(__name__)


class UncommittedDatasetWarning(UserWarning):
    """Warns that a logged deeplake dataset has uncommitted head changes, limiting traceability."""
    pass


class ViewOnUncommittedDatasetWarning(UserWarning):
    """Warns that a logged deeplake dataset view was created on a head with uncommitted changes."""
    pass


@CustomObject.alias('deeplake.dataset')
class DeeplakeDataset(CustomObject):
    """
    Track Activeloop Deeplake Dataset with versioning.

    It is generally advised to commit dataset changes before logging to runs. If dataset is a view on head of
    uncommitted changes, it's recommended to commit dataset changes first, then create a view and log it.
    By default, this is ignored and results in limited traceability. Set both ``auto_commit`` and ``auto_save_view``
    to ``True`` for automatic commit or saves.
    Unsaved views created on a dataset head with changes passed to this class cannot be saved.

    :param auto_commit: If dataset head node and uncommitted dataset changes are present an auto_commit
        will trigger a dataset commit ``autocommit on aim run`` to enable reproducibility of the run,
        defaults to ``False``.

    :param auto_save_view: Triggers a save of a view if dataset is an unsaved view on a committed head
        to enable reproducibility of the run, defaults to ``False``.

    :raises TypeError: if the dataset is not a deeplake.Dataset
    :raises ValueError: if the dataset is a view and has uncommitted changes on its head but should be saved.

    .. code-block:: python

        import deeplake

        from aim.sdk.objects.plugins.deeplake_dataset import DeeplakeDataset
        from aim.sdk import Run

        # create dataset object
        ds = deeplake.dataset('hub://activeloop/cifar100-test')

        # log dataset metadata
        run = Run(system_tracking_interval=None)
        run['deeplake_ds'] = DeeplakeDataset(ds)
    """
    AIM_NAME = 'deeplake.dataset'

    def __init__(self, dataset: deeplake.Dataset, auto_commit: bool = False, auto_save_view: bool = False):
        super().__init__()

        if not isinstance(dataset, deeplake.Dataset):
            raise TypeError("dataset must be of type ``deeplake.Dataset``")

        if dataset.has_head_changes:
            if dataset.is_view:
                # A view on a dirty head cannot be saved by deeplake; auto-* flags cannot help here.
                if any((auto_commit, auto_save_view)):
                    raise ValueError(
                        "Dataset is a view on head of uncommitted changes. "
                        "Commit dataset changes before creating a view. "
                        "To ignore with limited traceability set both "
                        "``auto_commit`` and ``auto_save_view`` to ``False``."
                    )
                else:
                    warnings.warn(
                        "There is little to trace back data to this run. "
                        "Dataset is a view on a head of uncommitted changes. "
                        "Consider committing dataset changes before creating a view and logging runs "
                        "to enable traceability.",
                        ViewOnUncommittedDatasetWarning,
                        stacklevel=2,
                    )
            else:
                if not auto_commit:
                    warnings.warn(
                        f"Deeplake Dataset {dataset.path} has uncommitted head changes. "
                        "Consider committing dataset changes before logging runs to enable full traceability.",
                        UncommittedDatasetWarning,
                        stacklevel=2,
                    )

        # Populated only when an unsaved view is (or could be) saved below.
        self.view_info = None

        if dataset.is_view:
            if auto_save_view and not dataset.has_head_changes:
                self.view_info = dataset._get_view_info()
                view_id = self.view_info.get('id', None)
                try:
                    vds_path = dataset.save_view(message="autosave on aim run.", id=view_id, optimize=False)
                except (NotImplementedError, ReadOnlyModeError) as e:
                    # views of in-memory datasets and read-only datasets cannot be saved. but keep the view id.
                    logger.info(f'autosave view on run: {str(e)} for dataset {dataset.path}.')
                else:
                    logger.info(f'autosave view on run: dataset {dataset.path} with id {view_id} saved to {vds_path}.')
        else:
            if auto_commit and dataset.has_head_changes:
                commit_id = dataset.commit(message="autocommit on aim run")
                logger.info(f'autocommit on run: dataset {dataset.path} with commit id {commit_id}.')

        self.storage['dataset'] = {
            'source': 'deeplake',
            'meta': self._get_ds_meta(dataset)
        }

    def _get_ds_meta(self, ds: deeplake.Dataset):
        """Collect dataset-level metadata (version-control state, sizes, tensors) for storage."""
        return {
            "path": ds.path,
            "commit_id": ds.commit_id,
            "branch": ds.branch,
            "has_head_changes": ds.has_head_changes,
            "pending_commit_id": ds.pending_commit_id if ds.has_head_changes else None,
            "info": dict(ds.info),  # Info might contain keys such as "description" and "title"
            "num_samples": ds.num_samples,
            "max_len": ds.max_len,
            "min_len": ds.min_len,
            "is_view": ds.is_view,
            "view_info": self.view_info,
            "tensors": {group: self._tensor_meta(tensor) for group, tensor in ds.tensors.items()},
            "size_approx": ds.size_approx(),
            "deeplake_version": ds.meta.version
        }

    def _tensor_meta(self, tensor: deeplake.Tensor):
        """Collect per-tensor metadata (type, length, compression) for storage."""
        meta = tensor.meta
        return {
            "name": tensor.key,
            "num_samples": len(tensor),
            "htype": tensor.htype,
            "dtype": str(tensor.dtype) if tensor.dtype else None,
            "compression_type": "sample_compression"
            if meta.sample_compression
            else ("chunk_compression" if meta.chunk_compression else None),
            "compression_format": meta.sample_compression or meta.chunk_compression,
            "info": dict(tensor.info),
        }
19 changes: 19 additions & 0 deletions aim/web/api/runs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,25 @@ def str_to_range(range_str: str):
return IndexRange(start, stop)


def convert_nan_and_inf_to_str(tree):
    """
    Recursively replace non-JSON-serializable float values in *tree*.

    ``inf`` -> ``'inf'``, ``-inf`` -> ``'-inf'``, ``NaN`` -> ``'NaN'``.
    Dicts, tuples and lists are rebuilt with converted values; any other
    value is returned unchanged.
    """
    pos_inf = float('inf')
    if tree == pos_inf:
        return 'inf'
    if tree == -pos_inf:
        return '-inf'
    # NaN is the only value that is unequal to itself; avoids math.isnan
    # blowing up on non-numeric leaves.
    if tree != tree:
        return 'NaN'
    if isinstance(tree, dict):
        return {key: convert_nan_and_inf_to_str(val) for key, val in tree.items()}
    if isinstance(tree, tuple):
        return tuple(map(convert_nan_and_inf_to_str, tree))
    if isinstance(tree, list):
        return list(map(convert_nan_and_inf_to_str, tree))
    return tree


def get_run_params(run: Run, *, skip_system: bool):
params = run.get(..., {}, resolve_objects=True)
if skip_system and '__system_params' in params:
Expand Down
3 changes: 3 additions & 0 deletions aim/web/api/runs/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from aim.web.api.runs.utils import (
checked_query,
collect_requested_metric_traces,
convert_nan_and_inf_to_str,
custom_aligned_metrics_streamer,
get_project_repo,
get_run_or_404,
Expand Down Expand Up @@ -152,6 +153,8 @@ async def run_params_api(run_id: str,
'traces': run.collect_sequence_info(sequence, skip_last_value=True),
'props': get_run_props(run)
}
# Convert NaN and Inf to strings
response = convert_nan_and_inf_to_str(response)

response['props'].update({
'notes': len(run.props.notes_obj)
Expand Down
2 changes: 1 addition & 1 deletion aim/web/ui/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "ui_v2",
"version": "3.17.4",
"version": "3.17.5",
"private": true,
"dependencies": {
"@aksel/structjs": "^1.0.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,19 @@
margin: 0 auto;
}
}

&__fullViewContent {
width: 100%;
height: 100%;
display: flex;
align-items: center;
background-color: $cuddle-10;
position: relative;
overflow: auto;
overflow: hidden;
padding: 2rem 0.5rem;
&__box {
display: inline-flex;
max-height: 100%;
margin: 0 auto;
overflow: auto;
}
&__depthSlider {
position: absolute;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class ExperimentDistributionCharts extends React.Component {
key: this.props.name,
});

this.computeHistogram(0);
this.computeHistogram(this.props.data.length - 1);
},
);
};
Expand Down
Loading