diff --git a/CHANGELOG.md b/CHANGELOG.md index 9086bad5f8..2d3b32577c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 3.17.5 Jun 2, 2023 + +- Fix gpu stat collection when driver is not loaded (mihran113) +- Fix issue with overflowing box content in full-view mode in Base Explorers (KaroMourad) +- Resolve tags list visibility issue in tags page (arsengit) +- Fix issue on git stat collection (mihran113) +- Import `Image` and `Audio` for `TensorboardFolderTracker` (alansaul) +- Extend `aim.ext.tensorboard_tracker.run.Run` to allow stdout logging and system stats and parameter logging (alansaul) +- Add the ability for `TensorboardFolderTracker` to track `Histogram`s as Aim `Distribution`s (alansaul) +- Convert NaNs and Infs in responses to strings (n-gao) +- Add activeloop deeplake plugin (drahnreb) + ## 3.17.4 May 4, 2023 - Resolve run messages duplication issue for in progress runs (roubkar) diff --git a/aim/VERSION b/aim/VERSION index 7ecf6173ae..3e6a3017fe 100644 --- a/aim/VERSION +++ b/aim/VERSION @@ -1 +1 @@ -3.17.4 +3.17.5 diff --git a/aim/ext/resource/stat.py b/aim/ext/resource/stat.py index c2b5a02e42..11ea9d5cf6 100644 --- a/aim/ext/resource/stat.py +++ b/aim/ext/resource/stat.py @@ -187,7 +187,7 @@ def get_stats(self): pass gpus.append(gpu_info) nvml.nvmlShutdown() - except (nvml.NVMLError_LibraryNotFound, nvml.NVMLError_NotSupported): + except nvml.NVMLError: pass return system, gpus diff --git a/aim/ext/tensorboard_tracker/run.py b/aim/ext/tensorboard_tracker/run.py index 4df3add602..c95b616e38 100644 --- a/aim/ext/tensorboard_tracker/run.py +++ b/aim/ext/tensorboard_tracker/run.py @@ -1,6 +1,6 @@ from typing import Optional, Union -from aim.sdk.run import BasicRun +from aim.sdk.run import Run as SdkRun from aim.ext.tensorboard_tracker.tracker import TensorboardTracker from typing import TYPE_CHECKING @@ -9,14 +9,23 @@ from aim.sdk.repo import Repo -class Run(BasicRun): - def __init__(self, run_hash: Optional[str] = None, *, 
- sync_tensorboard_log_dir: str, - repo: Optional[Union[str, 'Repo']] = None, - experiment: Optional[str] = None, - force_resume: Optional[bool] = False, - ): - super().__init__(run_hash, repo=repo, read_only=False, experiment=experiment, force_resume=force_resume) +class Run(SdkRun): + def __init__( + self, run_hash: Optional[str] = None, *, + sync_tensorboard_log_dir: str, + repo: Optional[Union[str, 'Repo']] = None, + experiment: Optional[str] = None, + force_resume: Optional[bool] = False, + system_tracking_interval: Optional[Union[int, float]] = None, + log_system_params: Optional[bool] = False, + capture_terminal_logs: Optional[bool] = False, + ): + super().__init__( + run_hash, repo=repo, read_only=False, experiment=experiment, force_resume=force_resume, + system_tracking_interval=system_tracking_interval, log_system_params=log_system_params, + capture_terminal_logs=capture_terminal_logs + ) + self['tb_log_directory'] = sync_tensorboard_log_dir self._tensorboard_tracker = TensorboardTracker(self._tracker, sync_tensorboard_log_dir) self._tensorboard_tracker.start() diff --git a/aim/ext/tensorboard_tracker/tracker.py b/aim/ext/tensorboard_tracker/tracker.py index ce00b5e1e0..ce65c12c4a 100644 --- a/aim/ext/tensorboard_tracker/tracker.py +++ b/aim/ext/tensorboard_tracker/tracker.py @@ -10,10 +10,63 @@ import weakref import queue -from typing import TYPE_CHECKING, Any +from typing import Any +from aim import Audio, Image, Distribution -if TYPE_CHECKING: - from aim import Audio, Image + +def _decode_histogram(value): + """ + From the tensorflow histogram representation (not plugin), create an aim Distribution + + :param value: value with `histo` property + :return: aim Distribution + """ + bin_counts = list(value.histo.bucket) + bucket_limits = list(value.histo.bucket_limit) + + if (len(bin_counts) <= 2) or (len(bucket_limits) < 2) or (bucket_limits[0] == bucket_limits[-1]): + return None + + # This is a bit weird but it seems the histogram counts is usually 
padded by 0 as tensorboard + # only stores the right limits? + # See https://github.com/pytorch/pytorch/blob/7d2a18da0b3427fcbe44b461a0aa508194535885/torch/utils/tensorboard/summary.py#L390 # noqa + bin_counts = bin_counts[1:] + + bin_range = (bucket_limits[0], bucket_limits[-1]) + track_val = Distribution(hist=bin_counts, bin_range=bin_range) + return track_val + + +def _decode_histogram_from_plugin(value): + """ + Convert from tensorflow histogram plugin representation of the data as a tensor back into + a `aim` `Distribution` + + Representation of histogram given by tf summary is obtained from here: + https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/histogram/summary_v2.py + + :param value: value with a tensor that contains three columns, left_edge, right_edge, + bin_values + :return: aim Distribution + """ + left_right_bins = tensor_util.make_ndarray(value.tensor) + if left_right_bins is None: + return None + + left_edge = left_right_bins[:, 0] + right_edge = left_right_bins[:, 1] + bin_counts = left_right_bins[:, 2] + + bin_range = (left_edge[0], right_edge[-1]) + + is_empty = False + is_empty |= (left_right_bins.shape[0] == 0) + is_empty |= (bin_range[0] == bin_range[1]) + if is_empty: + return None + + track_val = Distribution(hist=bin_counts, bin_range=bin_range) + return track_val class TensorboardTracker: @@ -70,7 +123,7 @@ def close(self): class TensorboardFolderTracker: def __init__(self, tensorboard_event_folder: str, queue: queue.Queue) -> None: self.queue = queue - self.supported_plugins = ("images", "scalars") + self.supported_plugins = ("images", "scalars", "histograms") self.unsupported_plugin_noticed = False self.folder_name = os.path.basename(tensorboard_event_folder) self._thread = threading.Thread(target=self._process_event) @@ -131,6 +184,8 @@ def create_ndarray(tensor): track_val = [Image(tf.image.decode_image(t).numpy()) for t in tensor] if len(track_val) == 1: track_val = track_val[0] + elif plugin_name == 
"histograms": + track_val = _decode_histogram_from_plugin(value) elif plugin_name == "scalars" or plugin_name == "": track_val = create_ndarray(value.tensor) else: @@ -142,6 +197,8 @@ def create_ndarray(tensor): elif value.HasField("audio"): tf_audio, sample_rate = tf.audio.decode_wav(value.audio.encoded_audio_string) track_val = Audio(tf_audio.numpy(), rate=sample_rate) + elif value.HasField("histo"): + track_val = _decode_histogram(value) except RuntimeError as exc: # catch all the nasty failures diff --git a/aim/ext/utils.py b/aim/ext/utils.py index 7b13987cc7..ba7ab6a020 100644 --- a/aim/ext/utils.py +++ b/aim/ext/utils.py @@ -57,7 +57,7 @@ def get_git_info(): try: commit_hash, commit_timestamp, commit_author = results.get('commit').split('/') - except ValueError: + except (ValueError, AttributeError): commit_hash = commit_timestamp = commit_author = None git_info.update({ diff --git a/aim/sdk/objects/plugins/deeplake_dataset.py b/aim/sdk/objects/plugins/deeplake_dataset.py new file mode 100644 index 0000000000..c2e5736e7f --- /dev/null +++ b/aim/sdk/objects/plugins/deeplake_dataset.py @@ -0,0 +1,141 @@ +from aim.storage.object import CustomObject +import deeplake +import warnings +import logging +from deeplake.util.exceptions import ReadOnlyModeError + +logger = logging.getLogger(__name__) + + +class UncommittedDatasetWarning(UserWarning): + pass + + +class ViewOnUncommittedDatasetWarning(UserWarning): + pass + + +@CustomObject.alias('deeplake.dataset') +class DeeplakeDataset(CustomObject): + """ + Track Activeloop Deeplake Dataset with versioning. + + It is generally advised to commit dataset changes before logging to runs. If dataset is a view on head of + uncommitted changes, it's recommended to commit dataset changes first, then create a view and log it. + By default, this is ignored and results in limited traceability. Set both ``auto_commit`` and ``auto_save_view`` + to ``True`` for automatic commit or saves. 
+ Unsaved views created on a dataset head with changes passed to this class cannot be saved. + + :param auto_commit: If dataset head node and uncommitted dataset changes are present an auto_commit + will trigger a dataset commit ``autocommit on aim run`` to enable reproducibility of the run, + defaults to ``False``. + + :param auto_save_view: Triggers a save of a view if dataset is an unsaved view on a committed head + to enable reproducibility of the run, defaults to ``False``. + + :raises TypeError: if the dataset is not a deeplake.Dataset + :raises ValueError: if the dataset is a view and has uncommitted changes on its head but should be saved. + + .. code-block:: python + + import deeplake + + from aim.sdk.objects.plugins.deeplake_dataset import DeeplakeDataset + from aim.sdk import Run + + # create dataset object + ds = deeplake.dataset('hub://activeloop/cifar100-test') + + # log dataset metadata + run = Run(system_tracking_interval=None) + run['deeplake_ds'] = DeeplakeDataset(ds) + """ + AIM_NAME = 'deeplake.dataset' + + def __init__(self, dataset: deeplake.Dataset, auto_commit: bool = False, auto_save_view: bool = False): + super().__init__() + + if not isinstance(dataset, deeplake.Dataset): + raise TypeError("dataset must be of type ``deeplake.Dataset``") + + if dataset.has_head_changes: + if dataset.is_view: + if any((auto_commit, auto_save_view)): + raise ValueError( + "Dataset is a view on head of uncommitted changes. " + "Commit dataset changes before creating a view. " + "To ignore with limited traceability set both" + "``auto_commit`` and ``auto_save_view`` to ``False``." + ) + else: + warnings.warn( + "There is little to trace back data to this run. " + "Dataset is a view on a head of uncommitted changes. 
" + "Consider committing dataset changes before creating a view and logging runs " + "to enable traceability.", + ViewOnUncommittedDatasetWarning, + stacklevel=2, + ) + else: + if not auto_commit: + warnings.warn( + f"Deeplake Dataset {dataset.path} has uncommitted head changes. " + "Consider committing dataset changes before logging runs to enable full traceability.", + UncommittedDatasetWarning, + stacklevel=2, + ) + + self.view_info = None + + if dataset.is_view: + if auto_save_view and not dataset.has_head_changes: + self.view_info = dataset._get_view_info() + view_id = self.view_info.get('id', None) + try: + vds_path = dataset.save_view(message="autosave on aim run.", id=view_id, optimize=False) + except (NotImplementedError, ReadOnlyModeError) as e: + # views of in-memory datasets and read-only datasets cannot be saved. but keep the view id. + logger.info(f'autosave view on run: {str(e)} for dataset {dataset.path}.') + else: + logger.info(f'autosave view on run: dataset {dataset.path} with id {view_id} saved to {vds_path}.') + else: + if auto_commit and dataset.has_head_changes: + commit_id = dataset.commit(message="autocommit on aim run") + logger.info(f'autocommit on run: dataset {dataset.path} with commit id {commit_id}.') + + self.storage['dataset'] = { + 'source': 'deeplake', + 'meta': self._get_ds_meta(dataset) + } + + def _get_ds_meta(self, ds: deeplake.Dataset): + return { + "path": ds.path, + "commit_id": ds.commit_id, + "branch": ds.branch, + "has_head_changes": ds.has_head_changes, + "pending_commit_id": ds.pending_commit_id if ds.has_head_changes else None, + "info": dict(ds.info), # Info might contain keys such as "description" and "title" + "num_samples": ds.num_samples, + "max_len": ds.max_len, + "min_len": ds.min_len, + "is_view": ds.is_view, + "view_info": self.view_info, + "tensors": {group: self._tensor_meta(tensor) for group, tensor in ds.tensors.items()}, + "size_approx": ds.size_approx(), + "deeplake_version": ds.meta.version + } + + def 
_tensor_meta(self, tensor: deeplake.Tensor): + meta = tensor.meta + return { + "name": tensor.key, + "num_samples": len(tensor), + "htype": tensor.htype, + "dtype": str(tensor.dtype) if tensor.dtype else None, + "compression_type": "sample_compression" + if meta.sample_compression + else ("chunk_compression" if meta.chunk_compression else None), + "compression_format": meta.sample_compression or meta.chunk_compression, + "info": dict(tensor.info), + } diff --git a/aim/web/api/runs/utils.py b/aim/web/api/runs/utils.py index 3da79439aa..a0dbe6c6ff 100644 --- a/aim/web/api/runs/utils.py +++ b/aim/web/api/runs/utils.py @@ -54,6 +54,25 @@ def str_to_range(range_str: str): return IndexRange(start, stop) +def convert_nan_and_inf_to_str(tree): + if tree == float('inf'): + return 'inf' + if tree == float('-inf'): + return '-inf' + if tree != tree: # x == x is False for NaN, strings break math.isnan + return 'NaN' + if isinstance(tree, dict): + return { + key: convert_nan_and_inf_to_str(value) + for key, value in tree.items() + } + if isinstance(tree, tuple): + return tuple(convert_nan_and_inf_to_str(value) for value in tree) + if isinstance(tree, list): + return [convert_nan_and_inf_to_str(value) for value in tree] + return tree + + def get_run_params(run: Run, *, skip_system: bool): params = run.get(..., {}, resolve_objects=True) if skip_system and '__system_params' in params: diff --git a/aim/web/api/runs/views.py b/aim/web/api/runs/views.py index b0a400df5d..684bbac5f4 100644 --- a/aim/web/api/runs/views.py +++ b/aim/web/api/runs/views.py @@ -16,6 +16,7 @@ from aim.web.api.runs.utils import ( checked_query, collect_requested_metric_traces, + convert_nan_and_inf_to_str, custom_aligned_metrics_streamer, get_project_repo, get_run_or_404, @@ -152,6 +153,8 @@ async def run_params_api(run_id: str, 'traces': run.collect_sequence_info(sequence, skip_last_value=True), 'props': get_run_props(run) } + # Convert NaN and Inf to strings + response = 
convert_nan_and_inf_to_str(response) response['props'].update({ 'notes': len(run.props.notes_obj) diff --git a/aim/web/ui/package.json b/aim/web/ui/package.json index 596eaab73b..79d842232b 100644 --- a/aim/web/ui/package.json +++ b/aim/web/ui/package.json @@ -1,6 +1,6 @@ { "name": "ui_v2", - "version": "3.17.4", + "version": "3.17.5", "private": true, "dependencies": { "@aksel/structjs": "^1.0.0", diff --git a/aim/web/ui/src/modules/BaseExplorer/components/BoxWrapper/BoxWrapper.scss b/aim/web/ui/src/modules/BaseExplorer/components/BoxWrapper/BoxWrapper.scss index da1ff018eb..1555436ffd 100644 --- a/aim/web/ui/src/modules/BaseExplorer/components/BoxWrapper/BoxWrapper.scss +++ b/aim/web/ui/src/modules/BaseExplorer/components/BoxWrapper/BoxWrapper.scss @@ -36,7 +36,6 @@ margin: 0 auto; } } - &__fullViewContent { width: 100%; height: 100%; @@ -44,10 +43,12 @@ align-items: center; background-color: $cuddle-10; position: relative; - overflow: auto; + overflow: hidden; + padding: 2rem 0.5rem; &__box { - display: inline-flex; + max-height: 100%; margin: 0 auto; + overflow: auto; } &__depthSlider { position: absolute; diff --git a/aim/web/ui/src/pages/RunDetail/DistributionsVisualizer/temp/Wrapper.jsx b/aim/web/ui/src/pages/RunDetail/DistributionsVisualizer/temp/Wrapper.jsx index d5ed51aeaa..4727604744 100644 --- a/aim/web/ui/src/pages/RunDetail/DistributionsVisualizer/temp/Wrapper.jsx +++ b/aim/web/ui/src/pages/RunDetail/DistributionsVisualizer/temp/Wrapper.jsx @@ -134,7 +134,7 @@ class ExperimentDistributionCharts extends React.Component { key: this.props.name, }); - this.computeHistogram(0); + this.computeHistogram(this.props.data.length - 1); }, ); }; diff --git a/aim/web/ui/src/pages/Tags/Tags.scss b/aim/web/ui/src/pages/Tags/Tags.scss index c61c335951..3b47038f7a 100644 --- a/aim/web/ui/src/pages/Tags/Tags.scss +++ b/aim/web/ui/src/pages/Tags/Tags.scss @@ -105,7 +105,7 @@ } } &__tagListBox { - height: calc(100% - 73px); + height: calc(100vh - 147px); position: 
relative; overflow: hidden; border-radius: 0 0 toRem(6px) toRem(6px); diff --git a/tests/README.md b/tests/README.md index 3b7f693039..8dbe0c6f2e 100644 --- a/tests/README.md +++ b/tests/README.md @@ -5,6 +5,7 @@ Be able to test the correctness of the - `aim engine` - `aim sdk` - `aim ql` + - `extensions` ### Folder Structure @@ -16,6 +17,8 @@ tests test_*.py ql test_*.py + ext + test_*.py ``` ## Run diff --git a/tests/ext/__init__.py b/tests/ext/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/ext/test_tensorboard_run.py b/tests/ext/test_tensorboard_run.py new file mode 100644 index 0000000000..2738244eb3 --- /dev/null +++ b/tests/ext/test_tensorboard_run.py @@ -0,0 +1,86 @@ +import time + +from aim.ext.resource.log import LogLine +from aim.ext.tensorboard_tracker.run import Run as TensorboardRun + +from tests.base import TestBase +from tests.utils import full_class_name + + +class TestTensorboardRun(TestBase): + + def test_tensorboard_tracker_run__default_no_capture(self): + # Given + run = TensorboardRun(sync_tensorboard_log_dir="dummy", repo=self.repo) + run['testcase'] = full_class_name(TensorboardRun) + run_hash = run.hash + console_statement = 'no console capture is being done' + + # When + print(console_statement) + time.sleep(3) # allow tracker to add resource usage metrics + del run + + # Then + tracked_run = self.repo.get_run(run_hash) + self.assertIsNone(tracked_run.metrics().dataframe()) + + def test_tensorboard_tracker_run__system_stats_captured(self): + # Given + run = TensorboardRun( + sync_tensorboard_log_dir="dummy", repo=self.repo, system_tracking_interval=1 + ) + run['testcase'] = full_class_name(TensorboardRun) + run_hash = run.hash + + # When + time.sleep(3) # allow tracker to add resource usage metrics + del run + + # Then + tracked_run = self.repo.get_run(run_hash) + metrics_recorded = set(tracked_run.metrics().dataframe()['metric.name'].unique()) + self.assertTrue("__system__cpu" in metrics_recorded) + + def 
test_tensorboard_tracker_run__terminal_capture(self): + # Given + run = TensorboardRun( + sync_tensorboard_log_dir="dummy", repo=self.repo, capture_terminal_logs=True, + ) + run['testcase'] = full_class_name(TensorboardRun) + run_hash = run.hash + console_statement = 'no console capture has worked' + + # When + print(console_statement) + time.sleep(3) # allow tracker to add terminal logs + del run + + # Then + tracked_run = self.repo.get_run(run_hash) + terminal_logs = tracked_run.get_terminal_logs() + log_found = False + for log_item in terminal_logs.data.values(): + log_line = log_item[0][0] + if isinstance(log_line, LogLine): + if console_statement in str(log_line.data): + log_found = True + + self.assertTrue(log_found) + + def test_tensorboard_tracker_run__system_params_captured(self): + # Given + run = TensorboardRun( + sync_tensorboard_log_dir="dummy", repo=self.repo, log_system_params=True + ) + run['testcase'] = full_class_name(TensorboardRun) + run_hash = run.hash + + # When + time.sleep(3) # allow tracker to add system params + del run + + # Then + tracked_run = self.repo.get_run(run_hash) + system_params = tracked_run.get('__system_params') + self.assertIsNotNone(system_params) diff --git a/tests/ext/test_tensorboard_tracker.py b/tests/ext/test_tensorboard_tracker.py new file mode 100644 index 0000000000..57cea9e4b7 --- /dev/null +++ b/tests/ext/test_tensorboard_tracker.py @@ -0,0 +1,268 @@ +from queue import Queue +import numpy as np +from PIL import ImageChops as PILImageChops +import tensorflow as tf +from tensorboard.compat.proto.summary_pb2 import SummaryMetadata, Summary +from tensorboard.compat.proto.tensor_pb2 import TensorProto +from tensorboard.compat.proto.tensor_shape_pb2 import TensorShapeProto +from tensorboard.compat.proto.event_pb2 import Event +from tensorboard.util.tensor_util import make_tensor_proto +from torch.utils.tensorboard.summary import image, scalar, histogram, histogram_raw + +from aim import Image, Distribution +from 
aim.ext.tensorboard_tracker.tracker import TensorboardFolderTracker + +from tests.base import TestBase + + +def images_same_data(image1: Image, image2: Image) -> bool: + """ + Compare two Aim images to see if they contain the same values + """ + image_diff = PILImageChops.difference( + image1.to_pil_image(), image2.to_pil_image() + ) + return image_diff.getbbox() is None + + +class TestTensorboardTracker(TestBase): + + def test__process_tb_image_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + height, width, channels = 5, 4, 3 + # Note channels is last + image_np = np.random.randint(0, 16, (height, width, channels)).astype(dtype=np.uint8) + # Create image summary in standard format + image_summary = image(tag='test_image', tensor=image_np, dataformats='HWC') + event = Event(summary=image_summary) + + # When + tracker._process_tb_event(event) + + # Then + tracked_image = queue.get().value + original_image = Image(image_np) + self.assertTrue(isinstance(tracked_image, Image)) + self.assertTrue(tracked_image.size == original_image.size) + self.assertTrue(images_same_data(tracked_image, original_image)) + + def test__process_tb_image_plugin_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + height, width, channels = 5, 4, 3 + # Note channels is last + image_np = np.random.randint(0, 16, (height, width, channels)).astype(dtype=np.uint8) + # Create image summary in format of plugin + plugin_data = SummaryMetadata.PluginData(plugin_name='images') + smd = SummaryMetadata(plugin_data=plugin_data, ) + tensor = TensorProto(dtype='DT_STRING', + string_val=[ + f"{height}".encode(encoding='utf_8'), + f"{width}".encode(encoding='utf_8'), + tf.image.encode_png(image_np).numpy(), + ], + tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)])) + + image_summary = Summary( + value=[Summary.Value(tag='test_image', 
metadata=smd, tensor=tensor)] + ) + event = Event(summary=image_summary) + + # When + tracker._process_tb_event(event) + + # Then + tracked_image = queue.get().value + original_image = Image(image_np) + self.assertTrue(isinstance(tracked_image, Image)) + self.assertTrue(tracked_image.size == original_image.size) + self.assertTrue(images_same_data(tracked_image, original_image)) + + def test__process_tb_scalar_simple_value_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + scalar_np = np.array(0.32, dtype=np.float32) + scalar_summary = scalar('test_scalar', scalar_np, new_style=False) + event = Event(summary=scalar_summary) + + # When + tracker._process_tb_event(event) + + # Then + tracked_scalar = queue.get().value + self.assertTrue(isinstance(tracked_scalar, float)) + self.assertTrue(np.allclose(tracked_scalar, scalar_np)) + + def test__process_tb_scalar_plugin_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + scalar_np = np.array(0.32, dtype=np.float32) + scalar_summary = scalar('test_scalar', scalar_np, new_style=True) + event = Event(summary=scalar_summary) + + # When + tracker._process_tb_event(event) + + # Then + tracked_scalar = queue.get().value + self.assertTrue(isinstance(tracked_scalar, np.ndarray)) + self.assertTrue(np.allclose(tracked_scalar, scalar_np)) + + def test__process_tb_histogram_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + batch_dim, num_samples, num_bins = 3, 31, 11 + histogram_samples_values_np = np.random.randn(batch_dim, num_samples) + histogram_counts_np, histogram_bin_edges_np = np.histogram(histogram_samples_values_np, bins=num_bins) + histogram_summary = histogram('test_histogram', values=histogram_samples_values_np, bins=num_bins) + event = Event(summary=histogram_summary) + + # When + 
tracker._process_tb_event(event) + + # Then + tracked_histogram = queue.get().value + self.assertTrue(isinstance(tracked_histogram, Distribution)) + tracked_counts_np, tracked_bin_edges_np = tracked_histogram.to_np_histogram() + self.assertTrue(np.allclose(tracked_counts_np, histogram_counts_np)) + self.assertTrue(np.allclose(tracked_bin_edges_np, histogram_bin_edges_np)) + + def test__process_tb_histogram_event_empty_all_zeros(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + num_bins = 11 + + histogram_summary = histogram_raw( + name='test_histogram', + min=0.0, + max=0.0, + num=num_bins, + sum=0.0, + sum_squares=0.0, + bucket_limits=[0.0]*num_bins, + bucket_counts=[0.0]*num_bins, + ) + event = Event(summary=histogram_summary) + + # When + tracker._process_tb_event(event) + + # Then + self.assertTrue(queue.empty()) + + def test__process_tb_histogram_event_empty_no_values(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + + histogram_summary = histogram_raw( + name='test_histogram', + min=0.0, + max=0.0, + num=0, + sum=0.0, + sum_squares=0.0, + bucket_limits=[], + bucket_counts=[], + ) + event = Event(summary=histogram_summary) + + # When + tracker._process_tb_event(event) + + # Then + self.assertTrue(queue.empty()) + + def test__process_tb_histogram_plugin_event(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + batch_dim, num_samples, num_bins = 3, 31, 11 + histogram_samples_values_np = np.random.randn(batch_dim, num_samples) + histogram_counts_np, histogram_bin_edges_np = np.histogram(histogram_samples_values_np, bins=num_bins) + + # Create histogram summary in format of plugin + plugin_data = SummaryMetadata.PluginData(plugin_name='histograms') + smd = SummaryMetadata(plugin_data=plugin_data) + left_edge_np = histogram_bin_edges_np[:-1] + right_edge_np 
= histogram_bin_edges_np[1:] + tensor_content = np.hstack([ + left_edge_np[:, None], right_edge_np[:, None], histogram_counts_np[:, None] + ]) + # float64 for DT_DOUBLE + tensor = make_tensor_proto(tensor_content.astype(dtype=np.float64)) + histogram_summary = Summary( + value=[Summary.Value(tag='test_histogram', metadata=smd, tensor=tensor)] + ) + event = Event(summary=histogram_summary) + + # When + tracker._process_tb_event(event) + + # Then + tracked_histogram = queue.get().value + self.assertTrue(isinstance(tracked_histogram, Distribution)) + tracked_counts_np, tracked_bin_edges_np = tracked_histogram.to_np_histogram() + self.assertTrue(np.allclose(tracked_counts_np, histogram_counts_np)) + self.assertTrue(np.allclose(tracked_bin_edges_np, histogram_bin_edges_np)) + + def test__process_tb_histogram_plugin_event_empty_no_values(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + num_bins = 11 + histogram_counts_np = np.zeros(num_bins) + left_edge_np = np.zeros(num_bins) + right_edge_np = np.zeros(num_bins) + + # Create histogram summary in format of plugin + plugin_data = SummaryMetadata.PluginData(plugin_name='histograms') + smd = SummaryMetadata(plugin_data=plugin_data) + tensor_content = np.hstack([ + left_edge_np[:, None], right_edge_np[:, None], histogram_counts_np[:, None] + ]) + tensor = make_tensor_proto(tensor_content.astype(dtype=np.float64)) + histogram_summary = Summary( + value=[Summary.Value(tag='test_histogram', metadata=smd, tensor=tensor)] + ) + event = Event(summary=histogram_summary) + + # When + tracker._process_tb_event(event) + + # Then + self.assertTrue(queue.empty()) + + def test__process_tb_histogram_plugin_event_empty_all_zeros(self): + # Given + queue = Queue() + tracker = TensorboardFolderTracker(tensorboard_event_folder='dummy', queue=queue) + num_bins = 11 + histogram_counts_np = np.zeros(num_bins) + left_edge_np = np.zeros(num_bins) + right_edge_np = 
np.zeros(num_bins) + + # Create histogram summary in format of plugin + plugin_data = SummaryMetadata.PluginData(plugin_name='histograms') + smd = SummaryMetadata(plugin_data=plugin_data) + tensor_content = np.hstack([ + left_edge_np[:, None], right_edge_np[:, None], histogram_counts_np[:, None] + ]) + tensor = make_tensor_proto(tensor_content.astype(dtype=np.float64)) + histogram_summary = Summary( + value=[Summary.Value(tag='test_histogram', metadata=smd, tensor=tensor)] + ) + event = Event(summary=histogram_summary) + + # When + tracker._process_tb_event(event) + + # Then + self.assertTrue(queue.empty()) diff --git a/tests/integrations/test_deeplake_dataset.py b/tests/integrations/test_deeplake_dataset.py new file mode 100644 index 0000000000..43c2cc8165 --- /dev/null +++ b/tests/integrations/test_deeplake_dataset.py @@ -0,0 +1,29 @@ +import pytest + +from tests.base import TestBase +from tests.utils import is_package_installed + + +class TestDeeplakeDatasetIntegration(TestBase): + @pytest.mark.skipif(not is_package_installed('deeplake'), reason="'deeplake' is not installed. 
skipping.") + def test_dataset_as_run_param(self): + import deeplake + + from aim.sdk.objects.plugins.deeplake_dataset import DeeplakeDataset + from aim.sdk import Run + + # create dataset object + ds = deeplake.dataset('hub://activeloop/cifar100-test') + + # log dataset metadata + run = Run(system_tracking_interval=None) + run['deeplake_ds'] = DeeplakeDataset(ds) + + # get dataset metadata + ds_object = run['deeplake_ds'] + ds_dict = run.get('deeplake_ds', resolve_objects=True) + + self.assertTrue(isinstance(ds_object, DeeplakeDataset)) + self.assertTrue(isinstance(ds_dict, dict)) + self.assertIn('meta', ds_dict['dataset'].keys()) + self.assertIn('source', ds_dict['dataset'].keys()) diff --git a/tests/requirements.txt b/tests/requirements.txt index fce25e23ae..d37a09ae6c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,6 +1,7 @@ -r ../requirements.txt torch tensorflow +deeplake # hub fastapi>=0.87.0 httpx