Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed dataset metadata to match the new format #29

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions nucleus/box/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,13 @@ def ijkl_to_xywh(ijkl: tf.Tensor) -> tf.Tensor:
@export
@name_scope
def swap_axes_order(coords: tf.Tensor) -> tf.Tensor:
    r"""Swap the axis order of the first four coordinate channels.

    Reorders the last axis from ``(i, j, k, l, *rest)`` to
    ``(j, i, l, k, *rest)``; any channels beyond the first four
    (e.g. labels or scores) are carried through unchanged.

    Parameters
    ----------
    coords
        Tensor whose last axis holds at least 4 coordinate values,
        optionally followed by extra channels.

    Returns
    -------
    tf.Tensor
        Tensor of the same shape as ``coords`` with the first four
        last-axis channels pairwise swapped.
    """
    # NOTE(review): use `a:b` slices so each channel keeps a trailing
    # singleton axis. Indexing with `coords[..., 1][None]` prepends a
    # new *leading* axis instead, which breaks tf.concat on axis=-1
    # for any input of rank >= 2.
    return tf.concat(
        [
            coords[..., 1:2],
            coords[..., 0:1],
            coords[..., 3:4],
            coords[..., 2:3],
            coords[..., 4:],
        ],
        axis=-1,
    )


@export
Expand Down
92 changes: 52 additions & 40 deletions nucleus/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,22 +230,23 @@ def _create_image_from_row(row: pd.Series) -> Image:

"""
path = row[DatasetKeys.PATH.value]
labels = row.get(DatasetKeys.LABELS.value)
attrs = row.get(DatasetKeys.ATTRS.value)
boxes = row.get(DatasetKeys.BOXES.value)
boxes_labels = row.get(DatasetKeys.BOXES_LABELS.value)
labels = row.get(DatasetKeys.LABELS.value)

if boxes is not None:
if boxes not in [None, [[]]]:
box_list = [
Box(ijhw=ijhw, labels=labels)
for ijhw, labels in zip(boxes, boxes_labels)
if None not in ijhw and all([c > 0 for c in ijhw[:2]])
Box.from_xywh(xywh=tf.convert_to_tensor(xywh),
labels=box_labels)
for xywh, box_labels in zip(boxes, labels)
if None not in xywh and all([c > 0 for c in xywh[:2]])
]
else:
box_list = None

return Image.from_path(
path=path,
labels=labels,
labels=attrs,
box_collection=box_list
)

Expand Down Expand Up @@ -467,7 +468,7 @@ def create_random_split(
sample = tf.random.uniform((), minval=0, maxval=1)
if sample <= val_prop:
self.df.at[i, DatasetSplitKeys.RANDOM.value] = (
DatasetPartitionKeys.VAL.value
DatasetPartitionKeys.DEV.value
)
if val_prop < sample <= test_prop + val_prop:
self.df.at[i, DatasetSplitKeys.RANDOM.value] = (
Expand Down Expand Up @@ -652,7 +653,7 @@ def _serialize_example(
def get_ds(
self,
partition: Union[DatasetPartitionKeys, str],
split_column: Optional[Union[DatasetPartitionKeys, str]] = None,
split_column: Optional[Union[DatasetSplitKeys, str]] = None,
n_examples: Optional[int] = None,
shuffle: Optional[int] = 100,
repeat: Optional[int] = 1,
Expand Down Expand Up @@ -758,53 +759,64 @@ def expand_list_column(self, column: str) -> None:
def unique_elements_from_list_column(
self,
column: str,
flat: bool = True
# label_position: Optional[Union[int, List[int]]] = None
) -> List[List[str]]:
r"""

Parameters
----------
column
flat
label_position

Returns
-------

"""
try:
is_list = not isinstance(self.df[column][0][0], list)
except IndexError:
is_list = True

if is_list:
# if label_position is None:
label_position = range(
np.max([len(labels) for labels in self.df[column]])
)

uniques = [[] for _ in label_position]
for labels in self.df[column]:
for i, label in enumerate(labels):
if label is None:
continue
uniques[i].append(label)

return [sorted(list(set(unique))) for unique in uniques]
if flat:
labels_column = self.df[column]
labels_column = labels_column[labels_column.notnull()]
return [sorted(list(set([e for l in labels_column for e in l])))]
else:
# if label_position is None:
label_position = range(len(self.df[column][0][0]))

uniques = [[] for _ in label_position]
for labels in self.df[column]:
if not isinstance(labels, list):
continue
for label in labels:
for i, l in enumerate(np.asanyarray(label)):
if l is None:
try:
is_list = not isinstance(self.df[column][0][0], list)
except IndexError:
is_list = True

if is_list:
# rows with labels = None (as opposed to []) are not labelled
labels_column = self.df[column]
labels_column = labels_column[labels_column.notnull()]

# if label_position is None:
label_position = range(
np.max([len(labels) for labels in labels_column])
)

uniques = [[] for _ in label_position]
for labels in labels_column:
for i, label in enumerate(labels):
if label is None:
continue
uniques[i].append(l)
uniques[i].append(label)

return [sorted(list(set(unique))) for unique in uniques]
else:
# if label_position is None:
label_position = range(len(self.df[column][0][0]))

uniques = [[] for _ in label_position]
for labels in self.df[column]:
if not isinstance(labels, list):
continue
for label in labels:
for i, l in enumerate(np.asanyarray(label)):
if l is None:
continue
uniques[i].append(l)

return [sorted(list(set(unique))) for unique in uniques]
return [sorted(list(set(unique))) for unique in uniques]

def view_row(self, index: int, image_args: Dict = None):
_, image = self[index]
Expand Down
2 changes: 1 addition & 1 deletion nucleus/dataset/detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def unique_boxes_labels(self) -> List[List[str]]:

"""
return self.unique_elements_from_list_column(
column=DatasetKeys.BOXES_LABELS.value
column=DatasetKeys.LABELS.value
)

def view_row(self, index: int, image_args: Dict = None):
Expand Down
8 changes: 4 additions & 4 deletions nucleus/dataset/jerseys.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ def unique_labels(

"""
if label_position is None:
label_position = range(len(self.df[DatasetKeys.LABELS.value][0]))
label_position = range(len(self.df[DatasetKeys.ATTRS.value][0]))

uniques = [[] for _ in label_position]
for labels in self.df[DatasetKeys.LABELS.value]:
for labels in self.df[DatasetKeys.ATTRS.value]:
for i, label in enumerate(np.asanyarray(labels)[label_position]):
if label is None:
continue
Expand Down Expand Up @@ -85,12 +85,12 @@ def create_label_count_df(
df = self.df

if label_position is None:
label_position = range(len(df[DatasetKeys.LABELS.value][0]))
label_position = range(len(df[DatasetKeys.ATTRS.value][0]))
elif isinstance(label_position, int):
label_position = [label_position]

labels_dict = {}
for labels in df[DatasetKeys.LABELS.value]:
for labels in df[DatasetKeys.ATTRS.value]:
for i, label in enumerate(np.asanyarray(labels)[label_position]):
if label is None:
continue
Expand Down
15 changes: 8 additions & 7 deletions nucleus/dataset/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,27 @@ class DatasetKeys(Enum):
NAME = 'name'
CACHE = 'cache'
PATH = 'path'
ATTRS = 'attrs'
BOXES = 'bbxs'
LABELS = 'labels'
BOXES = 'boxes'
BOXES_LABELS = 'boxes_labels'
N_BOXES = 'n_boxes'
N_BOXES = 'n_bbxs'


@export
class DatasetListKeys(Enum):
ATTRS = 'attrs'
BOXES = 'bbxs'
LABELS = 'labels'
BOXES = 'boxes'
BOXES_LABELS = 'boxes_labels'


@export
class DatasetSplitKeys(Enum):
RANDOM = 'split_random'
RANDOM = 'set_split_random'
FEED = 'set_split_feed'


@export
class DatasetPartitionKeys(Enum):
TEST = 'test'
VAL = 'val'
DEV = 'dev'
TRAIN = 'train'
4 changes: 2 additions & 2 deletions nucleus/dataset/tools/watson.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ def create_examples_from_jobs(

yield {
DatasetKeys.PATH.value: example['source'],
DatasetKeys.LABELS.value: example['frameTags'],
DatasetKeys.ATTRS.value: example['frameTags'],
DatasetKeys.BOXES.value: ijhw_list,
DatasetKeys.BOXES_LABELS.value: labels_list,
DatasetKeys.LABELS.value: labels_list,
DatasetKeys.N_BOXES.value: len(ijhw_list),
'src': 'watson'
}
Expand Down
24 changes: 21 additions & 3 deletions nucleus/transform/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,23 @@ class DeterministicTransform(Transform):
"""
n_factors: int

@staticmethod
def valid_boxes(boxes: tf.Tensor) -> bool:
    r"""Return whether ``boxes`` holds at least one real (non-padding) box.

    Parameters
    ----------
    boxes
        Box tensor for one example, or ``None`` when the example
        carries no boxes at all.

    Returns
    -------
    ``False`` if ``boxes`` is ``None`` or consists entirely of padding
    values; ``True`` otherwise.
    """
    # NOTE(review): this assumes padded box tensors are filled with -1,
    # so a mean of exactly -1 means "all padding" -- confirm against the
    # padding convention used elsewhere in the pipeline. Also note the
    # bare truthiness test on a tf tensor only works in eager mode.
    if boxes is None or tf.equal(tf.reduce_mean(boxes), -1):
        return False

    return True

@abstractmethod
def _operation(
self,
Expand Down Expand Up @@ -165,19 +182,20 @@ def __call__(
"""
# TODO: Some of these n_factors checks seem a bit hacky... Can we do
# better?
if self.transform.n_factors == -1:
if self.transform.n_factors == -1 and boxes is not None:
factors_shape = tf_get_shape(boxes[..., :4])
elif self.transform.n_factors == -2:
factors_shape = tf_get_shape(image)
elif self.transform.n_factors == -3:
elif self.transform.n_factors == -3 and boxes is not None:
factors_shape = ()
self.min_factor = 0
self.max_factor = tf.cast(
tf_get_shape(unpad_tensor(boxes))[0] - 1,
dtype=tf.float32
)
else:
factors_shape = (self.transform.n_factors,)
n_factors = self.transform.n_factors
factors_shape = (n_factors,) if n_factors > 0 else (1,)

factors = tf.random.uniform(
shape=factors_shape,
Expand Down
Loading