Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed dataset metadata to match the new format #29

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions nucleus/box/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,13 @@ def ijkl_to_xywh(ijkl: tf.Tensor) -> tf.Tensor:
@export
@name_scope
def swap_axes_order(coords: tf.Tensor) -> tf.Tensor:
    r"""Swap the axis order of the first four coordinate channels.

    Reorders the last axis from ``(i, j, k, l, *rest)`` to
    ``(j, i, l, k, *rest)``; any channels beyond the first four
    (e.g. labels or scores) are carried through unchanged.

    Parameters
    ----------
    coords
        Tensor whose last axis holds at least 4 coordinate values,
        optionally followed by extra channels.

    Returns
    -------
    tf.Tensor
        Tensor of the same shape as ``coords`` with the first four
        last-axis channels pairwise swapped.
    """
    # NOTE(review): use `a:b` slices so each channel keeps a trailing
    # singleton axis. Indexing with `coords[..., 1][None]` prepends a
    # new *leading* axis instead, which breaks tf.concat on axis=-1
    # for any input of rank >= 2.
    return tf.concat(
        [
            coords[..., 1:2],
            coords[..., 0:1],
            coords[..., 3:4],
            coords[..., 2:3],
            coords[..., 4:],
        ],
        axis=-1,
    )


@export
Expand Down
92 changes: 52 additions & 40 deletions nucleus/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,22 +230,23 @@ def _create_image_from_row(row: pd.Series) -> Image:

"""
path = row[DatasetKeys.PATH.value]
labels = row.get(DatasetKeys.LABELS.value)
attrs = row.get(DatasetKeys.ATTRS.value)
boxes = row.get(DatasetKeys.BOXES.value)
boxes_labels = row.get(DatasetKeys.BOXES_LABELS.value)
labels = row.get(DatasetKeys.LABELS.value)

if boxes is not None:
if boxes not in [None, [[]]]:
box_list = [
Box(ijhw=ijhw, labels=labels)
for ijhw, labels in zip(boxes, boxes_labels)
if None not in ijhw and all([c > 0 for c in ijhw[:2]])
Box.from_xywh(xywh=tf.convert_to_tensor(xywh),
labels=box_labels)
for xywh, box_labels in zip(boxes, labels)
if None not in xywh and all([c > 0 for c in xywh[:2]])
]
else:
box_list = None

return Image.from_path(
path=path,
labels=labels,
labels=attrs,
box_collection=box_list
)

Expand Down Expand Up @@ -467,7 +468,7 @@ def create_random_split(
sample = tf.random.uniform((), minval=0, maxval=1)
if sample <= val_prop:
self.df.at[i, DatasetSplitKeys.RANDOM.value] = (
DatasetPartitionKeys.VAL.value
DatasetPartitionKeys.DEV.value
)
if val_prop < sample <= test_prop + val_prop:
self.df.at[i, DatasetSplitKeys.RANDOM.value] = (
Expand Down Expand Up @@ -652,7 +653,7 @@ def _serialize_example(
def get_ds(
self,
partition: Union[DatasetPartitionKeys, str],
split_column: Optional[Union[DatasetPartitionKeys, str]] = None,
split_column: Optional[Union[DatasetSplitKeys, str]] = None,
n_examples: Optional[int] = None,
shuffle: Optional[int] = 100,
repeat: Optional[int] = 1,
Expand Down Expand Up @@ -758,53 +759,64 @@ def expand_list_column(self, column: str) -> None:
def unique_elements_from_list_column(
self,
column: str,
flat: bool = True
# label_position: Optional[Union[int, List[int]]] = None
) -> List[List[str]]:
r"""

Parameters
----------
column
flat
label_position

Returns
-------

"""
try:
is_list = not isinstance(self.df[column][0][0], list)
except IndexError:
is_list = True

if is_list:
# if label_position is None:
label_position = range(
np.max([len(labels) for labels in self.df[column]])
)

uniques = [[] for _ in label_position]
for labels in self.df[column]:
for i, label in enumerate(labels):
if label is None:
continue
uniques[i].append(label)

return [sorted(list(set(unique))) for unique in uniques]
if flat:
labels_column = self.df[column]
labels_column = labels_column[labels_column.notnull()]
return [sorted(list(set([e for l in labels_column for e in l])))]
else:
# if label_position is None:
label_position = range(len(self.df[column][0][0]))

uniques = [[] for _ in label_position]
for labels in self.df[column]:
if not isinstance(labels, list):
continue
for label in labels:
for i, l in enumerate(np.asanyarray(label)):
if l is None:
try:
is_list = not isinstance(self.df[column][0][0], list)
except IndexError:
is_list = True

if is_list:
# rows with labels = None (as opposed to []) are not labelled
labels_column = self.df[column]
labels_column = labels_column[labels_column.notnull()]

# if label_position is None:
label_position = range(
np.max([len(labels) for labels in labels_column])
)

uniques = [[] for _ in label_position]
for labels in labels_column:
for i, label in enumerate(labels):
if label is None:
continue
uniques[i].append(l)
uniques[i].append(label)

return [sorted(list(set(unique))) for unique in uniques]
else:
# if label_position is None:
label_position = range(len(self.df[column][0][0]))

uniques = [[] for _ in label_position]
for labels in self.df[column]:
if not isinstance(labels, list):
continue
for label in labels:
for i, l in enumerate(np.asanyarray(label)):
if l is None:
continue
uniques[i].append(l)

return [sorted(list(set(unique))) for unique in uniques]
return [sorted(list(set(unique))) for unique in uniques]

def view_row(self, index: int, image_args: Dict = None):
_, image = self[index]
Expand Down
2 changes: 1 addition & 1 deletion nucleus/dataset/detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def unique_boxes_labels(self) -> List[List[str]]:

"""
return self.unique_elements_from_list_column(
column=DatasetKeys.BOXES_LABELS.value
column=DatasetKeys.LABELS.value
)

def view_row(self, index: int, image_args: Dict = None):
Expand Down
8 changes: 4 additions & 4 deletions nucleus/dataset/jerseys.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ def unique_labels(

"""
if label_position is None:
label_position = range(len(self.df[DatasetKeys.LABELS.value][0]))
label_position = range(len(self.df[DatasetKeys.ATTRS.value][0]))

uniques = [[] for _ in label_position]
for labels in self.df[DatasetKeys.LABELS.value]:
for labels in self.df[DatasetKeys.ATTRS.value]:
for i, label in enumerate(np.asanyarray(labels)[label_position]):
if label is None:
continue
Expand Down Expand Up @@ -85,12 +85,12 @@ def create_label_count_df(
df = self.df

if label_position is None:
label_position = range(len(df[DatasetKeys.LABELS.value][0]))
label_position = range(len(df[DatasetKeys.ATTRS.value][0]))
elif isinstance(label_position, int):
label_position = [label_position]

labels_dict = {}
for labels in df[DatasetKeys.LABELS.value]:
for labels in df[DatasetKeys.ATTRS.value]:
for i, label in enumerate(np.asanyarray(labels)[label_position]):
if label is None:
continue
Expand Down
15 changes: 8 additions & 7 deletions nucleus/dataset/keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,27 @@ class DatasetKeys(Enum):
NAME = 'name'
CACHE = 'cache'
PATH = 'path'
ATTRS = 'attrs'
BOXES = 'bbxs'
LABELS = 'labels'
BOXES = 'boxes'
BOXES_LABELS = 'boxes_labels'
N_BOXES = 'n_boxes'
N_BOXES = 'n_bbxs'


@export
class DatasetListKeys(Enum):
ATTRS = 'attrs'
BOXES = 'bbxs'
LABELS = 'labels'
BOXES = 'boxes'
BOXES_LABELS = 'boxes_labels'


@export
class DatasetSplitKeys(Enum):
RANDOM = 'split_random'
RANDOM = 'set_split_random'
FEED = 'set_split_feed'


@export
class DatasetPartitionKeys(Enum):
TEST = 'test'
VAL = 'val'
DEV = 'dev'
TRAIN = 'train'
4 changes: 2 additions & 2 deletions nucleus/dataset/tools/watson.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ def create_examples_from_jobs(

yield {
DatasetKeys.PATH.value: example['source'],
DatasetKeys.LABELS.value: example['frameTags'],
DatasetKeys.ATTRS.value: example['frameTags'],
DatasetKeys.BOXES.value: ijhw_list,
DatasetKeys.BOXES_LABELS.value: labels_list,
DatasetKeys.LABELS.value: labels_list,
DatasetKeys.N_BOXES.value: len(ijhw_list),
'src': 'watson'
}
Expand Down
24 changes: 21 additions & 3 deletions nucleus/transform/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,23 @@ class DeterministicTransform(Transform):
"""
n_factors: int

@staticmethod
def valid_boxes(boxes: tf.Tensor) -> bool:
    r"""Return whether ``boxes`` holds at least one real (non-padding) box.

    Parameters
    ----------
    boxes
        Box tensor for one example, or ``None`` when the example
        carries no boxes at all.

    Returns
    -------
    ``False`` if ``boxes`` is ``None`` or consists entirely of padding
    values; ``True`` otherwise.
    """
    # NOTE(review): this assumes padded box tensors are filled with -1,
    # so a mean of exactly -1 means "all padding" -- confirm against the
    # padding convention used elsewhere in the pipeline. Also note the
    # bare truthiness test on a tf tensor only works in eager mode.
    if boxes is None or tf.equal(tf.reduce_mean(boxes), -1):
        return False

    return True

@abstractmethod
def _operation(
self,
Expand Down Expand Up @@ -165,19 +182,20 @@ def __call__(
"""
# TODO: Some of these n_factors checks seem a bit hacky... Can we do
# better?
if self.transform.n_factors == -1:
if self.transform.n_factors == -1 and boxes is not None:
factors_shape = tf_get_shape(boxes[..., :4])
elif self.transform.n_factors == -2:
factors_shape = tf_get_shape(image)
elif self.transform.n_factors == -3:
elif self.transform.n_factors == -3 and boxes is not None:
factors_shape = ()
self.min_factor = 0
self.max_factor = tf.cast(
tf_get_shape(unpad_tensor(boxes))[0] - 1,
dtype=tf.float32
)
else:
factors_shape = (self.transform.n_factors,)
n_factors = self.transform.n_factors
factors_shape = (n_factors,) if n_factors > 0 else (1,)

factors = tf.random.uniform(
shape=factors_shape,
Expand Down
Loading