-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor codebase for new euclid workflow usage with tests (#180)
* refactor codebase for new euclid workflow usage with tests * add base_extractor to galaxyzoo module, use class label keys and prefixes for extractor tests, remove error on baseclass * remove shared class implementation
- Loading branch information
Showing
24 changed files
with
366 additions
and
284 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# frozen_string_literal: true | ||
|
||
module LabelExtractors
  module GalaxyZoo
    # Shared behaviour for Galaxy Zoo label extractors.
    #
    # This class reads the following from the concrete class (via `self.class`
    # / `self::`), so a subclass is expected to provide them:
    #   * TASK_KEY_LABEL_PREFIXES - Hash of task key => question label prefix
    #   * TASK_KEY_DATA_LABELS    - Hash of task key => { payload key => answer label }
    #   * .data_release_suffix    - public class method returning the release suffix
    #
    # UnknownTaskKey and UnknownLabelKey are not defined in this file; they are
    # resolved through normal constant lookup from this lexical scope
    # (presumably defined on the LabelExtractors namespace - verify).
    class BaseExtractor
      attr_reader :task_lookup_key, :task_prefix_label

      # @param task_lookup_key [Object] key into TASK_KEY_LABEL_PREFIXES (e.g. 'T0')
      # @raise [UnknownTaskKey] when the key has no label prefix entry
      def initialize(task_lookup_key)
        @task_lookup_key = task_lookup_key
        @task_prefix_label = task_prefix
      end

      # extract the keys from the reduction data payload hash
      # and convert the keys to the workflow question tasks
      #
      # e.g. workflow type (GZ) are question type 'decision tree' tasks
      # looking at the 'T0' task it correlates to 3 exclusive answers:
      # 0 (smooth)
      # 1 (features or disk)
      # 2 (star or artifact)
      #
      # then combined with the label prefix used to identify the correlated task name for Zoobot
      #
      # @param data_hash [Hash] payload whose keys are answer keys for this task
      # @return [Hash] a new hash with the same values and keys rewritten to
      #   "<task prefix>-<data release suffix>_<answer label>"
      # @raise [UnknownLabelKey] when a payload key has no answer label for this task
      def extract(data_hash)
        data_hash.transform_keys do |key|
          # create the label key used for column headers in the derived training catalogues
          # note the hyphen and underscore formatting, see Zoobot label schema for more details
          "#{task_prefix_label}-#{data_release_suffix}_#{data_payload_label(key)}"
        end
      end

      # @return [Hash] the receiving class's TASK_KEY_LABEL_PREFIXES
      def self.label_prefixes
        self::TASK_KEY_LABEL_PREFIXES
      end

      # @return [Hash] the receiving class's TASK_KEY_DATA_LABELS
      def self.data_labels
        self::TASK_KEY_DATA_LABELS
      end

      # provide a flat task question and answers list for the decals mission catalogues
      #
      # @return [Array<String>] one "<question prefix>-<data release suffix>_<answer label>"
      #   entry per answer, in label_prefixes order
      def self.question_answers_schema
        label_prefixes.flat_map do |task_key, question_prefix|
          data_labels[task_key].values.map do |answer_suffix|
            "#{question_prefix}-#{data_release_suffix}_#{answer_suffix}"
          end
        end
      end

      private

      # Look up the label prefix for the configured task key.
      def task_prefix
        prefix = self.class.label_prefixes[task_lookup_key]
        raise UnknownTaskKey, "key not found: #{task_lookup_key}" unless prefix

        prefix
      end

      # Look up the answer label for a payload key within the configured task.
      def data_payload_label(key)
        label = self.class.data_labels.dig(task_lookup_key, key)
        raise UnknownLabelKey, "key not found: #{key}" unless label

        label
      end

      # Delegates to the concrete class; called with an explicit receiver,
      # so the class-level method has to be public.
      def data_release_suffix
        self.class.data_release_suffix
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# frozen_string_literal: true | ||
|
||
module LabelExtractors
  module GalaxyZoo
    # Label extractor configuration for the Euclid workflow: supplies the task
    # prefixes, answer labels and data release suffix that BaseExtractor reads
    # from its concrete class. The task_lookup_key / task_prefix_label readers
    # are inherited from BaseExtractor.
    class Euclid < BaseExtractor
      # Derived to conform to the existing catalogue schema for Zoobot euclid
      # https://github.com/mwalmsley/galaxy-datasets/blob/eed30d3e37b5559d0427c339e8dc1d2a9dc2d004/galaxy_datasets/shared/label_metadata.py#L462
      TASK_KEY_LABEL_PREFIXES = {
        'T0' => 'smooth-or-featured',
        'T1' => 'how-rounded',
        'T2' => 'disk-edge-on',
        'T3' => 'edge-on-bulge',
        'T4' => 'bar',
        'T5' => 'has-spiral-arms',
        'T6' => 'spiral-winding',
        'T7' => 'spiral-arm-count',
        'T8' => 'bulge-size',
        'T11' => 'merging', # T10 is not used for training and no T9 in prod :shrug:
        'T12' => 'lensing',
        'T13' => 'clumps',
        'T14' => 'problem',
        'T15' => 'artifact'
      }.freeze

      # Task key => { payload answer key => answer label }.
      # The nested hashes are frozen as well so the lookup table cannot be
      # mutated through a reference handed out by BaseExtractor.data_labels.
      TASK_KEY_DATA_LABELS = {
        'T0' => {
          '0' => 'smooth',
          '1' => 'featured-or-disk',
          '2' => 'problem'
        },
        'T1' => {
          '0' => 'round',
          '1' => 'in-between',
          '2' => 'cigar-shaped'
        },
        'T2' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T3' => {
          '0' => 'rounded',
          '1' => 'boxy',
          '2' => 'none'
        },
        'T4' => {
          '0' => 'no',
          '1' => 'weak',
          '2' => 'strong'
        },
        'T5' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T6' => {
          '0' => 'tight',
          '1' => 'medium',
          '2' => 'loose'
        },
        'T7' => {
          '0' => '1',
          '1' => '2',
          '2' => '3',
          '3' => '4',
          '4' => 'more-than-4',
          '5' => 'cant-tell'
        },
        'T8' => {
          '0' => 'none',
          '1' => 'small',
          '2' => 'moderate',
          '3' => 'large',
          '4' => 'dominant'
        },
        'T11' => {
          '0' => 'merger',
          '1' => 'major-disturbance',
          '2' => 'minor-disturbance',
          '3' => 'none'
        },
        'T12' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T13' => {
          '0' => 'yes',
          '1' => 'no'
        },
        'T14' => {
          '0' => 'star',
          '1' => 'artifact',
          '2' => 'zoom'
        },
        'T15' => {
          '0' => 'saturation',
          '1' => 'diffraction',
          '2' => 'satellite',
          '3' => 'ray',
          '4' => 'scattered',
          '5' => 'other',
          '6' => 'ghost'
        }
      }.each_value(&:freeze).freeze

      DATA_RELEASE_SUFFIX = 'euclid'

      # Suffix placed between the question prefix and the answer label.
      # Deliberately public: BaseExtractor instances call it with an explicit
      # receiver (self.class), which a private class method would reject. The
      # bare `private` that used to precede this definition never applied to
      # `def self.` methods, so visibility is unchanged.
      #
      # @return [String] 'euclid'
      def self.data_release_suffix
        DATA_RELEASE_SUFFIX
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# frozen_string_literal: true | ||
require 'bajor/client' | ||
|
||
class RetrainZoobotJob | ||
class Failure < StandardError; end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.