-
Notifications
You must be signed in to change notification settings - Fork 119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Xfel merge filter by resolution n obs #1030
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,19 +9,11 @@ class experiment_filter(worker): | |
def __init__(self, params, mpi_helper=None, mpi_logger=None): | ||
super(experiment_filter, self).__init__(params=params, mpi_helper=mpi_helper, mpi_logger=mpi_logger) | ||
|
||
def validate(self): | ||
if 'unit_cell' not in self.params.filter.algorithm: # so far only "unit_cell" algorithm is supported | ||
return | ||
assert self.params.filter.unit_cell.value.target_space_group is not None, \ | ||
'Space group is required for unit cell filtering' | ||
|
||
def __repr__(self): | ||
return 'Filter experiments' | ||
|
||
def check_unit_cell(self, experiment): | ||
|
||
experiment_unit_cell = experiment.crystal.get_unit_cell() | ||
|
||
is_ok = experiment_unit_cell.is_similar_to(self.params.filter.unit_cell.value.target_unit_cell, | ||
self.params.filter.unit_cell.value.relative_length_tolerance, | ||
self.params.filter.unit_cell.value.absolute_angle_tolerance) | ||
|
@@ -49,7 +41,6 @@ def remove_experiments(experiments, reflections, experiment_ids_to_remove): | |
experiments.select_on_experiment_identifiers([i for i in experiments.identifiers() if i not in experiment_ids_to_remove]) | ||
reflections.remove_on_experiment_identifiers(experiment_ids_to_remove) | ||
reflections.reset_ids() | ||
|
||
return experiments, reflections | ||
|
||
def check_cluster(self, experiment): | ||
|
@@ -69,18 +60,89 @@ def check_cluster(self, experiment): | |
return m_distance < self.params.filter.unit_cell.cluster.covariance.mahalanobis | ||
|
||
def run(self, experiments, reflections): | ||
if 'unit_cell' not in self.params.filter.algorithm: # so far only "unit_cell" algorithm is supported | ||
filter_by_unit_cell = 'unit_cell' in self.params.filter.algorithm[0] | ||
filter_by_n_obs = 'n_obs' in self.params.filter.algorithm[0] | ||
filter_by_resolution = 'resolution' in self.params.filter.algorithm[0] | ||
# only "unit_cell" "n_obs" and "resolution" algorithms are supported | ||
if (not filter_by_unit_cell) and (not filter_by_n_obs) and (not filter_by_resolution): | ||
return experiments, reflections | ||
if filter_by_unit_cell: | ||
assert self.params.filter.unit_cell.value.target_space_group is not None, \ | ||
'Space group is required for unit cell filtering' | ||
if filter_by_n_obs: | ||
check0 = self.params.filter.n_obs.min is not None | ||
check1 = self.params.filter.n_obs.max is not None | ||
assert check0 or check1, \ | ||
'Either min or max is required for n_obs filtering' | ||
if filter_by_resolution: | ||
assert self.params.filter.resolution.d_min is not None, \ | ||
'd_min is required for resolution filtering' | ||
|
||
self.logger.log_step_time("FILTER_EXPERIMENTS") | ||
|
||
# BEGIN BY-VALUE FILTER | ||
experiment_ids_to_remove = [] | ||
if filter_by_unit_cell: | ||
experiment_ids_to_remove_unit_cell, removed_for_unit_cell, removed_for_space_group = self.run_filter_by_unit_cell(experiments, reflections) | ||
experiment_ids_to_remove += experiment_ids_to_remove_unit_cell | ||
else: | ||
removed_for_unit_cell = 0 | ||
removed_for_space_group = 0 | ||
if filter_by_n_obs: | ||
experiment_ids_to_remove_n_obs, removed_for_n_obs = self.run_filter_by_n_obs(experiments, reflections) | ||
experiment_ids_to_remove += experiment_ids_to_remove_n_obs | ||
else: | ||
removed_for_n_obs = 0 | ||
if filter_by_resolution: | ||
experiment_ids_to_remove_resolution, removed_for_resolution = self.run_filter_by_resolution(experiments, reflections) | ||
experiment_ids_to_remove += experiment_ids_to_remove_resolution | ||
else: | ||
removed_for_resolution = 0 | ||
experiment_ids_to_remove = list(set(experiment_ids_to_remove)) | ||
|
||
input_len_expts = len(experiments) | ||
input_len_refls = len(reflections) | ||
new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove) | ||
|
||
removed_reflections = input_len_refls - len(new_reflections) | ||
#assert removed_for_space_group + removed_for_unit_cell == input_len_expts - len(new_experiments) | ||
|
||
self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell) | ||
self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group) | ||
self.logger.log("Experiments rejected because of n_obs %d"%removed_for_n_obs) | ||
self.logger.log("Experiments rejected because of resolution %d"%removed_for_resolution) | ||
self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections) | ||
|
||
# MPI-reduce total counts | ||
comm = self.mpi_helper.comm | ||
MPI = self.mpi_helper.MPI | ||
total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0) | ||
total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0) | ||
total_removed_for_n_obs = comm.reduce(removed_for_n_obs, MPI.SUM, 0) | ||
total_removed_for_resolution = comm.reduce(removed_for_resolution, MPI.SUM, 0) | ||
total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0) | ||
|
||
# rank 0: log total counts | ||
if self.mpi_helper.rank == 0: | ||
self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell) | ||
self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group) | ||
self.logger.main_log("Total experiments rejected because of n_obs %d"%total_removed_for_n_obs) | ||
self.logger.main_log("Total experiments rejected because of resolution %d"%total_removed_for_resolution) | ||
self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed) | ||
|
||
self.logger.log_step_time("FILTER_EXPERIMENTS", True) | ||
|
||
# Do we have any data left? | ||
from xfel.merging.application.utils.data_counter import data_counter | ||
data_counter(self.params).count(new_experiments, new_reflections) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. data_counter result not checked? |
||
return new_experiments, new_reflections | ||
|
||
def run_filter_by_unit_cell(self, experiments, reflections): | ||
experiment_ids_to_remove = [] | ||
removed_for_unit_cell = 0 | ||
removed_for_space_group = 0 | ||
|
||
# BEGIN BY-VALUE FILTER | ||
if self.params.filter.unit_cell.algorithm == "value": | ||
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets. | ||
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets. | ||
if self.params.filter.unit_cell.value.target_unit_cell in (Auto, None): | ||
if self.params.scaling.unit_cell is None: | ||
try: | ||
|
@@ -102,12 +164,10 @@ def run(self, experiments, reflections): | |
elif not self.check_unit_cell(experiment): | ||
experiment_ids_to_remove.append(experiment.identifier) | ||
removed_for_unit_cell += 1 | ||
# END BY-VALUE FILTER | ||
# END BY-VALUE FILTER | ||
elif self.params.filter.unit_cell.algorithm == "cluster": | ||
|
||
from uc_metrics.clustering.util import get_population_permutation # implicit import | ||
import pickle | ||
|
||
class Empty: pass | ||
if self.mpi_helper.rank == 0: | ||
with open(self.params.filter.unit_cell.cluster.covariance.file,'rb') as F: | ||
|
@@ -136,38 +196,36 @@ class Empty: pass | |
if not self.check_cluster(experiment): | ||
experiment_ids_to_remove.append(experiment.identifier) | ||
removed_for_unit_cell += 1 | ||
# END OF COVARIANCE FILTER | ||
input_len_expts = len(experiments) | ||
input_len_refls = len(reflections) | ||
new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove) | ||
|
||
removed_reflections = input_len_refls - len(new_reflections) | ||
assert removed_for_space_group + removed_for_unit_cell == input_len_expts - len(new_experiments) | ||
|
||
self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell) | ||
self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group) | ||
self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections) | ||
|
||
# MPI-reduce total counts | ||
comm = self.mpi_helper.comm | ||
MPI = self.mpi_helper.MPI | ||
total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0) | ||
total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0) | ||
total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0) | ||
|
||
# rank 0: log total counts | ||
if self.mpi_helper.rank == 0: | ||
self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell) | ||
self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group) | ||
self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed) | ||
|
||
self.logger.log_step_time("FILTER_EXPERIMENTS", True) | ||
|
||
# Do we have any data left? | ||
from xfel.merging.application.utils.data_counter import data_counter | ||
data_counter(self.params).count(new_experiments, new_reflections) | ||
# END OF COVARIANCE FILTER | ||
return experiment_ids_to_remove, removed_for_unit_cell, removed_for_space_group | ||
|
||
return new_experiments, new_reflections | ||
def run_filter_by_n_obs(self, experiments, reflections): | ||
experiment_ids_to_remove = [] | ||
removed_for_n_obs = 0 | ||
for expt_index, experiment in enumerate(experiments): | ||
refls_expt = reflections.select(reflections['id'] == expt_index) | ||
if self.params.filter.n_obs.max and len(refls_expt) > self.params.filter.n_obs.max: | ||
experiment_ids_to_remove.append(experiment.identifier) | ||
removed_for_n_obs += 1 | ||
if self.params.filter.n_obs.min and len(refls_expt) < self.params.filter.n_obs.min: | ||
experiment_ids_to_remove.append(experiment.identifier) | ||
removed_for_n_obs += 1 | ||
return experiment_ids_to_remove, removed_for_n_obs | ||
|
||
def run_filter_by_resolution(self, experiments, reflections): | ||
experiment_ids_to_remove = [] | ||
removed_for_resolution = 0 | ||
resolution = reflections.compute_d(experiments) | ||
start = 0 | ||
for expt_index, expt in enumerate(experiments): | ||
refls_expt = reflections.select(reflections['id'] == expt_index) | ||
n_refls = refls_expt.size() | ||
max_resolution = min(resolution[start: start + n_refls]) | ||
start += n_refls | ||
if max_resolution > self.params.filter.resolution.d_min: | ||
experiment_ids_to_remove.append(expt.identifier) | ||
removed_for_resolution += 1 | ||
return experiment_ids_to_remove, removed_for_resolution | ||
|
||
if __name__ == '__main__': | ||
from xfel.merging.application.worker import exercise_worker | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,14 +129,18 @@ | |
.help = The filter section defines criteria to accept or reject whole experiments | ||
.help = or to modify the entire experiment by a reindexing operator | ||
.help = refer to the select section for filtering of individual reflections | ||
.help = only unit_cell, resolution, and n_obs are implemented | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go ahead and drop reindex and report if they aren't implemented. |
||
{ | ||
algorithm = n_obs reindex resolution unit_cell report | ||
.type = choice | ||
.type = strings | ||
.multiple = True | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If .type = choice and .multiple = true, then params.filter.algorithm should be a list of already validated strings. Is there a reason that doesn't work? |
||
n_obs { | ||
min = 15 | ||
min = None | ||
.type = int | ||
.help = Minimum number of observations for subsequent processing | ||
max = None | ||
.type = int | ||
.help = Maximum number of observations for subsequent processing | ||
} | ||
reindex { | ||
data_reindex_op = h,k,l | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These validation steps should likely be in the validate function. validate runs before the program starts execution, which will save time if the parameters are invalid.