Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xfel merge filter by resolution n obs #1030

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 105 additions & 47 deletions xfel/merging/application/filter/experiment_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,11 @@ class experiment_filter(worker):
def __init__(self, params, mpi_helper=None, mpi_logger=None):
super(experiment_filter, self).__init__(params=params, mpi_helper=mpi_helper, mpi_logger=mpi_logger)

def validate(self):
  """Pre-run sanity check of the filter parameters.

  So far only the "unit_cell" algorithm needs up-front validation:
  it cannot run without a target space group.  Any other algorithm
  selection passes through unchecked.
  """
  if 'unit_cell' in self.params.filter.algorithm:
    assert self.params.filter.unit_cell.value.target_space_group is not None, \
      'Space group is required for unit cell filtering'

def __repr__(self):
  """Short description of this worker, used in merging log output."""
  return 'Filter experiments'

def check_unit_cell(self, experiment):

experiment_unit_cell = experiment.crystal.get_unit_cell()

is_ok = experiment_unit_cell.is_similar_to(self.params.filter.unit_cell.value.target_unit_cell,
self.params.filter.unit_cell.value.relative_length_tolerance,
self.params.filter.unit_cell.value.absolute_angle_tolerance)
Expand Down Expand Up @@ -49,7 +41,6 @@ def remove_experiments(experiments, reflections, experiment_ids_to_remove):
experiments.select_on_experiment_identifiers([i for i in experiments.identifiers() if i not in experiment_ids_to_remove])
reflections.remove_on_experiment_identifiers(experiment_ids_to_remove)
reflections.reset_ids()

return experiments, reflections

def check_cluster(self, experiment):
Expand All @@ -69,18 +60,89 @@ def check_cluster(self, experiment):
return m_distance < self.params.filter.unit_cell.cluster.covariance.mahalanobis

def run(self, experiments, reflections):
  """Remove whole experiments (and their reflections) that fail the
  configured filters.

  Supported values of filter.algorithm are "unit_cell", "n_obs" and
  "resolution"; each enabled filter contributes a list of experiment
  identifiers to remove.  If none of the supported algorithms is
  requested, the input is returned unchanged.

  Returns the (experiments, reflections) pair with rejected experiments
  removed and reflection ids renumbered.
  """
  # filter.algorithm is declared .multiple in the phil, so it arrives as
  # a list (each entry itself a list of strings when .type = strings).
  # Flatten it and test membership across ALL entries instead of only
  # algorithm[0], so every requested filter is honored and an empty
  # selection does not raise IndexError.
  requested = set()
  for entry in (self.params.filter.algorithm or []):
    if isinstance(entry, str):
      requested.add(entry)
    else:
      requested.update(entry)
  filter_by_unit_cell = 'unit_cell' in requested
  filter_by_n_obs = 'n_obs' in requested
  filter_by_resolution = 'resolution' in requested
  # only "unit_cell" "n_obs" and "resolution" algorithms are supported
  if not (filter_by_unit_cell or filter_by_n_obs or filter_by_resolution):
    return experiments, reflections

  # NOTE(review): these parameter checks would be better placed in
  # validate(), which runs before execution starts — confirm and move.
  if filter_by_unit_cell:
    assert self.params.filter.unit_cell.value.target_space_group is not None, \
      'Space group is required for unit cell filtering'
  if filter_by_n_obs:
    assert (self.params.filter.n_obs.min is not None) or \
           (self.params.filter.n_obs.max is not None), \
      'Either min or max is required for n_obs filtering'
  if filter_by_resolution:
    assert self.params.filter.resolution.d_min is not None, \
      'd_min is required for resolution filtering'

  self.logger.log_step_time("FILTER_EXPERIMENTS")

  # Collect identifiers from each enabled filter.  An experiment may be
  # flagged by more than one filter, so de-duplicate before removal;
  # the per-filter counters below may therefore overlap and their sum
  # can exceed the number of experiments actually removed.
  experiment_ids_to_remove = []
  if filter_by_unit_cell:
    experiment_ids_to_remove_unit_cell, removed_for_unit_cell, removed_for_space_group = \
      self.run_filter_by_unit_cell(experiments, reflections)
    experiment_ids_to_remove += experiment_ids_to_remove_unit_cell
  else:
    removed_for_unit_cell = 0
    removed_for_space_group = 0
  if filter_by_n_obs:
    experiment_ids_to_remove_n_obs, removed_for_n_obs = \
      self.run_filter_by_n_obs(experiments, reflections)
    experiment_ids_to_remove += experiment_ids_to_remove_n_obs
  else:
    removed_for_n_obs = 0
  if filter_by_resolution:
    experiment_ids_to_remove_resolution, removed_for_resolution = \
      self.run_filter_by_resolution(experiments, reflections)
    experiment_ids_to_remove += experiment_ids_to_remove_resolution
  else:
    removed_for_resolution = 0
  experiment_ids_to_remove = list(set(experiment_ids_to_remove))

  input_len_refls = len(reflections)
  new_experiments, new_reflections = experiment_filter.remove_experiments(
    experiments, reflections, experiment_ids_to_remove)
  removed_reflections = input_len_refls - len(new_reflections)

  self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell)
  self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group)
  self.logger.log("Experiments rejected because of n_obs %d"%removed_for_n_obs)
  self.logger.log("Experiments rejected because of resolution %d"%removed_for_resolution)
  self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections)

  # MPI-reduce total counts onto rank 0
  comm = self.mpi_helper.comm
  MPI = self.mpi_helper.MPI
  total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0)
  total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0)
  total_removed_for_n_obs = comm.reduce(removed_for_n_obs, MPI.SUM, 0)
  total_removed_for_resolution = comm.reduce(removed_for_resolution, MPI.SUM, 0)
  total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0)

  # rank 0: log total counts
  if self.mpi_helper.rank == 0:
    self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell)
    self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group)
    self.logger.main_log("Total experiments rejected because of n_obs %d"%total_removed_for_n_obs)
    self.logger.main_log("Total experiments rejected because of resolution %d"%total_removed_for_resolution)
    self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed)

  self.logger.log_step_time("FILTER_EXPERIMENTS", True)

  # Do we have any data left?
  # NOTE(review): the result of count() is not checked here — presumably
  # data_counter handles the all-empty case itself; confirm.
  from xfel.merging.application.utils.data_counter import data_counter
  data_counter(self.params).count(new_experiments, new_reflections)

  return new_experiments, new_reflections

def run_filter_by_unit_cell(self, experiments, reflections):
experiment_ids_to_remove = []
removed_for_unit_cell = 0
removed_for_space_group = 0

# BEGIN BY-VALUE FILTER
if self.params.filter.unit_cell.algorithm == "value":
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
# If the filter unit cell and/or space group params are Auto, use the corresponding scaling targets.
if self.params.filter.unit_cell.value.target_unit_cell in (Auto, None):
if self.params.scaling.unit_cell is None:
try:
Expand All @@ -102,12 +164,10 @@ def run(self, experiments, reflections):
elif not self.check_unit_cell(experiment):
experiment_ids_to_remove.append(experiment.identifier)
removed_for_unit_cell += 1
# END BY-VALUE FILTER
# END BY-VALUE FILTER
elif self.params.filter.unit_cell.algorithm == "cluster":

from uc_metrics.clustering.util import get_population_permutation # implicit import
import pickle

class Empty: pass
if self.mpi_helper.rank == 0:
with open(self.params.filter.unit_cell.cluster.covariance.file,'rb') as F:
Expand Down Expand Up @@ -136,38 +196,36 @@ class Empty: pass
if not self.check_cluster(experiment):
experiment_ids_to_remove.append(experiment.identifier)
removed_for_unit_cell += 1
# END OF COVARIANCE FILTER
input_len_expts = len(experiments)
input_len_refls = len(reflections)
new_experiments, new_reflections = experiment_filter.remove_experiments(experiments, reflections, experiment_ids_to_remove)

removed_reflections = input_len_refls - len(new_reflections)
assert removed_for_space_group + removed_for_unit_cell == input_len_expts - len(new_experiments)

self.logger.log("Experiments rejected because of unit cell dimensions: %d"%removed_for_unit_cell)
self.logger.log("Experiments rejected because of space group %d"%removed_for_space_group)
self.logger.log("Reflections rejected because of rejected experiments: %d"%removed_reflections)

# MPI-reduce total counts
comm = self.mpi_helper.comm
MPI = self.mpi_helper.MPI
total_removed_for_unit_cell = comm.reduce(removed_for_unit_cell, MPI.SUM, 0)
total_removed_for_space_group = comm.reduce(removed_for_space_group, MPI.SUM, 0)
total_reflections_removed = comm.reduce(removed_reflections, MPI.SUM, 0)

# rank 0: log total counts
if self.mpi_helper.rank == 0:
self.logger.main_log("Total experiments rejected because of unit cell dimensions: %d"%total_removed_for_unit_cell)
self.logger.main_log("Total experiments rejected because of space group %d"%total_removed_for_space_group)
self.logger.main_log("Total reflections rejected because of rejected experiments %d"%total_reflections_removed)

self.logger.log_step_time("FILTER_EXPERIMENTS", True)

# Do we have any data left?
from xfel.merging.application.utils.data_counter import data_counter
data_counter(self.params).count(new_experiments, new_reflections)
# END OF COVARIANCE FILTER
return experiment_ids_to_remove, removed_for_unit_cell, removed_for_space_group

return new_experiments, new_reflections
def run_filter_by_n_obs(self, experiments, reflections):
  """Flag experiments whose per-experiment reflection count falls
  outside [filter.n_obs.min, filter.n_obs.max].

  Either bound may be None (unset).  Bounds are compared with
  ``is not None`` so an explicit 0 is honored rather than being
  discarded by truthiness, and each experiment is appended and
  counted at most once.

  Returns (experiment_ids_to_remove, removed_for_n_obs).
  """
  experiment_ids_to_remove = []
  removed_for_n_obs = 0
  n_obs_min = self.params.filter.n_obs.min
  n_obs_max = self.params.filter.n_obs.max
  for expt_index, experiment in enumerate(experiments):
    refls_expt = reflections.select(reflections['id'] == expt_index)
    n_obs = len(refls_expt)
    too_many = n_obs_max is not None and n_obs > n_obs_max
    too_few = n_obs_min is not None and n_obs < n_obs_min
    if too_many or too_few:
      experiment_ids_to_remove.append(experiment.identifier)
      removed_for_n_obs += 1
  return experiment_ids_to_remove, removed_for_n_obs

def run_filter_by_resolution(self, experiments, reflections):
  """Flag experiments whose highest-resolution reflection is still
  worse (larger d-spacing) than filter.resolution.d_min.

  The per-reflection d-spacings are selected by experiment id rather
  than by a running offset, so reflections need not be grouped or
  ordered by id.  Experiments with no reflections are skipped instead
  of crashing min() on an empty sequence; the n_obs filter is the
  place to reject those.

  Returns (experiment_ids_to_remove, removed_for_resolution).
  """
  experiment_ids_to_remove = []
  removed_for_resolution = 0
  d_min_cutoff = self.params.filter.resolution.d_min
  # d-spacing of every reflection, in the same order as `reflections`
  resolution = reflections.compute_d(experiments)
  for expt_index, expt in enumerate(experiments):
    d_expt = resolution.select(reflections['id'] == expt_index)
    if len(d_expt) == 0:
      continue  # no reflections for this experiment — nothing to judge
    best_d = min(d_expt)  # smallest d-spacing = highest resolution reached
    if best_d > d_min_cutoff:
      experiment_ids_to_remove.append(expt.identifier)
      removed_for_resolution += 1
  return experiment_ids_to_remove, removed_for_resolution

if __name__ == '__main__':
from xfel.merging.application.worker import exercise_worker
Expand Down
8 changes: 6 additions & 2 deletions xfel/merging/application/phil/phil.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,18 @@
.help = The filter section defines criteria to accept or reject whole experiments
.help = or to modify the entire experiment by a reindexing operator
.help = refer to the select section for filtering of individual reflections
.help = only unit_cell, resolution, and n_obs are implemented
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's go ahead and drop reindex and report if they aren't implemented.

{
algorithm = n_obs reindex resolution unit_cell report
.type = choice
.type = strings
.multiple = True
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If .type = choice and .multiple = true, then params.filter.algorithm should be a list of already validated strings. Is there a reason that doesn't work?

n_obs {
min = 15
min = None
.type = int
.help = Minimum number of observations for subsequent processing
max = None
.type = int
.help = Maximum number of observations for subsequent processing
}
reindex {
data_reindex_op = h,k,l
Expand Down
Loading