Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diffBragg hardening #857

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions simtbx/command_line/hopper_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import sys
import logging
import pandas
import time

from simtbx.diffBragg.hopper_ensemble_utils import load_inputs
from libtbx.mpi4py import MPI
Expand Down Expand Up @@ -75,7 +76,13 @@ def write_commandline(params):
else:
mpi_logger.setup_logging_from_params(params)

df = pandas.read_pickle(args.input)
for i in range(3):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what was the failure occurring here without the waiting ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there two jobs running simultaneously??

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct me if I'm wrong -- this is what prep_time is currently addressing? This was a first stab at making it happen by default to wait until files are available, if necessary. Am I in the wrong place?

try:
df = pandas.read_pickle(args.input)
break
except FileNotFoundError:
time.sleep(60)
# wait for preimported files to be written

if params.skip is not None:
df = df.iloc[params.skip:]
Expand All @@ -92,7 +99,9 @@ def write_commandline(params):
os.makedirs(gather_dir)

modelers = load_inputs(df, params, exper_key=args.exp, refls_key=args.refl, gather_dir=gather_dir)
# note, we only go beyond this point if perImport flag was not passed
# do not proceed beyond this point if we are only preimporting
if args.preImport:
sys.exit()
modelers.cell_for_mtz = args.cell
modelers.max_sigma = args.maxSigma
modelers.outdir = args.outdir if args.outdir is not None else modelers.params.outdir
Expand Down
29 changes: 18 additions & 11 deletions simtbx/diffBragg/hopper_ensemble_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from dials.array_family import flex
from dxtbx.model import ExperimentList
from xfel.merging.application.utils.memory_usage import get_memory_usage
from libtbx.mpi4py import mpi_abort_on_exception


COMM = MPI.COMM_WORLD
Expand Down Expand Up @@ -577,6 +578,7 @@ def get_gather_name(exper_name, gather_dir):
return os.path.abspath(gathered_name)


@mpi_abort_on_exception
def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictions',
gather_dir=None):

Expand Down Expand Up @@ -621,7 +623,11 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio
exper_dataframe = pandas_table.query("%s=='%s'" % (exper_key, exper_name))

refl_name = exper_dataframe[refls_key].values[0]
refls = flex.reflection_table.from_file(refl_name)
try:
refls = flex.reflection_table.from_file(refl_name)
except IOError:
MAIN_LOGGER.critical("Unable to load reflection file %s -- skipping." % refl_name)
continue

miller_inds = list( refls['miller_index'])
is_not_000 = [h != (0, 0, 0) for h in miller_inds]
Expand All @@ -641,16 +647,17 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio
shot_modeler.exper_name = exper_name
shot_modeler.refl_name = refl_name
shot_modeler.rank = COMM.rank
if params.refiner.load_data_from_refl:
gathered = shot_modeler.GatherFromReflectionTable(expt, refls, sg_symbol=params.space_group)
MAIN_LOGGER.debug("tried loading from reflection table")
else:
gathered = shot_modeler.GatherFromExperiment(expt, refls, sg_symbol=params.space_group)
MAIN_LOGGER.debug("tried loading data from expt table")
if not gathered:
raise IOError("Failed to gather data from experiment %s", exper_name)
else:
try:
if params.refiner.load_data_from_refl:
gathered = shot_modeler.GatherFromReflectionTable(expt, refls, sg_symbol=params.space_group)
MAIN_LOGGER.debug("tried loading from reflection table")
else:
gathered = shot_modeler.GatherFromExperiment(expt, refls, sg_symbol=params.space_group)
MAIN_LOGGER.debug("tried loading data from expt table")
MAIN_LOGGER.debug("successfully loaded data")
except RuntimeError:
MAIN_LOGGER.critical("Failed to gather data from experiment %s; skipping.", exper_name)
continue
dermen marked this conversation as resolved.
Show resolved Hide resolved
MAIN_LOGGER.info("EVENT: DONE LOADING ROI")

if gather_dir is not None:
Expand Down Expand Up @@ -696,7 +703,7 @@ def load_inputs(pandas_table, params, exper_key="exp_name", refls_key='predictio
pandas_table.to_pickle(pd_name)
print("Wrote file %s to be used to re-run ens.hopper . Use optional ens.hopper arg '--refl ens.hopper.imported', and the phil param load_data_from_refl=True to load the imported data" % pd_name)
COMM.barrier()
sys.exit()
return # this must not be a sys.exit() which will be interpreted by the @mpi_abort_on_exception decorator as an exception
shot_modelers.mpi_set_x_slices()

assert shot_modelers.num_modelers > 0
Expand Down
13 changes: 11 additions & 2 deletions simtbx/diffBragg/prep_stage2_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,29 @@ def prep_dataframe(df, refls_key="predictions"):
nshots = len(df)
refls_names = df[refls_key]
refls_per_shot = []
shots_skipped = []
if COMM.rank==0:
LOGGER.info("Loading nrefls per shot")
for i_shot, name in enumerate(refls_names):
if i_shot % COMM.size != COMM.rank:
continue
R = flex.reflection_table.from_file(name)
try:
R = flex.reflection_table.from_file(name)
except IOError as e:
LOGGER.critical("Unable to load reflections from file %s -- skipping file." % name)
LOGGER.info("Attempting to load file produced error:\n%s" % e)
shots_skipped.append(i_shot)
refls_per_shot.append((i_shot, 0))
continue
if len(R)==0:
LOGGER.critical("Reflection %s has 0 reflections !" % (name, len(R)))
refls_per_shot.append((i_shot, len(R)))

refls_per_shot = COMM.reduce(refls_per_shot, root=0)
shots_skipped = COMM.reduce(shots_skipped, root=0)
work_distribution = None
if COMM.rank==0:
print("Found %d shots" % nshots)
print("Able to read reflections tables for %d of %d shots" % ((nshots - len(shots_skipped)), nshots))
refls_per_shot = sorted(refls_per_shot)
indices, weights = zip(*refls_per_shot)
assert list(indices) == list(range(nshots)) # new sanity test
Expand Down
2 changes: 2 additions & 0 deletions simtbx/diffBragg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,8 @@ def get_roi_background_and_selection_flags(refls, imgs, shoebox_sz=10, reject_ed
MAIN_LOGGER.debug("Number of skipped ROI with negative BGs: %d / %d" % (num_roi_negative_bg, len(rois)))
MAIN_LOGGER.debug("Number of skipped ROI with NAN in BGs: %d / %d" % (num_roi_nan_bg, len(rois)))
MAIN_LOGGER.info("Number of ROIS that will proceed to refinement: %d/%d" % (np.sum(selection_flags), len(rois)))
if np.sum(selection_flags) == 0:
raise RuntimeError("Can't proceed with zero ROIs")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to catch this error in hopper_utils.py / GatherFromExperiment:

try:
     roi_packet = utils.get_roi_background_and_selection_flag( ... 

except RuntimeError:  
    return False

something like that... the return value of this method is inspected in command_line/hopper.py ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that makes sense.

if ret_cov:
return kept_rois, panel_ids, tilt_abc, selection_flags, background, all_cov
else:
Expand Down