From 38bf9a8b58b1ec0a97b1a3a3d60eb2b965ff9d87 Mon Sep 17 00:00:00 2001 From: fredericpoitevin Date: Wed, 25 Sep 2024 11:41:47 -0700 Subject: [PATCH 1/3] First stab at pyPADF workflows. --- config/templates/pypadf_difftocorr.txt | 59 +++++++++++ lute/io/models/smd.py | 135 +++++++++++++++++++++++++ lute/managed_tasks.py | 4 +- workflows/airflow/smd_fxs.py | 38 +++++++ 4 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 config/templates/pypadf_difftocorr.txt create mode 100644 workflows/airflow/smd_fxs.py diff --git a/config/templates/pypadf_difftocorr.txt b/config/templates/pypadf_difftocorr.txt new file mode 100644 index 00000000..65cf5430 --- /dev/null +++ b/config/templates/pypadf_difftocorr.txt @@ -0,0 +1,59 @@ +[PY3CORRELATION] + +# the path where output files will be saved +outpath = {{ output_path }} + +# path where diffraction files are located +samplepath = {{ input_path }} + +# a binary mask of good detector pixels, if required +#maskname = ./output/mask/hex_mask.h5 + +# the tag is prepended to all output files +# change it if you don't want to overwrite previous results +tag = {{ tag }} + +# sample to detector distance (metres) +dz = {{ sample_to_detector_distance }} + +# photon wavelength (metres) +wl = {{ photon_wavelength_m }} + +# width of a detector pixel (metres) +pw = {{ detector_pixel_width_m }} + +# no. 
of theta samples to use for the correlation function +nth = {{ number_theta_samples }} + +# number of CPU threads to use +nthreads = {{ nthreads }} + +# number of the starting diffraction pattern +nstart = {{ i_pattern_start }} + +# number of diffraction patterns to correlate +npatterns = {{ n_patterns }} + +# rebin each diffraction pattern by this factor +rebin = {{ bin_factor }} + +# Set this flag to use the mask +maskflag = {{ mask_flag }} + +# set this flag to output processed (=shifted, cropped, rebinned) diffraction patterns +outputdp = {{ process_flag }} + +# shift the centre of the diffraction pattern +dp_shift_flag = {{ shift_center_flag }} + +# set this flag to crop the diffraction patterns +cropflag = {{ crop_flag }} + +# x and y width of crop area (pixels) +nxcrop = {{ x_width_pixels }} +nycrop = {{ y_width_pixels }} + +# number of pixels to shift in x and y +# can use sub-pixel shifts (=decimal values) +shiftx = {{ x_shift_pixels }} +shifty = {{ y_shift_pixels }} \ No newline at end of file diff --git a/lute/io/models/smd.py b/lute/io/models/smd.py index bea3f44f..1f8cae81 100644 --- a/lute/io/models/smd.py +++ b/lute/io/models/smd.py @@ -349,3 +349,138 @@ class Thresholds(BaseModel): 0, description="If non-zero load ROIs in batches. Slower but may help OOM errors.", ) + + +class CorrelatePyPADFFXSParameters(ThirdPartyParameters): + """Parameters for running pypadf difftocorr.""" + + class Config(ThirdPartyParameters.Config): + set_result: bool = False + """Whether the Executor should mark a specified parameter as a result.""" + + class ConfParameters(BaseModel): + """Template parameters for pyPADF config file.""" + + class Config(BaseModel.Config): + extra: str = "allow" + + input_path: str = Field( + "", + description="Directory where input files are located.", + ) + + output_path: str = Field( + "", + description="Directory output files will be placed.", + ) + + tag: str = Field( + "hex", + description="tag prepended to all output files." 
+ ) + + sample_to_detector_distance: float = Field( + 0.5, + description="Sample to detector distance (in meter)", + ) + + photon_wavelength_m: float = Field( + 0.2e-10, + description="photon wavelength (in meter)", + ) + + detector_pixel_width_m + + number_theta_samples: int = Field( + 90, + description="no. of theta samples to use for the correlation function", + ) + + nthreads: int = Field( + 10, + description="number of CPU threads to use.", + ) + + i_pattern_start: int = Field( + 1, + description="number of the starting diffraction pattern", + ) + + n_patterns: int = Field( + 6, + description="number of diffraction patterns to process", + ) + + bin_factor: int = Field( + 8, + description="Factor to bin diffraction patterns with", + ) + + mask_flag: bool = Field( + False, + description="Flag to control mask usage", + ) + + process_flag: bool = Field( + False, + description="Flag to control output processed diffraction patterns", + ) + + shift_center_flag: bool = Field( + False, + description="Flag to control center shifting of diffraction patterns", + ) + + crop_flag: bool = Field( + Flase, + description="Flag to control cropping of diffraction patterns", + ) + + x_width_pixels: int = Field( + 100, + description="Width (X) of cropping area (in pixels).", + ) + + y_width_pixels: int = Field( + 100, + description="Width (Y) of cropping area (in pixels).", + ) + + x_shift_pixels: float = Field( + 0, + description="X-shift in fractional pixels.", + ) + + y_shift_pixels: float = Field( + 0, + description="Y-shift in fractional pixels.", + ) + + + executable: str = Field( + "python", + description="python executable.", + flag_type="", + ) + pypadf_executable: str = Field( + "/sdf/home/c/caw21/cxil1018723/pypadf/difftocorr.py", + description="pyPADF diffraction > correlation program.", + flag_type="", + ) + in_file: str = Field( + "", + description="Location of the input config file.", + flag_type="", + ) + in_parameters: Optional[ConfParameters] = Field( + None, 
description="Optional template parameters to fill in the config file.", + flag_type="", + ) + lute_template_cfg: TemplateConfig = Field( + TemplateConfig( + template_name="pypadf_difftocorr.txt", + output_path="", + ), + description="Template information for the pypadf_difftocorr file.", + ) \ No newline at end of file diff --git a/lute/managed_tasks.py b/lute/managed_tasks.py index e0ac6ca9..c4c28a7e 100644 --- a/lute/managed_tasks.py +++ b/lute/managed_tasks.py @@ -48,7 +48,6 @@ set_summary=False, ) - SmallDataXSSAnalyzer: MPIExecutor = MPIExecutor("AnalyzeSmallDataXSS") """Process scattering results from a Small Data HDF5 file.""" @@ -58,6 +57,9 @@ SmallDataXESAnalyzer: MPIExecutor = MPIExecutor("AnalyzeSmallDataXES") """Process XES results from a Small Data HDF5 file.""" +PyPADFFXSCorrelater: MPIExecutor = MPIExecutor("CorrelatePyPADFFXS") +"""Process FXS results from a Small Data HDF5 file.""" + # SFX ##### CCTBXIndexer: Executor = Executor("IndexCCTBXXFEL") diff --git a/workflows/airflow/smd_fxs.py b/workflows/airflow/smd_fxs.py new file mode 100644 index 00000000..1897fba0 --- /dev/null +++ b/workflows/airflow/smd_fxs.py @@ -0,0 +1,38 @@ +"""Run smalldata_tools and basic analysis. + +Runs smalldata_tools and then basic analysis for FXS. + +Note: + The task_id MUST match the managed task name when defining DAGs - it is used + by the operator to properly launch it. + + dag_id names must be unique, and they are not namespaced via folder + hierarchy. I.e. all DAGs on an Airflow instance must have unique ids. The + Airflow instance used by LUTE is currently shared by other software - DAG + IDs should always be prefixed with `lute_`. 
LUTE scripts should prepend this + internally, so a DAG "lute_test" can be triggered by asking for "test" +""" + +from datetime import datetime +import os +from airflow import DAG +from lute.operators.jidoperators import JIDSlurmOperator + +dag_id: str = f"lute_{os.path.splitext(os.path.basename(__file__))[0]}" +description: str = ( + "Produce basic analysis for FXS from SmallData hdf5 files." +) + +dag: DAG = DAG( + dag_id=dag_id, + start_date=datetime(2024, 9, 3), + schedule_interval=None, + description=description, +) + +smd_producer: JIDSlurmOperator = JIDSlurmOperator(task_id="SmallDataProducer", dag=dag) + +fxs_correlater: JIDSlurmOperator = JIDSlurmOperator(task_id="PyPADFFXSCorrelater", dag=dag) + +# Run the FXS correlation after SmallData production +smd_producer >> fxs_correlater \ No newline at end of file From 5906f3913fed4ddaf4bdef5b27324246e0d81350 Mon Sep 17 00:00:00 2001 From: fredericpoitevin Date: Mon, 30 Sep 2024 16:55:42 -0700 Subject: [PATCH 2/3] PyPADF workflow ready to test. --- config/test.yaml | 25 +++++++++++++++++++++++++ lute/io/models/smd.py | 31 +++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/config/test.yaml b/config/test.yaml index 8ec059a7..85d85186 100644 --- a/config/test.yaml +++ b/config/test.yaml @@ -132,6 +132,31 @@ AnalyzeSmallDataXAS: min_Iscat: 10 # Minimum integrated scattering intensity min_ipm: 500 # Minimum x-ray intensity at selected ipm +CorrelatePyPADFFXS: + #executable: "python" + #pypadf_executable: "/sdf/home/c/caw21/cxil1018723/pypadf/difftocorr.py" + pypadf_parameters: + input_path: "/sdf/data/lcls/ds/cxi/cxil1018723/hdf5/smalldata/" + output_path: "/sdf/data/lcls/ds/cxi/cxil1018723/scratch/lute/" + tag: "test" + #sample_to_detector_distance: 0.5 # in meter + #photon_wavelength_m: 0.2e-10 # in meter + #detector_pixel_width_m: 5.0e-5 # in meter + #number_theta_samples: 90 + #nthreads: 10 + #i_pattern_start: 1 + #n_patterns: 6 + #bin_factor: 8 + #mask_flag: False + #process_flag: False + 
#shift_center_flag: False + #crop_flag: False + #x_width_pixels: 100 (in pixels) + #y_width_pixels: 100 (in pixels) + #x_shift_pixels: 0.0 (in pixels) + #y_shift_pixels: 0.0 (in pixels) + + Test: float_var: 0.01 str_var: "test" diff --git a/lute/io/models/smd.py b/lute/io/models/smd.py index 1f8cae81..c6b8c2eb 100644 --- a/lute/io/models/smd.py +++ b/lute/io/models/smd.py @@ -358,7 +358,7 @@ class Config(ThirdPartyParameters.Config): set_result: bool = False """Whether the Executor should mark a specified parameter as a result.""" - class ConfParameters(BaseModel): + class PyPADFParameters(BaseModel): """Template parameters for pyPADF config file.""" class Config(BaseModel.Config): @@ -389,7 +389,10 @@ class Config(BaseModel.Config): description="photon wavelength (in meter)", ) - detector_pixel_width_m + detector_pixel_width_m: float = Field( + 5.0e-5, + description="width of a detector pixel (in meter)", + ) number_theta_samples: int = Field( 90, @@ -432,7 +435,7 @@ class Config(BaseModel.Config): ) crop_flag: bool = Field( - Flase, + False, description="Flag to control cropping of diffraction patterns", ) @@ -456,6 +459,7 @@ class Config(BaseModel.Config): description="Y-shift in fractional pixels.", ) + _set_pypadf_template_parameters = template_parameter_validator("pypadf_parameters") executable: str = Field( "python", @@ -467,12 +471,12 @@ class Config(BaseModel.Config): description="pyPADF diffraction > correlation program.", flag_type="", ) - in_file: str = Field( + pypadf_file: str = Field( "", description="Location of the input config file.", flag_type="", ) - in_parameters: Optional[ConfParameters] = Field( + pypadf_parameters: Optional[PyPADFParameters] = Field( None, description="Optional template parameters to fill in the config file.", flag_type="", @@ -483,4 +487,19 @@ class Config(BaseModel.Config): output_path="", ), description="Template information for the pypadf_difftocorr file.", - ) \ No newline at end of file + ) + + 
@validator("pypadf_file", always=True) + def set_default_pypadf_file(cls, in_file: str, values: Dict[str, Any]) -> str: + if in_file == "": + return f"{values['lute_config'].work_dir}/pypadf_diftocorr.txt" + return in_file + + @validator("lute_template_cfg", always=True) + def set_pypadf_template_path( + cls, lute_template_cfg: TemplateConfig, values: Dict[str, Any] + ) -> TemplateConfig: + if lute_template_cfg.output_path == "": + lute_template_cfg.output_path = values["pypadf_file"] + return lute_template_cfg + From 86c7f0d43cc9cc9eb03795243a0d33e0f54cfb36 Mon Sep 17 00:00:00 2001 From: fredericpoitevin Date: Mon, 30 Sep 2024 17:21:19 -0700 Subject: [PATCH 3/3] removing tasklet on Gabriel's suggestion. --- lute/managed_tasks.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lute/managed_tasks.py b/lute/managed_tasks.py index c4c28a7e..dcd9cdc7 100644 --- a/lute/managed_tasks.py +++ b/lute/managed_tasks.py @@ -40,13 +40,6 @@ ################### SmallDataProducer: Executor = Executor("SubmitSMD") """Runs the production of a smalldata HDF5 file.""" -SmallDataProducer.add_tasklet( - clone_smalldata, - ["{{ producer }}"], - when="before", - set_result=False, - set_summary=False, -) SmallDataXSSAnalyzer: MPIExecutor = MPIExecutor("AnalyzeSmallDataXSS") """Process scattering results from a Small Data HDF5 file."""