diff --git a/config/test_var_subs.yaml b/config/test_var_subs.yaml
new file mode 100644
index 00000000..d3ab64f0
--- /dev/null
+++ b/config/test_var_subs.yaml
@@ -0,0 +1,25 @@
+%YAML 1.3
+---
+title: "Configuration to Test YAML Substitution"
+experiment: "TestYAMLSubs"
+run: 12
+date: "2024/05/01"
+lute_version: 0.1
+task_timeout: 600
+work_dir: "/sdf/scratch/users/d/dorlhiac"
+...
+---
+OtherTask:
+ useful_other_var: "USE ME!"
+
+NonExistentTask:
+ test_sub: "/path/to/{{ experiment }}/file_r{{ run:04d }}.input"
+ test_env_sub: "/path/to/{{ $EXPERIMENT }}/file.input"
+ test_nested:
+ a: "outfile_{{ run }}_one.out"
+ b:
+ c: "outfile_{{ run }}_two.out"
+ d: "{{ OtherTask.useful_other_var }}"
+ test_fmt: "{{ run:04d }}"
+ test_env_fmt: "{{ $RUN:04d }}"
+...
diff --git a/docs/tutorial/new_task.md b/docs/tutorial/new_task.md
index bb580669..fa415892 100644
--- a/docs/tutorial/new_task.md
+++ b/docs/tutorial/new_task.md
@@ -22,7 +22,7 @@ A brief overview of parameters objects will be provided below. The following inf
**`Task`s and `TaskParameter`s**
All `Task`s have a corresponding `TaskParameters` object. These objects are linked **exclusively** by a named relationship. For a `Task` named `MyThirdPartyTask`, the parameters object **must** be named `MyThirdPartyTaskParameters`. For third-party `Task`s there are a number of additional requirements:
-- The model must inherit from a base class called `BaseBinaryParameters`.
+- The model must inherit from a base class called `ThirdPartyParameters`.
- The model must have one field specified called `executable`. The presence of this field indicates that the `Task` is a third-party `Task` and the specified executable must be called. This allows all third-party `Task`s to be defined exclusively by their parameters model. A single `ThirdPartyTask` class handles execution of **all** third-party `Task`s.
All models are stored in `lute/io/models`. For any given `Task`, a new model can be added to an existing module contained in this directory or to a new module. If creating a new module, make sure to add an import statement to `lute.io.models.__init__`.
@@ -39,13 +39,13 @@ from pydantic import Field, validator
# Also include any pydantic type specifications - Pydantic has many custom
# validation types already, e.g. types for constrained numberic values, URL handling, etc.
-from .base import BaseBinaryParameters
+from .base import ThirdPartyParameters
# Change class name as necessary
-class RunTaskParameters(BaseBinaryParameters):
+class RunTaskParameters(ThirdPartyParameters):
"""Parameters for RunTask..."""
- class Config(BaseBinaryParameters.Config): # MUST be exactly as written here.
+ class Config(ThirdPartyParameters.Config): # MUST be exactly as written here.
...
# Model-wide configuration will go here
@@ -83,10 +83,10 @@ As an example, we can again consider defining a model for a `RunTask` `Task`. Co
A model specification for this `Task` may look like:
```py
-class RunTaskParameters(BaseBinaryParameters):
+class RunTaskParameters(ThirdPartyParameters):
"""Parameters for the runtask binary."""
- class Config(BaseBinaryParameters.Config):
+ class Config(ThirdPartyParameters.Config):
long_flags_use_eq: bool = True # For the --method parameter
# Prefer using full/absolute paths where possible.
@@ -144,7 +144,7 @@ For example, consider the `method_param1` field defined above for `RunTask`. We
```py
from pydantic import Field, validator, ValidationError
-class RunTaskParameters(BaseBinaryParameters):
+class RunTaskParameters(ThirdPartyParameters):
"""Parameters for the runtask binary."""
# [...]
@@ -205,10 +205,10 @@ Parameters used to run a `Task` are recorded in a database for every `Task`. It
```py
from pydantic import Field, validator
-from .base import BaseBinaryParameters
+from .base import ThirdPartyParameters
from ..db import read_latest_db_entry
-class RunTask2Parameters(BaseBinaryParameters):
+class RunTask2Parameters(ThirdPartyParameters):
input: str = Field("", description="Input file.", flag_type="--")
@validator("input")
@@ -241,8 +241,8 @@ After a pydantic model has been created, the next required step is to define a *
As mentioned, for most cases you can setup a third-party `Task` to use the first type of `Executor`. If, however, your third-party `Task` uses MPI, you can use either. When using the standard `Executor` for a `Task` requiring MPI, the `executable` in the pydantic model must be set to `mpirun`. For example, a third-party `Task` model, that uses MPI but can be run with the `Executor` may look like the following. We assume this `Task` runs a Python script using MPI.
```py
-class RunMPITaskParameters(BaseBinaryParameters):
- class Config(BaseBinaryParameters.Config):
+class RunMPITaskParameters(ThirdPartyParameters):
+ class Config(ThirdPartyParameters.Config):
...
executable: str = Field("mpirun", description="MPI executable")
@@ -297,14 +297,14 @@ LUTE provides two additional base models which are used for template parsing in
- `TemplateParameters` objects which hold parameters which will be used to render a portion of a template.
- `TemplateConfig` objects which hold two strings: the name of the template file to use and the full path (including filename) of where to output the rendered result.
-`Task` models which inherit from the `BaseBinaryParameters` model, as all third-party `Task`s should, allow for extra arguments. LUTE will parse any extra arguments provided in the configuration YAML as `TemplateParameters` objects automatically, which means that they do not need to be explicitly added to the pydantic model (although they can be). As such the **only** requirement on the Python-side when adding template rendering functionality to the `Task` is the addition of one parameter - an instance of `TemplateConfig`. The instance **MUST** be called `lute_template_cfg`.
+`Task` models which inherit from the `ThirdPartyParameters` model, as all third-party `Task`s should, allow for extra arguments. LUTE will parse any extra arguments provided in the configuration YAML as `TemplateParameters` objects automatically, which means that they do not need to be explicitly added to the pydantic model (although they can be). As such the **only** requirement on the Python-side when adding template rendering functionality to the `Task` is the addition of one parameter - an instance of `TemplateConfig`. The instance **MUST** be called `lute_template_cfg`.
```py
from pydantic import Field, validator
from .base import TemplateConfig
-class RunTaskParamaters(BaseBinaryParameters):
+class RunTaskParameters(ThirdPartyParameters):
...
# This parameter MUST be called lute_template_cfg!
lute_template_cfg: TemplateConfig = Field(
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..d84b19ea
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,397 @@
+# Setup
+LUTE is publicly available on [GitHub](https://github.com/slac-lcls/lute). In order to run it, the first step is to clone the repository:
+
+```bash
+# Navigate to the directory of your choice.
+git clone git@github.com:slac-lcls/lute
+```
+The repository directory structure is as follows:
+
+```
+lute
+ |--- config # Configuration YAML files (see below) and templates for third party config
+ |--- docs # Documentation (including this page)
+ |--- launch_scripts # Entry points for using SLURM and communicating with Airflow
+ |--- lute # Code
+ |--- run_task.py # Script to run an individual managed Task
+ |--- ...
+ |--- utilities # Help utility programs
+ |--- workflows # This directory contains workflow definitions. It is synced elsewhere and not used directly.
+
+```
+
+In general, most interactions with the software will be through scripts located in the `launch_scripts` directory. Some users (for certain use-cases) may also choose to run the `run_task.py` script directly - its location is highlighted within the hierarchy above. To begin with, you will need a YAML file, templates for which are available in the `config` directory. The structure of the YAML file and how to use the various launch scripts are described in more detail below.
+
+### A note on utilities
+In the `utilities` directory there are two useful programs to provide assistance with using the software:
+- `utilities/dbview`: LUTE stores all parameters for every analysis routine it runs (as well as results) in a database. This database is stored in the `work_dir` defined in the YAML file (see below). The `dbview` utility is a TUI application (Text-based user interface) which runs in the terminal. It allows you to navigate a LUTE database using the arrow keys, etc. Usage is: `utilities/dbview -p <path/to/database>`.
+- `utilities/lute_help`: This utility provides help and usage information for running LUTE software. E.g., it provides access to parameter descriptions to assist in properly filling out a configuration YAML. Its usage is described in slightly more detail below.
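+
+For example (the database path below is a placeholder; the file itself lives under the `work_dir` defined in your configuration YAML):
+
+```bash
+# Browse a LUTE analysis database with the TUI
+> utilities/dbview -p <work_dir>/<database_file>
+
+# Print parameter descriptions for a specific Task
+> utilities/lute_help -t IndexCrystFEL
+```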
+
+# Basic Usage
+## Overview
+LUTE runs code as `Task`s that are managed by an `Executor`. The `Executor` provides modifications to the environment the `Task` runs in, as well as controls details of inter-process communication, reporting results to the eLog, etc. Combinations of specific `Executor`s and `Task`s are already provided, and are referred to as **managed** `Task`s. **Managed** `Task`s are submitted as a single unit. They can be run individually, or a series of independent steps can be submitted all at once in the form of a workflow, or **directed acyclic graph** (**DAG**). This latter option makes use of Airflow to manage the individual execution steps.
+
+Running analysis with LUTE is the process of submitting one or more **managed** `Task`s. This is generally a two-step process:
+1. First, a configuration YAML file is prepared. This contains the parameterizations of all the `Task`s which you may run.
+2. Second, individual **managed** `Task`s, or a full workflow (**DAG**), are submitted.
+
+These two steps are described below.
+
+## Preparing a Configuration YAML
+All `Task`s are parameterized through a single configuration YAML file - even third party code which requires its own configuration files is managed through this YAML file. The basic structure is split into two documents, a brief header section which contains information that is applicable across all `Task`s, such as the experiment name, run numbers and the working directory, followed by per `Task` parameters:
+```yaml
+%YAML 1.3
+---
+title: "Some title."
+experiment: "MYEXP123"
+# run: 12 # Does not need to be provided
+date: "2024/05/01"
+lute_version: 0.1
+task_timeout: 600
+work_dir: "/sdf/scratch/users/d/dorlhiac"
+...
+---
+TaskOne:
+ param_a: 123
+ param_b: 456
+ param_c:
+ sub_var: 3
+ sub_var2: 4
+
+TaskTwo:
+ new_param1: 3
+ new_param2: 4
+
+# ...
+...
+```
+
+In the first document, the header, it is important that the `work_dir` is properly specified. This is the root directory in which `Task` outputs will be written and where the LUTE database will be stored. It may also be desirable to modify the `task_timeout` parameter which defines the time limit for individual `Task` jobs. By default it is set to 10 minutes, although this may not be sufficient for long running jobs. This value will be applied to **all** `Task`s so should account for the longest running job you expect.
+
+The actual analysis parameters are defined in the second document. As these vary from `Task` to `Task`, a full description will not be provided here. An actual template with real `Task` parameters is available in `config/test.yaml`. Your analysis POC can also help you set up and choose the correct `Task`s to include as a starting point. The template YAML file has further descriptions of what each parameter does and how to fill it out. You can also refer to the `lute_help` program described under the following sub-heading.
+
+Some things to consider and possible points of confusion:
+- While we will be submitting **managed** `Task`s, the parameters are defined at the `Task` level. I.e. the **managed** `Task` and `Task` itself have different names, and the names in the YAML refer to the latter. This is because a single `Task` can be run using different `Executor` configurations, but using the same parameters. The list of **managed** `Task`s is in `lute/managed_tasks.py`. A table is also provided below for some routines of interest.
+
+
+| **Managed** `Task` | The `Task` it Runs | `Task` Description |
+|:------------------------:|:------------------------:|:--------------------------------------------------------------:|
+| `SmallDataProducer` | `SubmitSMD` | Smalldata production |
+| `CrystFELIndexer` | `IndexCrystFEL` | Crystallographic indexing |
+| `PartialatorMerger` | `MergePartialator` | Crystallographic merging |
+| `HKLComparer` | `CompareHKL` | Crystallographic figures of merit |
+| `HKLManipulator` | `ManipulateHKL` | Crystallographic format conversions |
+| `DimpleSolver` | `DimpleSolve` | Crystallographic structure solution with molecular replacement |
+| `PeakFinderPyAlgos` | `FindPeaksPyAlgos` | Peak finding with PyAlgos algorithm. |
+| `PeakFinderPsocake` | `FindPeaksPsocake` | Peak finding with psocake algorithm. |
+| `StreamFileConcatenator` | `ConcatenateStreamFiles` | Stream file concatenation. |
+
+
+### How do I know what parameters are available, and what they do?
+A summary of `Task` parameters is available through the `lute_help` program.
+
+```bash
+> utilities/lute_help -t [TaskName]
+```
+
+Note, some parameters may say "Unknown description" - this either means they are using an old-style definition that does not include parameter help, or they may have some internal use. In particular, you will see this for `lute_config` on every `Task`; this parameter is filled in automatically and should be ignored. For example:
+
+```bash
+> utilities/lute_help -t IndexCrystFEL
+INFO:__main__:Fetching parameter information for IndexCrystFEL.
+IndexCrystFEL
+-------------
+Parameters for CrystFEL's `indexamajig`.
+
+There are many parameters, and many combinations. For more information on
+usage, please refer to the CrystFEL documentation, here:
+https://www.desy.de/~twhite/crystfel/manual-indexamajig.html
+
+
+Required Parameters:
+--------------------
+[...]
+
+All Parameters:
+-------------
+[...]
+
+highres (number)
+  Mark all pixels greater than `x` as bad.
+
+profile (boolean) - Default: False
+ Display timing data to monitor performance.
+
+temp_dir (string)
+ Specify a path for the temp files folder.
+
+wait_for_file (integer) - Default: 0
+ Wait at most `x` seconds for a file to be created. A value of -1 means wait forever.
+
+no_image_data (boolean) - Default: False
+  Load only the metadata, no images. Can check indexability without high data requirements.
+
+[...]
+```
+
+## Running Managed `Task`s and Workflows (DAGs)
+After a YAML file has been filled in you can run a `Task`. There are multiple ways to submit a `Task`, but the following 3 are the most likely:
+1. Run a single **managed** `Task` interactively by running `python ...`
+2. Run a single **managed** `Task` as a batch job (e.g. on S3DF) via a SLURM submission `submit_slurm.sh ...`
+3. Run a DAG (workflow with multiple **managed** `Task`s).
+
+These will be covered in turn below; however, in general all methods will require two parameters: the path to a configuration YAML file, and the name of the **managed** `Task` or workflow you want to run. When submitting via SLURM or submitting an entire workflow there are additional parameters to control these processes.
+
+
+### Running single managed `Task`s interactively
+The simplest submission method is just to run Python interactively. In most cases this is not practical for long-running analysis, but may be of use for short `Task`s or when debugging. From the root directory of the LUTE repository (or after installation) you can use the `run_task.py` script:
+
+```bash
+> python -B [-O] run_task.py -t <ManagedTaskName> -c <path/to/config.yaml>
+```
+
+The command-line arguments in square brackets `[]` are optional, while those in `<>` must be provided:
+- `-O` is the flag controlling whether you run in debug or non-debug mode. **By default, i.e. if you do NOT provide this flag, you will run in debug mode** which enables verbose printing. Passing `-O` will turn off debug to minimize output.
+- `-t <ManagedTaskName>` is the name of the **managed** `Task` you want to run.
+- `-c <path/to/config.yaml>` is the path to the configuration YAML.
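+
+For example, to run the `Tester` **managed** `Task` against the test configuration shipped with the repository (the same combination used in the debugging example further below):
+
+```bash
+> python -B run_task.py -t Tester -c config/test.yaml
+```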
+
+### Submitting a single managed `Task` as a batch job
+On S3DF you can also submit individual **managed** `Task`s to run as batch jobs. To do so, use `launch_scripts/submit_slurm.sh`:
+
+```bash
+> launch_scripts/submit_slurm.sh -t <ManagedTaskName> -c <path/to/config.yaml> [--debug] $SLURM_ARGS
+```
+
+As before, command-line arguments in square brackets `[]` are optional, while those in `<>` must be provided:
+- `-t <ManagedTaskName>` is the name of the **managed** `Task` you want to run.
+- `-c <path/to/config.yaml>` is the path to the configuration YAML.
+- `--debug` is the flag to control whether or not to run in debug mode.
+
+In addition to the LUTE-specific arguments, SLURM arguments must also be provided (`$SLURM_ARGS` above). You can provide as many as you want; however you will need to at least provide:
+- `--partition=<partition>` - The queue to run on, in general for LCLS this is `milano`.
+- `--account=lcls:<experiment>` - The account to use for batch job accounting.
+
+You will likely also want to provide at a minimum:
+- `--ntasks=<...>` to control the number of cores allocated.
+
+In general, it is best to prefer the long form of the SLURM arguments (`--arg=<...>`) in order to avoid potential clashes with present or future LUTE arguments.
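+
+Putting this together, a submission might look like the following sketch (the `Task` name, paths, and account are illustrative placeholders):
+
+```bash
+> launch_scripts/submit_slurm.sh -t PeakFinderPyAlgos -c /path/to/config.yaml --debug --partition=milano --account=lcls:<experiment> --ntasks=64
+```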
+
+### Workflow (DAG) submission
+**NOTE**: Support for submitting Airflow DAGs from the command-line is coming soon. As of 2024/05/03 you will need to use the instructions for **DAG Submission from the** `eLog` described below. This is due to authentication requirements - support for new API calls is in the works.
+
+Finally, you can submit a full workflow (e.g. SFX analysis, smalldata production and summary results, geometry optimization...). This can be done using a single script, `submit_launch_airflow.sh`, similarly to the SLURM submission above:
+
+```bash
+> launch_scripts/submit_launch_airflow.sh /path/to/lute/launch_scripts/launch_airflow.py -c <path/to/config.yaml> -w <dag_name> [--debug] [--test] $SLURM_ARGS
+```
+The submission process is slightly more complicated in this case. A more in-depth explanation is provided under "Airflow Launch and DAG Execution Steps" in the advanced usage section below, if interested. The parameters are as follows - as before, command-line arguments in square brackets `[]` are optional, while those in `<>` must be provided:
+- The **first argument** (must be first) is the full path to the `launch_scripts/launch_airflow.py` script located in whatever LUTE installation you are running. All other arguments can come afterwards in any order.
+- `-c <path/to/config.yaml>` is the path to the configuration YAML to use.
+- `-w <dag_name>` is the name of the DAG (workflow) to run. This replaces the task name provided when using the other two methods above. A DAG list is provided below.
+- `--debug` controls whether to use debug mode (verbose printing).
+- `--test` controls whether to use the test or production instance of Airflow to manage the DAG. The instances are running identical versions of Airflow, but the `test` instance may have "test" or more bleeding edge development DAGs.
+
+The `$SLURM_ARGS` must be provided in the same manner as when submitting an individual **managed** `Task` by hand to be run as batch job with the script above. **Note** that these parameters will be used as the starting point for the SLURM arguments of **every managed** `Task` in the DAG; however, individual steps in the DAG may have overrides built-in where appropriate to make sure that step is not submitted with potentially incompatible arguments. For example, a single threaded analysis `Task` may be capped to running on one core, even if in general everything should be running on 100 cores, per the SLURM argument provided. These caps are added during development and cannot be disabled through configuration changes in the YAML.
+
+**DAG List**
+- `find_peaks_index`
+- `psocake_sfx_phasing`
+- `pyalgos_sfx`
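+
+As a concrete illustration, submitting the `pyalgos_sfx` DAG listed above might look like the following (paths and account are placeholders):
+
+```bash
+> launch_scripts/submit_launch_airflow.sh /path/to/lute/launch_scripts/launch_airflow.py -c /path/to/config.yaml -w pyalgos_sfx --debug --partition=milano --account=lcls:<experiment> --ntasks=100
+```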
+
+
+#### DAG Submission from the `eLog`
+You can use the script in the previous section to submit jobs through the eLog. To do so navigate to the `Workflow > Definitions` tab using the blue navigation bar at the top of the eLog. On this tab, in the top-right corner (underneath the help and zoom icons) you can click the `+` sign to add a new workflow. This will bring up a "Workflow definition" UI window. When filling out the eLog workflow definition the following fields are needed (all of them):
+- `Name`: You can name the workflow anything you like. It should probably be something descriptive, e.g. if you are using LUTE to run smalldata_tools, you may call the workflow `lute_smd`.
+- `Executable`: In this field you will put the **full path** to the `submit_launch_airflow.sh` script: `/path/to/lute/launch_scripts/submit_launch_airflow.sh`.
+- `Parameters`: You will use the parameters as described above. Remember the first argument will be the **full path** to the `launch_airflow.py` script (this is NOT the same as the bash script used in the executable!): `/full/path/to/lute/launch_scripts/launch_airflow.py -c <path/to/config.yaml> -w <dag_name> [--debug] [--test] $SLURM_ARGS`
+- `Location`: **Be sure to set to** `S3DF`.
+- `Trigger`: You can have the workflow trigger automatically or manually. Which option to choose will depend on the type of workflow you are running. In general the options `Manually triggered` (which displays as `MANUAL` on the definitions page) and `End of a run` (which displays as `END_OF_RUN` on the definitions page) are safe options for ALL workflows. The latter will be automatically submitted for you when data acquisition has finished. If you are running a workflow with **managed** `Task`s that work as data is being acquired (e.g. `SmallDataProducer`), you may also select `Start of a run` (which displays as `START_OF_RUN` on the definitions page).
+
+Upon clicking create you will see a new entry in the table on the definitions page. In order to run `MANUAL` workflows, or re-run automatic workflows, you must navigate to the `Workflows > Control` tab. For each acquisition run you will find a drop down menu under the `Job` column. To submit a workflow you select it from this drop down menu by the `Name` you provided when creating its definition.
+
+
+# Advanced Usage
+## Variable Substitution in YAML Files
+Using `validator`s, it is possible to define (generally, default) model parameters for a `Task` in terms of other parameters. It is also possible to use validated Pydantic model parameters to substitute values into a configuration file required to run a third party `Task` (e.g. some `Task`s may require their own JSON, TOML files, etc. to run properly). For more information on these types of substitutions, refer to the `new_task.md` documentation on `Task` creation.
+
+These types of substitutions, however, have a limitation in that they are not easily adapted at run time. They therefore address only a small number of the possible combinations in the dependencies between different input parameters. In order to support more complex relationships between parameters, variable substitutions can also be used in the configuration YAML itself. Using a syntax similar to `Jinja` templates, you can define values for YAML parameters in terms of other parameters or environment variables. The values are substituted before Pydantic attempts to validate the configuration.
+
+It is perhaps easiest to illustrate with an example. A test case is provided in `config/test_var_subs.yaml` and is reproduced here:
+
+```yaml
+%YAML 1.3
+---
+title: "Configuration to Test YAML Substitution"
+experiment: "TestYAMLSubs"
+run: 12
+date: "2024/05/01"
+lute_version: 0.1
+task_timeout: 600
+work_dir: "/sdf/scratch/users/d/dorlhiac"
+...
+---
+OtherTask:
+ useful_other_var: "USE ME!"
+
+NonExistentTask:
+ test_sub: "/path/to/{{ experiment }}/file_r{{ run:04d }}.input" # Substitute `experiment` and `run` from header above
+ test_env_sub: "/path/to/{{ $EXPERIMENT }}/file.input" # Substitute from the environment variable $EXPERIMENT
+ test_nested:
+ a: "outfile_{{ run }}_one.out" # Substitute `run` from header above
+ b:
+ c: "outfile_{{ run }}_two.out" # Also substitute `run` from header above
+ d: "{{ OtherTask.useful_other_var }}" # Substitute `useful_other_var` from `OtherTask`
+  test_fmt: "{{ run:04d }}" # Substitute `run` and format as 0012
+ test_env_fmt: "{{ $RUN:04d }}" # Substitute environment variable $RUN and pad to 4 w/ zeros
+...
+```
+
+Input parameters in the config YAML can be substituted with either other input parameters or environment variables, with or without limited string formatting. All substitutions occur between double curly brackets: `{{ VARIABLE_TO_SUBSTITUTE }}`. Environment variables are indicated by `$` in front of the variable name. Parameters from the header, i.e. the first YAML document (top section) containing the `run`, `experiment`, version fields, etc. can be substituted without any qualification. If you want to use the `run` parameter, you can substitute it using `{{ run }}`. All other parameters, i.e. from other `Task`s or within `Task`s, must use a qualified name. Nested levels are delimited using a `.`. E.g. consider a structure like:
+
+```yaml
+Task:
+ param_set:
+ a: 1
+ b: 2
+ c: 3
+```
+In order to use parameter `c`, you would use `{{ Task.param_set.c }}` as the substitution.
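+
+For instance, a hypothetical entry for another `Task` could reference this value as:
+
+```yaml
+AnotherTask:
+  input_file: "/some/path/{{ Task.param_set.c }}/data.out" # Becomes "/some/path/3/data.out"
+```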
+
+Take care when using substitutions! This process will not try to guess for you. When a substitution is not available, e.g. due to misspelling, one of two things will happen:
+- If it was an environment variable that does not exist, no substitution will be performed, although a message will be printed. I.e. you will be left with `param: /my/failed/{{ $SUBSTITUTION }}` as your parameter. This may or may not fail the model validation step, but is likely not what you intended.
+- If it was an attempt at substituting another YAML parameter which does not exist, an exception will be thrown and the program will exit.
+
+**Defining your own parameters**
+The configuration file is **not** validated in its totality, only on a `Task`-by-`Task` basis, but it **is read** in its totality. E.g. when running `MyTask` only that portion of the configuration is validated even though the entire file has been read, and is available for substitutions. As a result, it is safe to introduce extra entries into the YAML file, as long as they are not entered under a specific `Task`'s configuration. This may be useful to create your own global substitutions, for example if there is a key variable that may be used across different `Task`s.
+E.g. Consider a case where you want to create a more generic configuration file where a single variable is used by multiple `Task`s. This single variable may be changed between experiments, for instance, but is likely static for the duration of a single set of analyses. In order to avoid a mistake when changing the configuration between experiments you can define this special variable (or variables) as a separate entry in the YAML, and make use of substitutions in each `Task`'s configuration. This way the variable only needs to be changed in one place.
+
+```yaml
+# Define our substitution. This entry is only used for substitutions!
+MY_SPECIAL_SUB: "EXPMT_DEPENDENT_VALUE" # Can change here once per experiment!
+
+RunTask1:
+ special_var: "{{ MY_SPECIAL_SUB }}"
+ var_1: 1
+ var_2: "a"
+ # ...
+
+RunTask2:
+ special_var: "{{ MY_SPECIAL_SUB }}"
+ var_3: "abcd"
+ var_4: 123
+ # ...
+
+RunTask3:
+ special_var: "{{ MY_SPECIAL_SUB }}"
+ #...
+
+# ... and so on
+```
+
+### Gotchas!
+**Order matters**
+While in general you can use parameters that appear later in a YAML document to substitute for values of parameters that appear earlier, the substitutions themselves will be performed in order of appearance. It is therefore **NOT possible** to correctly use a later parameter as a substitution for an earlier one, if the later one itself depends on a substitution. The YAML document, however, can be rearranged without error. The order in the YAML document has no effect on execution order, which is determined purely by the workflow definition. As mentioned above, the document is not validated in its entirety, so rearrangements are allowed. For example, consider the following situation which produces an incorrect substitution:
+
+
+```yaml
+%YAML 1.3
+---
+title: "Configuration to Test YAML Substitution"
+experiment: "TestYAMLSubs"
+run: 12
+date: "2024/05/01"
+lute_version: 0.1
+task_timeout: 600
+work_dir: "/sdf/data/lcls/ds/exp/experiment/scratch"
+...
+---
+RunTaskOne:
+ input_dir: "{{ RunTaskTwo.path }}" # Will incorrectly be "{{ work_dir }}/additional_path/{{ $RUN }}"
+ # ...
+
+RunTaskTwo:
+ # Remember `work_dir` and `run` come from the header document and don't need to
+ # be qualified
+ path: "{{ work_dir }}/additional_path/{{ run }}"
+...
+```
+
+This configuration can be rearranged to achieve the desired result:
+
+```yaml
+%YAML 1.3
+---
+title: "Configuration to Test YAML Substitution"
+experiment: "TestYAMLSubs"
+run: 12
+date: "2024/05/01"
+lute_version: 0.1
+task_timeout: 600
+work_dir: "/sdf/data/lcls/ds/exp/experiment/scratch"
+...
+---
+RunTaskTwo:
+  # Remember `work_dir` and `run` come from the header document and don't need to be qualified
+ path: "{{ work_dir }}/additional_path/{{ run }}"
+
+RunTaskOne:
+ input_dir: "{{ RunTaskTwo.path }}" # Will now be /sdf/data/lcls/ds/exp/experiment/scratch/additional_path/12
+ # ...
+...
+```
+
+On the other hand, relationships such as these may point to inconsistencies in the dependencies between `Task`s which may warrant a refactor.
+
+**Found unhashable key**
+To avoid YAML parsing issues when using the substitution syntax, be sure to quote your substitutions. Before substitution is performed, a dictionary is first constructed by the `pyyaml` package, which parses the document; it may fail to parse the document and raise an exception if the substitutions are not quoted.
+E.g.
+```yaml
+# USE THIS
+MyTask:
+ var_sub: "{{ other_var:04d }}"
+
+# **DO NOT** USE THIS
+MyTask:
+ var_sub: {{ other_var:04d }}
+```
+
+During validation, Pydantic will by default cast variables if possible; because of this, it is generally safe to use strings for substitutions. E.g. if your parameter is expecting an integer, and after substitution you pass `"2"`, Pydantic will cast this to the `int` `2`, and validation will succeed. As part of the substitution process, limited type casting will also be handled if it is necessary for any formatting strings provided. E.g. `"{{ run:04d }}"` requires that `run` be an integer, so it will be treated as such in order to apply the formatting.
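+
+For example, for a hypothetical parameter expecting an integer:
+
+```yaml
+MyTask:
+  num_images: "{{ $NUM_IMAGES }}" # If NUM_IMAGES=100 in the environment, this ends up as the integer 100
+```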
+
+## Debug Environment Variables
+Special markers have been inserted at certain points in the execution flow for LUTE. These can be enabled by setting the environment variables detailed below. These are intended to allow developers to exit the program at certain points to investigate behaviour or a bug. For instance, when working on configuration parsing, an environment variable can be set which exits the program after passing this step. This allows you to run LUTE otherwise as normal (described above), without having to modify any additional code or insert your own early exits.
+
+Types of debug markers:
+- `LUTE_DEBUG_EXIT`: Will exit the program at this point if the corresponding environment variable has been set.
+
+Developers can insert these markers as needed into their code to add new exit points, although as a rule of thumb they should be used sparingly, and generally only after major steps in the execution flow (e.g. after parsing, after beginning a task, after returning a result, etc.).
+
+In order to include a new marker in your code:
+```py
+from lute.execution.debug_utils import LUTE_DEBUG_EXIT
+
+def my_code() -> None:
+ # ...
+ LUTE_DEBUG_EXIT("MYENVVAR", "Additional message to print")
+ # If MYENVVAR is not set, the above function does nothing
+```
+
+You can enable a marker by setting the corresponding environment variable to 1, e.g. to enable the example marker above while running `Tester`:
+```bash
+MYENVVAR=1 python -B run_task.py -t Tester -c config/test.yaml
+```
+
+### Currently used environment variables
+- `LUTE_DEBUG_EXIT_AT_YAML`: Exits the program after reading in a YAML configuration file and performing variable substitutions, but BEFORE Pydantic validation.
+- `LUTE_DEBUG_BEFORE_TPP_EXEC`: Exits the program after a ThirdPartyTask has prepared its submission command, but before `exec` is used to run it.
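+
+For example, to inspect the substituted configuration for the `Tester` example above, before validation:
+
+```bash
+LUTE_DEBUG_EXIT_AT_YAML=1 python -B run_task.py -t Tester -c config/test.yaml
+```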
+
+## Airflow Launch and DAG Execution Steps
+The Airflow launch process actually involves a number of steps, and is rather complicated. There are two wrapper steps prior to getting to the actual Airflow API communication.
+1. `launch_scripts/submit_launch_airflow.sh` is run.
+2. This script calls `/sdf/group/lcls/ds/tools/lute_launcher` with all the same parameters that it was called with.
+3. `lute_launcher` runs the `launch_scripts/launch_airflow.py` script which was provided as the first argument. This is the **true** launch script.
+4. `launch_airflow.py` communicates with the Airflow API, requesting that a specific DAG be launched. It then continues to run, and gathers the individual logs and the exit status of each step of the DAG.
+5. Airflow will then enter a loop of communication where it asks the JID to submit each step of the requested DAG as a batch job using `launch_scripts/submit_slurm.sh`.
+
+There are some specific reasons for this complexity:
+- The use of `submit_launch_airflow.sh` as a thin wrapper around `lute_launcher` is to allow the true Airflow launch script to be a long-lived job. This is for compatibility with the eLog and the ARP. When run from the eLog as a workflow, the job submission process must occur within 30 seconds due to a timeout built into the system. This is fine when submitting jobs to run on the batch nodes, as the submission to the queue takes very little time. So here, `submit_launch_airflow.sh` serves as a thin script to have `lute_launcher` run as a batch job. It can then run as a long-lived job (for the duration of the entire DAG) collecting log files all in one place. This allows the log for each stage of the Airflow DAG to be inspected in a single file, and through the eLog browser interface.
+- The use of `lute_launcher` as a wrapper around `launch_airflow.py` is to manage authentication and credentials. The `launch_airflow.py` script requires loading credentials in order to authenticate against the Airflow API. For the average user this is not possible, unless the script is run from within the `lute_launcher` process.
diff --git a/launch_scripts/submit_slurm.sh b/launch_scripts/submit_slurm.sh
index 473ee1dc..38a0b212 100755
--- a/launch_scripts/submit_slurm.sh
+++ b/launch_scripts/submit_slurm.sh
@@ -7,6 +7,8 @@ $(basename "$0"):
Options:
-c|--config
ABSOLUTE path to the LUTE configuration YAML. Must be absolute.
+ --debug
+ Whether to run in debug mode (verbose printing).
-h|--help
Display this message.
-t|--taskname
diff --git a/lute/execution/debug_utils.py b/lute/execution/debug_utils.py
new file mode 100644
index 00000000..7051a7f1
--- /dev/null
+++ b/lute/execution/debug_utils.py
@@ -0,0 +1,59 @@
+"""Functions to assist in debugging execution of LUTE.
+
+Functions:
+ LUTE_DEBUG_EXIT(env_var: str, str_dump: Optional[str]): Exits the program if
+ the provided `env_var` is set. Optionally, also prints a message if
+ provided.
+
+    LUTE_DEBUG_PAUSE(env_var: str, str_dump: Optional[str]): Pauses the program
+        (via `signal.pause()`) if the provided `env_var` is set. Optionally,
+        also prints a message if provided.
+"""
+
+__all__ = ["LUTE_DEBUG_EXIT"]
+__author__ = "Gabriel Dorlhiac"
+
+import os
+import sys
+import types
+from typing import Optional
+
+
+def _stack_inspect(msg: str, str_dump: Optional[str] = None) -> None:
+ import inspect
+
+ curr_frame: Optional[types.FrameType] = inspect.currentframe()
+ frame: Optional[types.FrameType]
+ if curr_frame:
+ frame = curr_frame.f_back
+ if frame:
+ frame = frame.f_back # Go back two stack frames...
+ else:
+ frame = None
+ if frame:
+ file_name: str = frame.f_code.co_filename
+ line_no: int = frame.f_lineno
+ msg = f"{msg} {file_name}, line: {line_no}"
+ else:
+ msg = f"{msg} Stack frame not retrievable..."
+ if str_dump is not None:
+ msg = f"{msg}\n{str_dump}"
+
+ print(msg, flush=True)
+
+
+def LUTE_DEBUG_EXIT(env_var: str, str_dump: Optional[str] = None) -> None:
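+    """Exit the program when the given debug environment variable is set.
+
+    Prints the file and line number of the call site and, optionally, an
+    additional string dump before exiting with status 0. If `env_var` is not
+    set in the environment, this function does nothing.
+
+    Args:
+        env_var (str): Name of the environment variable which triggers the exit.
+
+        str_dump (Optional[str]): Additional message to print before exiting.
+    """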
+ if os.getenv(env_var, None):
+ msg: str = "LUTE_DEBUG_EXIT -"
+ _stack_inspect(msg, str_dump)
+ sys.exit(0)
+
+
+def LUTE_DEBUG_PAUSE(env_var: str, str_dump: Optional[str] = None) -> None:
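+    """Pause the program when the given debug environment variable is set.
+
+    Prints the file and line number of the call site and, optionally, an
+    additional string dump, then waits on `signal.pause()`. If `env_var` is
+    not set in the environment, this function does nothing.
+
+    Args:
+        env_var (str): Name of the environment variable which triggers the pause.
+
+        str_dump (Optional[str]): Additional message to print before pausing.
+    """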
+ # Need custom signal handlers to implement resume
+ if os.getenv(env_var, None):
+ import signal
+
+ msg: str = "LUTE_DEBUG_PAUSE -"
+ _stack_inspect(msg, str_dump)
+ signal.pause()
diff --git a/lute/io/config.py b/lute/io/config.py
index 8686db9c..84f3e87a 100644
--- a/lute/io/config.py
+++ b/lute/io/config.py
@@ -14,12 +14,11 @@
__all__ = ["parse_config"]
__author__ = "Gabriel Dorlhiac"
import os
+import re
import warnings
-from abc import ABC
-from typing import List, Dict, Iterator, Dict, Any, Union, Optional
+from typing import Any, Dict, Iterator, List, Optional, Union
-import yaml
+import pprint
import yaml
from pydantic import (
BaseModel,
@@ -35,6 +34,131 @@
from pydantic.dataclasses import dataclass
from .models import *
+from lute.execution.debug_utils import LUTE_DEBUG_EXIT
+
+
+def _isfloat(string: str) -> bool:
+ try:
+ float(string)
+ return True
+ except ValueError:
+ return False
+
+
+def _check_str_numeric(string: str) -> Union[str, int, float]:
+ """Check if a string is an integer or float and return it as such.
+
+ Args:
+ string (str): Input string to check.
+
+ Returns:
+ output (str | int | float): Returns an int or float if the string can be
+ converted to one. Otherwise returns the original string.
+ """
+ if string.isnumeric():
+ return int(string)
+ elif _isfloat(string):
+ return float(string)
+ else:
+ return string
+
+
+def substitute_variables(
+ header: Dict[str, Any], config: Dict[str, Any], curr_key: Optional[str] = None
+) -> None:
+ """Performs variable substitutions on a dictionary read from config YAML file.
+
+ Can be used to define input parameters in terms of other input parameters.
+ This is similar to functionality employed by validators for parameters in
+ the specific Task models, but is intended to be more accessible to users.
+ Variable substitutions are defined using a minimal syntax from Jinja:
+ {{ experiment }}
+ defines a substitution of the variable `experiment`. The characters `{{ }}`
+ can be escaped if the literal symbols are needed in place.
+
+ For example, a path to a file can be defined in terms of experiment and run
+ values in the config file:
+ MyTask:
+ experiment: myexp
+ run: 2
+ special_file: /path/to/{{ experiment }}/{{ run }}/file.inp
+
+ Acceptable variables for substitutions are values defined elsewhere in the
+ YAML file. Environment variables can also be used if prefaced with a `$`
+ character. E.g. to get the experiment from an environment variable:
+ MyTask:
+ run: 2
+ special_file: /path/to/{{ $EXPERIMENT }}/{{ run }}/file.inp
+
+    Args:
+        header (Dict[str, Any]): The header document of the configuration YAML.
+            Its parameters are available for substitution without qualification.
+
+        config (Dict[str, Any]): A dictionary of parsed configuration.
+
+        curr_key (Optional[str]): Used to keep track of recursion level when scanning
+            through iterable items in the config dictionary.
+
+    Returns:
+        None. Substitutions are made in place by modifying the `config`
+        dictionary directly.
+ """
+ _sub_pattern = r"\{\{[^}{]*\}\}"
+ iterable: Dict[str, Any] = config
+ if curr_key is not None:
+ # Need to handle nested levels by interpreting curr_key
+ keys_by_level: List[str] = curr_key.split(".")
+ for key in keys_by_level:
+ iterable = iterable[key]
+ else:
+ ...
+ # iterable = config
+ for param, value in iterable.items():
+ if isinstance(value, dict):
+ new_key: str
+ if curr_key is None:
+ new_key = param
+ else:
+ new_key = f"{curr_key}.{param}"
+ substitute_variables(header, config, curr_key=new_key)
+ elif isinstance(value, list):
+ ...
+ # Scalars str - we skip numeric types
+ elif isinstance(value, str):
+ matches: List[str] = re.findall(_sub_pattern, value)
+ for m in matches:
+ key_to_sub_maybe_with_fmt: List[str] = m[2:-2].strip().split(":")
+ key_to_sub: str = key_to_sub_maybe_with_fmt[0]
+ fmt: Optional[str] = None
+ if len(key_to_sub_maybe_with_fmt) == 2:
+ fmt = key_to_sub_maybe_with_fmt[1]
+ sub: Any
+ if key_to_sub[0] == "$":
+ sub = os.getenv(key_to_sub[1:], None)
+ if sub is None:
+ print(
+ f"Environment variable {key_to_sub[1:]} not found! Cannot substitute in YAML config!",
+ flush=True,
+ )
+ continue
+ # substitutions from env vars will be strings, so convert back
+ # to numeric in order to perform formatting later on (e.g. {var:04d})
+ sub = _check_str_numeric(sub)
+ else:
+ try:
+ sub = config
+ for key in key_to_sub.split("."):
+ sub = sub[key]
+ except KeyError:
+ sub = header[key_to_sub]
+ pattern: str = (
+ m.replace("{{", r"\{\{").replace("}}", r"\}\}").replace("$", r"\$")
+ )
+ if fmt is not None:
+ sub = f"{sub:{fmt}}"
+ else:
+ sub = f"{sub}"
+ iterable[param] = re.sub(pattern, sub, iterable[param])
+ # Reconvert back to numeric values if needed...
+ iterable[param] = _check_str_numeric(iterable[param])
def parse_config(task_name: str = "test", config_path: str = "") -> TaskParameters:
@@ -60,7 +184,9 @@ def parse_config(task_name: str = "test", config_path: str = "") -> TaskParamete
docs: Iterator[Dict[str, Any]] = yaml.load_all(stream=f, Loader=yaml.FullLoader)
header: Dict[str, Any] = next(docs)
config: Dict[str, Any] = next(docs)
-
+ substitute_variables(header, header)
+ substitute_variables(header, config)
+ LUTE_DEBUG_EXIT("LUTE_DEBUG_EXIT_AT_YAML", pprint.pformat(config))
lute_config: Dict[str, AnalysisHeader] = {"lute_config": AnalysisHeader(**header)}
try:
task_config: Dict[str, Any] = dict(config[task_name])
@@ -73,5 +199,4 @@ def parse_config(task_name: str = "test", config_path: str = "") -> TaskParamete
)
)
parsed_parameters: TaskParameters = globals()[task_config_name](**lute_config)
-
return parsed_parameters
diff --git a/lute/io/elog.py b/lute/io/elog.py
index 4ccac1d2..85d71727 100644
--- a/lute/io/elog.py
+++ b/lute/io/elog.py
@@ -56,8 +56,6 @@
from typing import Any, Dict, Optional, List, Union, Tuple
from io import BufferedReader
-from krtc import KerberosTicket
-
from .exceptions import ElogFileFormatError
if __debug__:
@@ -222,6 +220,8 @@ def get_elog_kerberos_auth() -> Dict[str, str]:
Returns:
auth (Dict[str, str]): Dictionary containing Kerberos authorization key.
"""
+ from krtc import KerberosTicket
+
return KerberosTicket("HTTP@pswww.slac.stanford.edu").getAuthHeaders()
diff --git a/lute/io/models/base.py b/lute/io/models/base.py
index 0a0f7389..b57a1538 100644
--- a/lute/io/models/base.py
+++ b/lute/io/models/base.py
@@ -7,7 +7,7 @@
TaskParameters(BaseSettings): Base class for Task parameters. Subclasses
specify a model of parameters and their types for validation.
- BaseBinaryParameters(TaskParameters): Base class for Third-party, binary
+ ThirdPartyParameters(TaskParameters): Base class for Third-party, binary
executable Tasks.
TemplateParameters: Dataclass to represent parameters of binary
@@ -22,7 +22,7 @@
"AnalysisHeader",
"TemplateConfig",
"TemplateParameters",
- "BaseBinaryParameters",
+ "ThirdPartyParameters",
]
__author__ = "Gabriel Dorlhiac"
@@ -139,7 +139,7 @@ class TemplateParameters:
params: Any
-class BaseBinaryParameters(TaskParameters):
+class ThirdPartyParameters(TaskParameters):
"""Base class for third party task parameters.
Contains special validators for extra arguments and handling of parameters
diff --git a/lute/io/models/sfx_find_peaks.py b/lute/io/models/sfx_find_peaks.py
index 0e3a6924..dbc165b7 100644
--- a/lute/io/models/sfx_find_peaks.py
+++ b/lute/io/models/sfx_find_peaks.py
@@ -4,7 +4,7 @@
from pydantic import BaseModel, Field, PositiveInt, validator
-from .base import BaseBinaryParameters, TaskParameters, TemplateConfig
+from .base import ThirdPartyParameters, TaskParameters, TemplateConfig
class FindPeaksPyAlgosParameters(TaskParameters):
@@ -125,7 +125,7 @@ def validate_out_file(cls, out_file: str, values: Dict[str, Any]) -> str:
return out_file
-class FindPeaksPsocakeParameters(BaseBinaryParameters):
+class FindPeaksPsocakeParameters(ThirdPartyParameters):
class SZParameters(BaseModel):
compressor: Literal["qoz", "sz3"] = Field(
diff --git a/lute/io/models/sfx_index.py b/lute/io/models/sfx_index.py
index 4801dd15..bf207b8a 100644
--- a/lute/io/models/sfx_index.py
+++ b/lute/io/models/sfx_index.py
@@ -1,7 +1,7 @@
"""Models for serial femtosecond crystallography indexing.
Classes:
- IndexCrystFELParameters(BaseBinaryParameters): Perform indexing of hits/peaks using
+ IndexCrystFELParameters(ThirdPartyParameters): Perform indexing of hits/peaks using
CrystFEL's `indexamajig`.
"""
@@ -23,10 +23,10 @@
)
from ..db import read_latest_db_entry
-from .base import BaseBinaryParameters, TaskParameters
+from .base import ThirdPartyParameters, TaskParameters
-class IndexCrystFELParameters(BaseBinaryParameters):
+class IndexCrystFELParameters(ThirdPartyParameters):
"""Parameters for CrystFEL's `indexamajig`.
There are many parameters, and many combinations. For more information on
@@ -34,7 +34,7 @@ class IndexCrystFELParameters(BaseBinaryParameters):
https://www.desy.de/~twhite/crystfel/manual-indexamajig.html
"""
- class Config(BaseBinaryParameters.Config):
+ class Config(ThirdPartyParameters.Config):
long_flags_use_eq: bool = True
"""Whether long command-line arguments are passed like `--long=arg`."""
diff --git a/lute/io/models/sfx_merge.py b/lute/io/models/sfx_merge.py
index 7b4f1304..a583dddd 100644
--- a/lute/io/models/sfx_merge.py
+++ b/lute/io/models/sfx_merge.py
@@ -1,13 +1,13 @@
"""Models for merging reflections in serial femtosecond crystallography.
Classes:
- MergePartialatorParameters(BaseBinaryParameters): Perform merging using
+ MergePartialatorParameters(ThirdPartyParameters): Perform merging using
CrystFEL's `partialator`.
- CompareHKLParameters(BaseBinaryParameters): Calculate figures of merit using
+ CompareHKLParameters(ThirdPartyParameters): Calculate figures of merit using
CrystFEL's `compare_hkl`.
- ManipulateHKLParameters(BaseBinaryParameters): Perform transformations on
+ ManipulateHKLParameters(ThirdPartyParameters): Perform transformations on
lists of reflections using CrystFEL's `get_hkl`.
"""
@@ -23,11 +23,11 @@
from pydantic import Field, validator
-from .base import BaseBinaryParameters
+from .base import ThirdPartyParameters
from ..db import read_latest_db_entry
-class MergePartialatorParameters(BaseBinaryParameters):
+class MergePartialatorParameters(ThirdPartyParameters):
"""Parameters for CrystFEL's `partialator`.
There are many parameters, and many combinations. For more information on
@@ -35,7 +35,7 @@ class MergePartialatorParameters(BaseBinaryParameters):
https://www.desy.de/~twhite/crystfel/manual-partialator.html
"""
- class Config(BaseBinaryParameters.Config):
+ class Config(ThirdPartyParameters.Config):
long_flags_use_eq: bool = True
"""Whether long command-line arguments are passed like `--long=arg`."""
@@ -209,7 +209,7 @@ def validate_out_file(cls, out_file: str, values: Dict[str, Any]) -> str:
return out_file
-class CompareHKLParameters(BaseBinaryParameters):
+class CompareHKLParameters(ThirdPartyParameters):
"""Parameters for CrystFEL's `compare_hkl` for calculating figures of merit.
There are many parameters, and many combinations. For more information on
@@ -217,7 +217,7 @@ class CompareHKLParameters(BaseBinaryParameters):
https://www.desy.de/~twhite/crystfel/manual-partialator.html
"""
- class Config(BaseBinaryParameters.Config):
+ class Config(ThirdPartyParameters.Config):
long_flags_use_eq: bool = True
"""Whether long command-line arguments are passed like `--long=arg`."""
@@ -337,7 +337,7 @@ def validate_shell_file(cls, shell_file: str, values: Dict[str, Any]) -> str:
return shell_file
-class ManipulateHKLParameters(BaseBinaryParameters):
+class ManipulateHKLParameters(ThirdPartyParameters):
"""Parameters for CrystFEL's `get_hkl` for manipulating lists of reflections.
This Task is predominantly used internally to convert `hkl` to `mtz` files.
@@ -347,7 +347,7 @@ class ManipulateHKLParameters(BaseBinaryParameters):
https://www.desy.de/~twhite/crystfel/manual-partialator.html
"""
- class Config(BaseBinaryParameters.Config):
+ class Config(ThirdPartyParameters.Config):
long_flags_use_eq: bool = True
"""Whether long command-line arguments are passed like `--long=arg`."""
diff --git a/lute/io/models/sfx_solve.py b/lute/io/models/sfx_solve.py
index 5f00c097..1b534fc0 100644
--- a/lute/io/models/sfx_solve.py
+++ b/lute/io/models/sfx_solve.py
@@ -1,7 +1,7 @@
"""Models for structure solution in serial femtosecond crystallography.
Classes:
- DimpleSolveParameters(BaseBinaryParameters): Perform structure solution
+ DimpleSolveParameters(ThirdPartyParameters): Perform structure solution
using CCP4's dimple (molecular replacement).
"""
@@ -13,11 +13,11 @@
from pydantic import Field, validator, PositiveFloat, PositiveInt
-from .base import BaseBinaryParameters
+from .base import ThirdPartyParameters
from ..db import read_latest_db_entry
-class DimpleSolveParameters(BaseBinaryParameters):
+class DimpleSolveParameters(ThirdPartyParameters):
"""Parameters for CCP4's dimple program.
There are many parameters. For more information on
@@ -197,7 +197,7 @@ def validate_out_dir(cls, out_dir: str, values: Dict[str, Any]) -> str:
return out_dir
-class RunSHELXCParameters(BaseBinaryParameters):
+class RunSHELXCParameters(ThirdPartyParameters):
"""Parameters for CCP4's SHELXC program.
SHELXC prepares files for SHELXD and SHELXE.
diff --git a/lute/io/models/smd.py b/lute/io/models/smd.py
index ebeb10d4..cab7fa50 100644
--- a/lute/io/models/smd.py
+++ b/lute/io/models/smd.py
@@ -1,7 +1,7 @@
"""Models for smalldata_tools Tasks.
Classes:
- SubmitSMDParameters(BaseBinaryParameters): Parameters to run smalldata_tools
+ SubmitSMDParameters(ThirdPartyParameters): Parameters to run smalldata_tools
to produce a smalldata HDF5 file.
FindOverlapXSSParameters(TaskParameters): Parameter model for the
@@ -24,10 +24,10 @@
validator,
)
-from .base import TaskParameters, BaseBinaryParameters, TemplateConfig
+from .base import TaskParameters, ThirdPartyParameters, TemplateConfig
-class SubmitSMDParameters(BaseBinaryParameters):
+class SubmitSMDParameters(ThirdPartyParameters):
"""Parameters for running smalldata to produce reduced HDF5 files."""
executable: str = Field("mpirun", description="MPI executable.", flag_type="")
diff --git a/lute/io/models/tests.py b/lute/io/models/tests.py
index 089e7624..1958e663 100644
--- a/lute/io/models/tests.py
+++ b/lute/io/models/tests.py
@@ -4,7 +4,7 @@
TestParameters(TaskParameters): Model for most basic test case. Single
core first-party Task. Uses only communication via pipes.
- TestBinaryParameters(BaseBinaryParameters): Parameters for a simple multi-
+ TestBinaryParameters(ThirdPartyParameters): Parameters for a simple multi-
threaded binary executable.
TestSocketParameters(TaskParameters): Model for first-party test requiring
@@ -35,7 +35,7 @@
validator,
)
-from .base import TaskParameters, BaseBinaryParameters
+from .base import TaskParameters, ThirdPartyParameters
from ..db import read_latest_db_entry
@@ -53,12 +53,12 @@ class CompoundVar(BaseModel):
throw_error: bool = False
-class TestBinaryParameters(BaseBinaryParameters):
+class TestBinaryParameters(ThirdPartyParameters):
executable: str = "/sdf/home/d/dorlhiac/test_tasks/test_threads"
p_arg1: int = 1
-class TestBinaryErrParameters(BaseBinaryParameters):
+class TestBinaryErrParameters(ThirdPartyParameters):
"""Same as TestBinary, but exits with non-zero code."""
executable: str = "/sdf/home/d/dorlhiac/test_tasks/test_threads_err"
diff --git a/lute/tasks/task.py b/lute/tasks/task.py
index d103c72b..5b132623 100644
--- a/lute/tasks/task.py
+++ b/lute/tasks/task.py
@@ -23,6 +23,7 @@
AnalysisHeader,
)
from ..execution.ipc import *
+from ..execution.debug_utils import LUTE_DEBUG_EXIT
from .dataclasses import *
if __debug__:
@@ -344,6 +345,7 @@ def _run(self) -> None:
time.sleep(0.1)
msg: Message = Message(contents=self._formatted_command())
self._report_to_executor(msg)
+ LUTE_DEBUG_EXIT("LUTE_DEBUG_BEFORE_TPP_EXEC")
os.execvp(file=self._cmd, args=self._args_list)
def _formatted_command(self) -> str:
diff --git a/subprocess_task.py b/subprocess_task.py
index ae30e71a..75099da3 100644
--- a/subprocess_task.py
+++ b/subprocess_task.py
@@ -8,7 +8,7 @@
from lute.tasks.task import Task, ThirdPartyTask
from lute.execution.ipc import Message
from lute.io.config import *
-from lute.io.models.base import TaskParameters, BaseBinaryParameters
+from lute.io.models.base import TaskParameters, ThirdPartyParameters
def get_task() -> Optional[Task]:
@@ -58,7 +58,7 @@ def timeout_handler(signum: int, frame: types.FrameType) -> None:
# Hack to avoid importing modules with conflicting dependencie
TaskType: Type[Task]
-if isinstance(task_parameters, BaseBinaryParameters):
+if isinstance(task_parameters, ThirdPartyParameters):
TaskType = ThirdPartyTask
else:
from lute.tasks import import_task, TaskNotFoundError
diff --git a/utilities/src/help/task_parameters.py b/utilities/src/help/task_parameters.py
index b49af76e..0538b4b9 100644
--- a/utilities/src/help/task_parameters.py
+++ b/utilities/src/help/task_parameters.py
@@ -16,7 +16,7 @@ class PropertyDict(TypedDict):
title: str
type: Optional[str]
anyOf: Optional[List[Dict[str, str]]] # Either an anyOf or type per property
- # Generally only for BinaryTasks
+ # Generally only for ThirdPartyTasks
rename_param: Optional[str]
flag_type: Optional[str]
# Other additional field attributes