From a1dc86f4eb0e84f3a1f62416f8f9f09e2e56ac76 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 29 May 2024 12:32:30 -0400 Subject: [PATCH 1/7] Fix example code references in group tutorial. --- doc/src/guide/tutorial/group-workflow3.toml | 2 ++ doc/src/guide/tutorial/group-workflow4.toml | 2 ++ doc/src/guide/tutorial/group-workflow5.toml | 2 ++ doc/src/guide/tutorial/group.md | 20 +++++++++++--------- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/src/guide/tutorial/group-workflow3.toml b/doc/src/guide/tutorial/group-workflow3.toml index 30095d9..20bf090 100644 --- a/doc/src/guide/tutorial/group-workflow3.toml +++ b/doc/src/guide/tutorial/group-workflow3.toml @@ -5,7 +5,9 @@ value_file = "value.json" name = "process_point" command = "echo {directory}" [action.group] +# ANCHOR: sort sort_by = ["/x"] +# ANCHOR_END: sort [[action.group.include]] condition = ["/type", "==", "point"] diff --git a/doc/src/guide/tutorial/group-workflow4.toml b/doc/src/guide/tutorial/group-workflow4.toml index e36f93b..b10d621 100644 --- a/doc/src/guide/tutorial/group-workflow4.toml +++ b/doc/src/guide/tutorial/group-workflow4.toml @@ -6,7 +6,9 @@ name = "process_point" command = "echo {directory}" [action.group] sort_by = ["/x"] +# ANCHOR: split split_by_sort_key = true +# ANCHOR_END: split [[action.group.include]] condition = ["/type", "==", "point"] diff --git a/doc/src/guide/tutorial/group-workflow5.toml b/doc/src/guide/tutorial/group-workflow5.toml index 4a93733..7c23fa3 100644 --- a/doc/src/guide/tutorial/group-workflow5.toml +++ b/doc/src/guide/tutorial/group-workflow5.toml @@ -6,7 +6,9 @@ name = "process_point" command = "echo {directory}" [action.group] sort_by = ["/x"] +# ANCHOR: max maximum_size = 4 +# ANCHOR_END: max [[action.group.include]] condition = ["/type", "==", "point"] diff --git a/doc/src/guide/tutorial/group.md b/doc/src/guide/tutorial/group.md index 28f6dc9..94b8641 100644 --- a/doc/src/guide/tutorial/group.md +++ 
b/doc/src/guide/tutorial/group.md @@ -41,10 +41,11 @@ read thousands of files every time you execute a **row** command. ## Grouping by value -Now that your workspace directories have **values**, you can use those to -form **groups**. Every action in your workflow operates on **groups**. Set the -`action.group.include` key in an action to select which directories to include by +Now that your workspace directories have **values**, you can use those to form +**groups**. Every action in your workflow operates on **groups**. Add entries to the +`action.group.include` array in an action to select which directories to include by **value**. To see how this works, replace the contents of `workflow.toml` with: + ```toml {{#include group-workflow2.toml}} ``` @@ -53,14 +54,15 @@ This workflow will apply the `process_point` action to the directories where `value/type == "point"` and the `process_letter` action to the directories where `value/type == "letter"`. -`condition` is a length 3 array with the contents: `[JSON pointer, operator, operand]`. -Think of each element as an expression. The +`action.group.include` is an array of conditions. A directory is included when *any* +condition is true. `condition` is a length 3 array with the contents: `[JSON pointer, +operator, operand]`. Think of the condition as an expression. The [*JSON pointer*](../concepts/json-pointers.md) is a string that reads a particular value from the directory's **value**. The *operator* is a comparison operator: `"<"`, `"<="`, `"=="`, `">="`, or `">"`. The *operand* is the value to compare to. Together, these 3 elements make a *condition*. -**Row** applies the *condition* to all directories in the workspace. When the +**Row** applies each *condition* to all directories in the workspace. When a *condition* is true, the directory is included in the action's **groups**. > Note: This implies that every JSON pointer used in an `include` condition **MUST** @@ -114,7 +116,7 @@ behavior. 
You can choose to instead sort **groups** by any number of **value** e To demonstrate, add the line: ```toml -{{#include group-workflow3.toml:9}} +{{#include group-workflow3.toml:sort}} ``` to the `[action.group]` table for the `"process_point"` action. @@ -169,7 +171,7 @@ groups to a `maximum_size`. Add the line: ```toml -{{#include group-workflow4.toml:10}} +{{#include group-workflow4.toml:split}} ``` to the `[action.group]` table for the `"process_point"` action. @@ -225,7 +227,7 @@ of other ways that you might utilize `split_by_sort_key` in your workflows. **Row** can also limit groups to a maximum size. To see how this works, **REPLACE** the `split_by_sort_key = true` line with: ```toml -{{#include group-workflow5.toml:10}} +{{#include group-workflow5.toml:max}} ``` Now: From addc9ccd6715514e063632d9aa32323076a1b35d Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 29 May 2024 12:33:24 -0400 Subject: [PATCH 2/7] Remove remaining uses of specific line numbers. Specific line numbers are prone to error. Using only anchors, the documentation will be less likely to be invalid in the future. --- doc/src/guide/python/actions.md | 4 ++-- doc/src/guide/python/signac-workflow.toml | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/src/guide/python/actions.md b/doc/src/guide/python/actions.md index 95a843b..d42dd19 100644 --- a/doc/src/guide/python/actions.md +++ b/doc/src/guide/python/actions.md @@ -65,13 +65,13 @@ signac jobs and calls the requested **action** function. Next, replace the contents of `workflow.toml` with the corresponding workflow: ```toml -{{#include signac-workflow.toml}} +{{#include signac-workflow.toml:whole}} ``` *Both* actions have the same **command**, set once by the [**default action**](../../workflow/default.md): ```toml -{{#include signac-workflow.toml:5}} +{{#include signac-workflow.toml:default}} ``` `python actions.py` executes the `actions.py` file above. 
It is given the argument diff --git a/doc/src/guide/python/signac-workflow.toml b/doc/src/guide/python/signac-workflow.toml index 67e589e..e2ec5a1 100644 --- a/doc/src/guide/python/signac-workflow.toml +++ b/doc/src/guide/python/signac-workflow.toml @@ -1,8 +1,11 @@ +# ANCHOR: whole [workspace] value_file = "signac_statepoint.json" [default.action] +# ANCHOR: default command = "python actions.py --action $ACTION_NAME {directories}" +# ANCHOR_END: default [[action]] name = "square" @@ -14,3 +17,4 @@ name = "compute_sum" previous_actions = ["square"] resources.walltime.per_directory = "00:00:01" group.submit_whole = true +# ANCHOR_END: whole From 2994471a3c82eb163dec8de04a11c65ae1e2102b Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 29 May 2024 12:36:34 -0400 Subject: [PATCH 3/7] Document fixed tutorial in the change log. --- doc/src/release-notes.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/src/release-notes.md b/doc/src/release-notes.md index bafa0d9..e0b0da0 100644 --- a/doc/src/release-notes.md +++ b/doc/src/release-notes.md @@ -1,5 +1,11 @@ # Release notes +## 0.1.2 (2024-05-29) + +*Fixed:* + +* Erroneous code examples in the *Grouping directories* tutorial. + ## 0.1.1 (2024-05-29) *Added:* From a048ac49b9b3251ee9a87b8c2ff4cbdfe60675b5 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Wed, 29 May 2024 12:38:39 -0400 Subject: [PATCH 4/7] Run pre-commit. --- doc/src/guide/tutorial/group.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/guide/tutorial/group.md b/doc/src/guide/tutorial/group.md index 94b8641..513db2b 100644 --- a/doc/src/guide/tutorial/group.md +++ b/doc/src/guide/tutorial/group.md @@ -56,7 +56,7 @@ This workflow will apply the `process_point` action to the directories where `action.group.include` is an array of conditions. A directory is included when *any* condition is true. `condition` is a length 3 array with the contents: `[JSON pointer, -operator, operand]`. 
Think of the condition as an expression. The +operator, operand]`. Think of the condition as an expression. The [*JSON pointer*](../concepts/json-pointers.md) is a string that reads a particular value from the directory's **value**. The *operator* is a comparison operator: `"<"`, `"<="`, `"=="`, `">="`, or `">"`. The *operand* is the value to compare to. Together, these 3 From 8ec697fdd742b783559cf168e7aa3d18f85bdfa7 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 30 May 2024 08:33:04 -0400 Subject: [PATCH 5/7] Additional revisions. --- doc/src/guide/python/actions.md | 21 +++++++++++---------- doc/src/guide/python/signac-workflow.toml | 2 +- doc/src/guide/tutorial/group.md | 4 ++-- doc/src/guide/tutorial/submit.md | 12 ++++++------ doc/src/workflow/action/group.md | 8 ++++---- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/doc/src/guide/python/actions.md b/doc/src/guide/python/actions.md index d42dd19..3a4cf8b 100644 --- a/doc/src/guide/python/actions.md +++ b/doc/src/guide/python/actions.md @@ -1,8 +1,8 @@ # Writing action commands in Python -In **row**, actions execute arbitrary **shell commands**. When your action is -**Python** code, you must structure that code so that it is a command line tool -that takes directories as arguments. There are many ways you can achieve this goal. +In **row**, actions execute arbitrary **shell commands**. When your action is **Python** +code, you must wrap it with command line parsing that takes directories as arguments. +There are many ways you can achieve this goal. This guide will show you how to structure all of your actions in a single file: `actions.py`. This layout is inspired by **row's** predecessor **signac-flow** @@ -54,12 +54,14 @@ Now, create a file `actions.py` with the contents: This file defines each **action** as a function with the same name. These functions take an array of jobs as an argument: `def square(*jobs)` and `def compute_sum(*jobs)`. 
The `if __name__ == "__main__":` block parses the command line arguments, forms an array of -signac jobs and calls the requested **action** function. +signac jobs, and calls the requested **action** function. > Note: This example demonstrates looping over directories in **serial**. However, this -> structure also gives you the power to choose **serial** or **parallel** execution. +> structure also gives you the ability to choose **serial** or **[parallel]** execution. > Grouping many directories into a single cluster job submission will increase your -> workflow's throughput. +> workflow's throughput. + +[parallel]: ../concepts/process-parallelism.md ## Write workflow.toml @@ -78,8 +80,8 @@ Next, replace the contents of `workflow.toml` with the corresponding workflow: `--action $ACTION_NAME` which selects the Python function to call. Here `$ACTION_NAME` is an [environment variable](../../env.md) that **row** sets in job scripts. The last arguments are given by `{directories}`. Unlike `{directory}` shown in previous -tutorials, `{directories}` expands to *ALL* directories in the submitted **group**. In -this way, `action.py` is executed once and is free to process the list of directories in +tutorials, `{directories}` expands to *ALL* directories in the submitted **group**. +`action.py` is executed once and is free to process the list of directories in any way it chooses (e.g. in serial, with [multiprocessing parallelism, multiple threads](../concepts/thread-parallelism.md), using [MPI parallelism](../concepts/process-parallelism.md), ...). @@ -130,8 +132,7 @@ these steps: > Note: You may write functions that take only one job `def action(job)` without > modifying the given implementation of `__main__`. However, you will need to set > `action.group.maximum_size = 1` or use `{directory}` to ensure that `action.py` is -> given a single directory. If you implement your code using arrays, you can use -> **row's** grouping functionality to your benefit. 
+> given a single directory. ## Next steps diff --git a/doc/src/guide/python/signac-workflow.toml b/doc/src/guide/python/signac-workflow.toml index e2ec5a1..bea343c 100644 --- a/doc/src/guide/python/signac-workflow.toml +++ b/doc/src/guide/python/signac-workflow.toml @@ -2,8 +2,8 @@ [workspace] value_file = "signac_statepoint.json" -[default.action] # ANCHOR: default +[default.action] command = "python actions.py --action $ACTION_NAME {directories}" # ANCHOR_END: default diff --git a/doc/src/guide/tutorial/group.md b/doc/src/guide/tutorial/group.md index 513db2b..791e695 100644 --- a/doc/src/guide/tutorial/group.md +++ b/doc/src/guide/tutorial/group.md @@ -57,8 +57,8 @@ This workflow will apply the `process_point` action to the directories where `action.group.include` is an array of conditions. A directory is included when *any* condition is true. `condition` is a length 3 array with the contents: `[JSON pointer, operator, operand]`. Think of the condition as an expression. The -[*JSON pointer*](../concepts/json-pointers.md) is a string that reads a particular value -from the directory's **value**. The *operator* is a comparison operator: `"<"`, `"<="`, +[*JSON pointer*](../concepts/json-pointers.md) is a string that references a portion of +the directory's **value**. The *operator* is a comparison operator: `"<"`, `"<="`, `"=="`, `">="`, or `">"`. The *operand* is the value to compare to. Together, these 3 elements make a *condition*. diff --git a/doc/src/guide/tutorial/submit.md b/doc/src/guide/tutorial/submit.md index 12eb4a0..445d46e 100644 --- a/doc/src/guide/tutorial/submit.md +++ b/doc/src/guide/tutorial/submit.md @@ -16,8 +16,8 @@ This section explains how to **submit** jobs to the **scheduler** with **row**. You can skip to the [next heading](#checking-your-job-script) if you are using one of these clusters. -If not, then you need to create a configuration files that describe your -cluster. You may also need to define launchers specific to your cluster. 
+If not, then you need to create a configuration file that describes your cluster. You may +also need to define launchers specific to your cluster. * [`$HOME/.config/row/clusters.toml`](../../clusters/index.md) gives your cluster a name, instructions on how to identify it, and lists the partitions your cluster @@ -56,7 +56,7 @@ Remember, **YOU ARE RESPONSIBLE** for the content of the scripts that you submit Make sure that the script is requesting the correct resources and is routed to the correct **partition**. -For example, the example workflow might generate a job script like this on Anvil: +For example, the example workflow generates this job script on Anvil: ```bash #!/bin/bash #SBATCH --job-name=hello-directory0+2 @@ -85,9 +85,9 @@ Notice the selection of 1 task on the `shared` **partition**. This is correct fo where the `shared` **partition** allows jobs smaller than one node and charges based on the number of CPU cores quested. -> Note: If you are using **row** on one of the built-in clusters, then **row** should -> always select the correct partition for your jobs. If you find it does not, please -> open an [issue](https://github.com/glotzerlab/row/issues). +> Note: When using **row** on one of the built-in clusters, **row** should always select +> the correct partition for your jobs. Please open an +> [issue](https://github.com/glotzerlab/row/issues) if it does not. ### Submitting jobs diff --git a/doc/src/workflow/action/group.md b/doc/src/workflow/action/group.md index 04258c6..08f49a7 100644 --- a/doc/src/workflow/action/group.md +++ b/doc/src/workflow/action/group.md @@ -27,8 +27,8 @@ which may be true for a directory to be included in this group. Each selector is a **table** with only one of the following keys: * `condition`: An array of three elements: The *JSON pointer*, *the operator*, and the - *operand*. The [JSON pointer](../../guide/concepts/json-pointers.md) points to a - specific element from the directory's value. 
The operator may be `"<"`, `"<="`, + *operand*. The [JSON pointer](../../guide/concepts/json-pointers.md) references a + specific portion of the directory's value. The operator may be `"<"`, `"<="`, `"=="`, `">="`, or `">"`. * `all`: Array of conditions (see above). All conditions must be true for this selector to be true. @@ -54,8 +54,8 @@ Compare by array: condition = ["/array", "==", [1, "string", 14.0] ``` -Both operands **must** have the same data type. The JSON pointer must be present in the -value of **every** directory. +Both operands **must** have the same data type. The element referenced by JSON pointer +must be present in the value of **every** directory. When you omit `include`, **row** includes **all** directories in the workspace. From 2a3422c4a9582055e293f532b05132f39761c4e7 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 30 May 2024 08:39:40 -0400 Subject: [PATCH 6/7] Add google analytics. --- doc/theme/head.hbs | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/theme/head.hbs diff --git a/doc/theme/head.hbs b/doc/theme/head.hbs new file mode 100644 index 0000000..bad9822 --- /dev/null +++ b/doc/theme/head.hbs @@ -0,0 +1,9 @@ + + + From 6023c3fbb80621b375919604b014ac038507c9bb Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 30 May 2024 08:43:42 -0400 Subject: [PATCH 7/7] Build with latest version of mdbook. 
--- .github/workflows/test.yaml | 2 +- .readthedocs.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 99ecab5..54f4f1b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -20,7 +20,7 @@ env: CARGO_TERM_COLOR: always ROW_COLOR: always CLICOLOR: 1 - MDBOOK_VERSION: 0.4.37 + MDBOOK_VERSION: 0.4.40 LINKCHECK_VERSION: 0.7.7 RUST_LATEST_VERSION: 1.78.0 BUNDLE_LICENSES_VERSION: 1.3.0 diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 87ca7a4..2b1f7ad 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ build: os: "ubuntu-22.04" commands: - mkdir -p bin - - curl -sSL "https://github.com/rust-lang/mdBook/releases/download/v0.4.37/mdbook-v0.4.37-x86_64-unknown-linux-gnu.tar.gz" | tar -xvz --directory "bin" + - curl -sSL "https://github.com/rust-lang/mdBook/releases/download/v0.4.40/mdbook-v0.4.40-x86_64-unknown-linux-gnu.tar.gz" | tar -xvz --directory "bin" - mkdir -p $READTHEDOCS_OUTPUT/html - echo "site-url = \"/$READTHEDOCS_LANGUAGE/$READTHEDOCS_VERSION/\"" >> doc/book.toml - bin/mdbook build doc --dest-dir=$READTHEDOCS_OUTPUT/html