From 048e0ebc709209d03661dcfa1dabc5936a727b65 Mon Sep 17 00:00:00 2001 From: Gerald Shen Date: Mon, 8 Jan 2024 14:16:27 -0800 Subject: [PATCH 1/4] add workflow for syncing Signed-off-by: Gerald Shen --- .github/workflows/sync_branch.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/sync_branch.yaml diff --git a/.github/workflows/sync_branch.yaml b/.github/workflows/sync_branch.yaml new file mode 100644 index 000000000..028f774ff --- /dev/null +++ b/.github/workflows/sync_branch.yaml @@ -0,0 +1,24 @@ +name: sync main with dev +on: + push: + branches: + - main + +jobs: + sync-branches: + runs-on: ubuntu-latest + name: syncing main with dev + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Node + uses: actions/setup-node@v1 + with: + node-version: 12 + - name: Opening pull request + id: pull + uses: tretuna/sync-branches@1.4.0 + with: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + FROM_BRANCH: "main" + TO_BRANCH: "dev" From 6b538b68ac1769de338f3067dcb043e6bdea5c07 Mon Sep 17 00:00:00 2001 From: Gerald Shen Date: Mon, 8 Jan 2024 14:21:12 -0800 Subject: [PATCH 2/4] add contributing Signed-off-by: Gerald Shen --- CONTRIBUTING.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4041806a8..c1114f459 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Thanks for the interest in contributing to NeMo-Aligner. We do all of NeMo-Align # Pull Requests (PR) Guidelines -**Send your PRs to the `main` branch** +**Send your PRs to the `main` or `dev` branch** 1) Make sure your PR does one thing. Have a clear answer to "What does this PR do?". 2) Read General Principles and style guide below @@ -12,6 +12,10 @@ Thanks for the interest in contributing to NeMo-Aligner. 
We do all of NeMo-Align 4) Make sure all unittests finish successfully before sending PR ``pytest`` or (if your dev box does not have GPU) ``pytest --cpu`` from the root folder 5) Send your PR and request a review +**NOTE**: The `main` branch uses a fixed nemo version which we will update on every release. The `dev` branch is the branch that has all the commits from aligner main but is up to date with nemo main, this branch is less stable but we run nightly tests on this branch to make sure everything works. We only provide the dockerfile that works with `main`. + +Every release `dev` and `main` will sync to be the same. + ## Unit tests Quick unit tests (locally, while developing) ``` From b32c8a94137c50befa8e18356b2deaae29b87e33 Mon Sep 17 00:00:00 2001 From: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Date: Mon, 8 Jan 2024 22:20:43 -0800 Subject: [PATCH 3/4] fix function name change from nemo (#71) Signed-off-by: Gerald Shen --- nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py | 2 +- nemo_aligner/utils/train_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py b/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py index 056769086..e9bb539c1 100644 --- a/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py +++ b/nemo_aligner/models/nlp/gpt/megatron_gpt_reward_model.py @@ -318,7 +318,7 @@ def on_load_checkpoint(self, checkpoint) -> None: """ # mcore uses distributed checkpointing if "state_dict" in checkpoint and checkpoint["state_dict"]: - for index, module in enumerate(self.get_gpt_module_list()): + for index, module in enumerate(self.get_model_module_list()): if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None: checkpoint_state_dict = checkpoint["state_dict"][f"model_{index}"] else: diff --git a/nemo_aligner/utils/train_utils.py b/nemo_aligner/utils/train_utils.py index 7864a35fe..82b1dd660 100644 --- 
a/nemo_aligner/utils/train_utils.py +++ b/nemo_aligner/utils/train_utils.py @@ -38,7 +38,7 @@ def set_sync_funcs(ptl_model, forward_only): param_sync_func = ptl_model.sync_overlap_parameters # pipeline schedules will get these from ptl_model.model.config - for module in ptl_model.get_gpt_module_list(): + for module in ptl_model.get_model_module_list(): module.config.no_sync_func = no_sync_func module.config.grad_sync_func = grad_sync_func module.config.param_sync_func = param_sync_func From bec82bc6211ad7fd51078205fa36ac3482ffb17b Mon Sep 17 00:00:00 2001 From: Gerald Shen <119401249+gshennvm@users.noreply.github.com> Date: Mon, 8 Jan 2024 22:29:35 -0800 Subject: [PATCH 4/4] Update CONTRIBUTING.md Co-authored-by: Olivier Delalleau <507137+odelalleau@users.noreply.github.com> --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c1114f459..ec1293b81 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,7 +12,7 @@ Thanks for the interest in contributing to NeMo-Aligner. We do all of NeMo-Align 4) Make sure all unittests finish successfully before sending PR ``pytest`` or (if your dev box does not have GPU) ``pytest --cpu`` from the root folder 5) Send your PR and request a review -**NOTE**: The `main` branch uses a fixed nemo version which we will update on every release. The `dev` branch is the branch that has all the commits from aligner main but is up to date with nemo main, this branch is less stable but we run nightly tests on this branch to make sure everything works. We only provide the dockerfile that works with `main`. +**NOTE**: The `main` branch uses a fixed NeMo version which we will update on every release. The `dev` branch is the branch that has all commits from `main` but uses NeMo's main branch: this branch is less stable but we run nightly tests on it to make sure everything works. 
We only provide the Dockerfile that works with `main`, which is the branch most PRs should target unless they require the latest NeMo main (in which case they should target `dev`). Every release `dev` and `main` will sync to be the same.