From ac2e90c0cced94260de988f2fec172517251f4e2 Mon Sep 17 00:00:00 2001
From: SteBaum
Date: Mon, 8 Jul 2024 11:47:48 +0200
Subject: [PATCH] feat: decommission playbooks

---
Review note: file contents were revised after the original commit, so the
commit id above and the blob ids on the "index" lines below are stale.
Regenerate the patch with "git format-patch" if exact ids matter.

 playbooks/meta/hadoop-decommission.yml        | 11 +++++
 playbooks/meta/hbase-decommission.yaml        |  8 ++++
 .../utils/hdfs_namenode_decomm_datanode.yml   | 14 ++++++
 ...arn_resourcemanager_decomm_nodemanager.yml | 14 ++++++
 roles/hdfs/namenode/tasks/decommission.yml    | 48 +++++++++++++++++++
 .../resourcemanager/tasks/decommission.yml    | 48 +++++++++++++++++++
 6 files changed, 143 insertions(+)
 create mode 100644 playbooks/meta/hadoop-decommission.yml
 create mode 100644 playbooks/meta/hbase-decommission.yaml
 create mode 100644 playbooks/utils/hdfs_namenode_decomm_datanode.yml
 create mode 100644 playbooks/utils/yarn_resourcemanager_decomm_nodemanager.yml
 create mode 100644 roles/hdfs/namenode/tasks/decommission.yml
 create mode 100644 roles/yarn/resourcemanager/tasks/decommission.yml

diff --git a/playbooks/meta/hadoop-decommission.yml b/playbooks/meta/hadoop-decommission.yml
new file mode 100644
index 00000000..f34bdda0
--- /dev/null
+++ b/playbooks/meta/hadoop-decommission.yml
@@ -0,0 +1,11 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+# Pass the FQDN of the node to decommission as an extra variable, for example:
+# ansible-playbook tdp/playbooks/meta/hadoop-decommission.yml -e "excluded_node_fqdn=worker-01.novalocal"
+- ansible.builtin.import_playbook: ../utils/yarn_capacity_scheduler.yml
+- ansible.builtin.import_playbook: ../utils/yarn_resourcemanager_decomm_nodemanager.yml
+# Decommission YARN nodemanager
+- ansible.builtin.import_playbook: ../utils/hdfs_namenode_decomm_datanode.yml
+# Decommission HDFS datanode
diff --git a/playbooks/meta/hbase-decommission.yaml b/playbooks/meta/hbase-decommission.yaml
new file mode 100644
index 00000000..e1724beb
--- /dev/null
+++ b/playbooks/meta/hbase-decommission.yaml
@@ -0,0 +1,8 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+# Limit the run to the host of the regionserver to decommission, for example:
+# ansible-playbook tdp/playbooks/meta/hbase-decommission.yaml -l worker-01
+- ansible.builtin.import_playbook: ../hbase_regionserver_stop.yml
+# Decommission HBase regionserver
diff --git a/playbooks/utils/hdfs_namenode_decomm_datanode.yml b/playbooks/utils/hdfs_namenode_decomm_datanode.yml
new file mode 100644
index 00000000..2fc62f1a
--- /dev/null
+++ b/playbooks/utils/hdfs_namenode_decomm_datanode.yml
@@ -0,0 +1,14 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+- name: Hadoop HDFS datanode Decommission
+  hosts: hdfs_nn
+  tasks:
+    - tosit.tdp.resolve:  # noqa unnamed-task
+        node_name: hdfs_namenode
+    - name: Decommission HDFS datanode
+      ansible.builtin.import_role:
+        name: tosit.tdp.hdfs.namenode
+        tasks_from: decommission
+    - ansible.builtin.meta: clear_facts  # noqa unnamed-task
diff --git a/playbooks/utils/yarn_resourcemanager_decomm_nodemanager.yml b/playbooks/utils/yarn_resourcemanager_decomm_nodemanager.yml
new file mode 100644
index 00000000..466ddc7a
--- /dev/null
+++ b/playbooks/utils/yarn_resourcemanager_decomm_nodemanager.yml
@@ -0,0 +1,14 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+- name: Hadoop Yarn resourcemanager decommission
+  hosts: yarn_rm
+  tasks:
+    - tosit.tdp.resolve:  # noqa unnamed-task
+        node_name: yarn_resourcemanager
+    - name: Decommission YARN NM
+      ansible.builtin.import_role:
+        name: tosit.tdp.yarn.resourcemanager
+        tasks_from: decommission
+    - ansible.builtin.meta: clear_facts  # noqa unnamed-task
diff --git a/roles/hdfs/namenode/tasks/decommission.yml b/roles/hdfs/namenode/tasks/decommission.yml
new file mode 100644
index 00000000..5dda1178
--- /dev/null
+++ b/roles/hdfs/namenode/tasks/decommission.yml
@@ -0,0 +1,48 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+# Decommission the datanode named by "excluded_node_fqdn": add it to the
+# namenode exclude file, refresh the namenode and print the resulting status.
+- name: Check that the node to decommission is provided
+  ansible.builtin.assert:
+    that:
+      - excluded_node_fqdn is defined
+      - excluded_node_fqdn | length > 0
+    fail_msg: 'Pass the node to decommission with -e "excluded_node_fqdn=<fqdn>"'
+
+- name: Render dfs.exclude file
+  ansible.builtin.template:
+    src: dfs.exclude.j2
+    dest: "{{ hdfs_site['dfs.hosts.exclude'] }}"
+    owner: root
+    group: root
+    mode: "0644"
+
+# Append to the same file the template task above renders (the path set in
+# dfs.hosts.exclude) rather than to a hard-coded location.
+- name: Update exclude nodes file
+  ansible.builtin.lineinfile:
+    path: "{{ hdfs_site['dfs.hosts.exclude'] }}"
+    line: "{{ excluded_node_fqdn }}"
+    state: present
+
+- name: kinit hdfs NN
+  ansible.builtin.command: kinit -kt /etc/security/keytabs/nn.service.keytab nn/{{ ansible_fqdn }}@{{ realm }}
+  become_user: hdfs
+  changed_when: false
+
+- name: RefreshNodes
+  ansible.builtin.command: /usr/bin/hdfs dfsadmin -refreshNodes
+  become_user: hdfs
+
+# Read-only report: never counts as a change.
+- name: Check node status
+  ansible.builtin.command: /usr/bin/hdfs dfsadmin -report -decommissioning
+  register: hdfs_output
+  become_user: hdfs
+  changed_when: false
+
+- name: Print output of node status
+  ansible.builtin.debug:
+    var: hdfs_output.stdout
diff --git a/roles/yarn/resourcemanager/tasks/decommission.yml b/roles/yarn/resourcemanager/tasks/decommission.yml
new file mode 100644
index 00000000..ad7d0f89
--- /dev/null
+++ b/roles/yarn/resourcemanager/tasks/decommission.yml
@@ -0,0 +1,48 @@
+# Copyright 2022 TOSIT.IO
+# SPDX-License-Identifier: Apache-2.0
+
+---
+# Decommission the nodemanager named by "excluded_node_fqdn": add it to the
+# resourcemanager exclude file, refresh the resourcemanager and list the nodes.
+- name: Check that the node to decommission is provided
+  ansible.builtin.assert:
+    that:
+      - excluded_node_fqdn is defined
+      - excluded_node_fqdn | length > 0
+    fail_msg: 'Pass the node to decommission with -e "excluded_node_fqdn=<fqdn>"'
+
+- name: Render yarn.exclude file
+  ansible.builtin.template:
+    src: yarn.exclude.j2
+    dest: "{{ yarn_site['yarn.resourcemanager.nodes.exclude-path'] }}"
+    owner: root
+    group: root
+    mode: "0644"
+
+# Append to the same file the template task above renders (the path set in
+# yarn.resourcemanager.nodes.exclude-path) rather than to a hard-coded location.
+- name: Update exclude nodes file
+  ansible.builtin.lineinfile:
+    path: "{{ yarn_site['yarn.resourcemanager.nodes.exclude-path'] }}"
+    line: "{{ excluded_node_fqdn }}"
+    state: present
+
+- name: kinit yarn RM
+  ansible.builtin.command: kinit -kt /etc/security/keytabs/rm.service.keytab rm/{{ ansible_fqdn }}@{{ realm }}
+  become_user: yarn
+  changed_when: false
+
+- name: RefreshNodes
+  ansible.builtin.command: /usr/bin/yarn rmadmin -refreshNodes
+  become_user: yarn
+
+# Read-only listing: never counts as a change.
+- name: Check node status
+  ansible.builtin.command: /usr/bin/yarn node -list -all
+  register: yarn_output
+  become_user: yarn
+  changed_when: false
+
+- name: Print output of node status
+  ansible.builtin.debug:
+    var: yarn_output.stdout