cluster.yml
---
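# Provisions every host in the inventory as a node of a Hadoop/Spark cluster:
# updates the OS, installs a JDK, unpacks and configures Hadoop and Spark,
# formats HDFS on the master, and starts the cluster services.
#
# Illustrative inventory sketch (hostnames other than "master" and the
# "ubuntu" user are assumptions, not taken from this repo); the play relies
# on ansible_user being set and on one host being named "master":
#   master  ansible_user=ubuntu
#   worker1 ansible_user=ubuntu
#   worker2 ansible_user=ubuntu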
- hosts: all
  become: yes
  vars:
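    # Versions, checksums, and install locations for the Hadoop and Spark
    # distributions; additional variables are loaded from vars.yml below.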
    - hadoop_version: "3.3.4"
    - hadoop_sha512_checksum: "ca5e12625679ca95b8fd7bb7babc2a8dcb2605979b901df9ad137178718821097b67555115fafc6dbf6bb32b61864ccb6786dbc555e589694a22bf69147780b4"
    - hdfs_data_root: "/home/{{ ansible_user }}/data"
    - hadoop_home: "/home/{{ ansible_user }}/hadoop"
    - spark_version: "3.3.2"
    - spark_sha512_checksum: "4cd2396069fbe0f8efde2af4fd301bf46f8c6317e9dea1dd42a405de6a38380635d49b17972cb92c619431acece2c3af4c23bfdf193cedb3ea913ed69ded23a1"
    - spark_home: "/home/{{ ansible_user }}/spark"
  vars_files:
    - vars.yml
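  # Pre-tasks: set hostnames and bring the base system up to date before any
  # Hadoop/Spark components are installed; throttle: 1 keeps the apt
  # operations to one host at a time.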
  pre_tasks:
    - include_tasks: configure_hostnames.yml
    - name: Update apt cache
      apt: update_cache=yes cache_valid_time=3600 # 1 hr
      throttle: 1
    - name: Check if hdfs is already set up
      stat:
        path: "{{ hdfs_data_root }}"
      register: hdfs_data_root_result
    - name: Perform a comprehensive upgrade if hdfs is not set up
      apt: upgrade=dist
      when: hdfs_data_root_result.stat.isdir is not defined or not hdfs_data_root_result.stat.isdir
      notify: reboot server
      throttle: 1
    - name: Perform safe upgrade
      # Safe upgrade only if hdfs is set up to limit any data loss risks
      apt: upgrade=safe
      when: hdfs_data_root_result.stat.isdir is defined and hdfs_data_root_result.stat.isdir
      throttle: 1
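  # Main tasks: install prerequisites, unpack and configure the Hadoop and
  # Spark distributions, and bring the cluster up; master-only steps are
  # guarded by inventory_hostname.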
  tasks:
    - name: Ensure rsync is present to sync files across cluster
      apt: pkg=rsync state=present
    - include_tasks: master_cluster_ssh_access.yml
    - name: Ensure that we have the jdk installed
      apt: pkg=openjdk-11-jdk state=present
      throttle: 1
    - include_tasks: setup_hadoop_distribution.yml
    - include_tasks: configure_hadoop.yml
    - name: Format hdfs for first time use
      command: "{{ hadoop_home }}/bin/hdfs namenode -format creates={{ hdfs_data_root }}"
      when: inventory_hostname == "master"
      become: no
    - include_tasks: setup_spark_distribution.yml
    - include_tasks: configure_spark.yml
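    # Start the cluster services from the master only, as the unprivileged
    # cluster user (become: no).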
    - include_tasks:
        file: start_cluster_services.yml
        apply:
          become: no
      when: inventory_hostname == "master"
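  # Handlers: the reboot is notified by the dist-upgrade pre-task above.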
  handlers:
    - name: reboot server
      reboot: msg="Rebooting host"