-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
executable file
·114 lines (94 loc) · 3.7 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Base image: Ubuntu 20.04.
FROM ubuntu:20.04
# Suppress interactive debconf prompts from apt-get.
# Written in key=value form; the space-separated `ENV key value` form is deprecated.
ENV DEBIAN_FRONTEND=noninteractive
#ENV http_proxy 'http://mainproxy.ucm.conti.de:8980'
#ENV https_proxy 'https://mainproxy.ucm.conti.de:8980'
# Base tooling: download utilities (curl, wget), tar, sudo, the SSH
# client/server and rsync. One package per line, sorted, to keep diffs small.
RUN apt-get update -y \
 && apt-get install -y \
        curl \
        openssh-server \
        rsync \
        ssh \
        sudo \
        tar \
        wget
#RUN apt-get update
#RUN apt-get install -y wget
#RUN apt-get install curl tar sudo ssh openssh-server rsync
# Download Hadoop 3.3.1 and unpack it into /opt/hadoop.
# The release is fetched over HTTPS from the official Apache archive (the same
# host the Spark and Hive downloads in this file use) rather than from a
# third-party mirror over plain HTTP, so the transfer cannot be altered in
# transit and the URL does not depend on a mirror keeping old releases.
# NOTE(review): HTTPS needs CA certificates at this point; they are presumably
# pulled in as a recommended package of wget/curl by the apt step above — confirm.
# The archive is extracted without its top-level directory and deleted in the
# same layer so it never ends up in the image.
RUN wget --no-verbose -O hadoop.tar.gz "https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz" \
 && mkdir -p /opt/hadoop \
 && tar -xzf hadoop.tar.gz -C /opt/hadoop --strip-components=1 \
 && rm hadoop.tar.gz
# Install the OpenJDK 8 JDK (JAVA_HOME in this file points at it) and vim.
# apt-get update runs in the same layer as the install so the package index
# can never be a stale one reused from a cached earlier layer.
RUN apt-get update \
 && apt-get install -y \
        openjdk-8-jdk \
        vim
# Installation locations of Hadoop and the JDK (key=value form; the
# space-separated `ENV key value` form is deprecated).
ENV HADOOP_HOME=/opt/hadoop \
    JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# PATH gets its own ENV instruction: a variable set by an ENV instruction is
# only substituted in later instructions, not within the same one.
ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
#add config
#ADD config/* $HADOOP_HOME/etc/hadoop/
#RUN chmod +x $HADOOP_HOME/etc/hadoop/*
# Start SSH server
#CMD ["/usr/sbin/sshd", "-D"]
#RUN mkdir /var/run/sshd && \
# echo 'root:password' | chpasswd && \
# ssh-keygen -A
#RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \
# sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config && \
# sed -i 's/#X11Forwarding yes/X11Forwarding yes/' /etc/ssh/sshd_config && \
# sed -i 's/#X11UseLocalhost yes/X11UseLocalhost no/' /etc/ssh/sshd_config && \
# sed -i 's/#UsePAM yes/UsePAM no/' /etc/ssh/sshd_config
# command bin/bash
# Generate a passphrase-less RSA key pair for the build user, authorize its
# public key for logins to the same account, and restrict authorized_keys to
# owner read/write.
# NOTE(review): the private key is baked into the image, so every container
# started from it shares the same key.
RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa \
 && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys \
 && chmod 0600 ~/.ssh/authorized_keys
# copy ssh config
#ADD ssh_configs/ssh_config /root/.ssh/config
# Delete every non-hidden entry under /tmp (the glob does not match dotfiles).
# NOTE(review): the original note mentions namenode/datanode temp files —
# presumably Hadoop data left under /tmp; confirm this is still needed at build time.
RUN rm -rf /tmp/*
#ADD start-ssh/* /
#RUN chmod +x /start-ssh.sh
# CMD bash /start-ssh.sh
#CMD ["/usr/sbin/sshd", "-D"]
#RUN /etc/init.d/ssh start
#CMD ["tail", "-f" ,"/dev/null"]
######## install spark
# Packages needed before installing Spark: CA certificates (for the HTTPS
# downloads), net-tools and Python 3. python3-pip is currently not installed.
# apt-get update runs in the same layer as the install so the package index is
# never a stale cached one. The lists are removed afterwards because no later
# instruction in this file uses apt.
RUN apt-get update \
 && apt-get install -y \
        ca-certificates \
        net-tools \
        python3 \
 && rm -rf /var/lib/apt/lists/*
# Register python3 as the `python` alternative so /usr/bin/python resolves to it.
RUN update-alternatives --install "/usr/bin/python" "python" "$(which python3)" 1
# Spark release selection and install location.
# PYTHONHASHSEED is pinned to a fixed value; this is needed when running
# Python 3.3 or greater.
ENV PYTHONHASHSEED=1 \
    SPARK_HOME=/opt/spark \
    SPARK_VERSION=3.3.2 \
    HADOOP_VERSION=3
# Fetch the Spark binary distribution built for the selected Hadoop line,
# unpack it into /opt/spark without its top-level directory, and delete the
# archive in the same layer.
RUN wget --no-verbose --output-document=apache-spark.tgz \
        "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
 && mkdir -p /opt/spark \
 && tar --extract --file=apache-spark.tgz --directory=/opt/spark --strip-components=1 \
 && rm apache-spark.tgz
# Continue from the Spark installation directory.
WORKDIR /opt/spark
# Spark settings exported to the container environment: workload selector,
# master URL, master/worker ports and web UI ports, and log file locations.
ENV SPARK_WORKLOAD="master" \
    SPARK_MASTER="spark://spark-master:7077" \
    SPARK_MASTER_PORT=7077 \
    SPARK_MASTER_WEBUI_PORT=8080 \
    SPARK_WORKER_PORT=7000 \
    SPARK_WORKER_WEBUI_PORT=8080 \
    SPARK_LOG_DIR=/opt/spark/logs \
    SPARK_MASTER_LOG=/opt/spark/logs/spark-master.out \
    SPARK_WORKER_LOG=/opt/spark/logs/spark-worker.out
# Create the log directory and both log files, then replace each file with a
# symlink to /dev/stdout so anything written to them goes to the container's
# standard output.
RUN mkdir -p "$SPARK_LOG_DIR" \
 && touch "$SPARK_MASTER_LOG" "$SPARK_WORKER_LOG" \
 && ln -sf /dev/stdout "$SPARK_MASTER_LOG" \
 && ln -sf /dev/stdout "$SPARK_WORKER_LOG"
#RUN . "/opt/spark/bin/load-spark-env.sh"
# Ports declared for the Hadoop and Spark services. EXPOSE only documents them;
# they still have to be published when the container is run.
EXPOSE 9870 8088 9864 8080 9000 8032 50010 8042 19888 18080 4040 35371 43351 8888 33457
# setup hive
WORKDIR /opt
# Hive release to install; both values can be overridden with --build-arg.
ARG HIVE_VERSION=3.1.3
ARG HIVE_URL=https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
# Hive locations. Each ENV that refers to HIVE_HOME is a separate instruction
# because a variable is only substituted in instructions after the one that
# sets it.
ENV HIVE_HOME=/opt/hive
ENV HIVE_CONF_DIR=$HIVE_HOME/conf
ENV PATH=$HIVE_HOME/sbin:$HIVE_HOME/bin:$PATH
# Download, unpack and delete the archive in a single layer. With separate RUN
# instructions the tarball was never removed and stayed in the image.
# The archive's top-level directory is stripped so the files land directly in
# $HIVE_HOME.
RUN wget --no-verbose -O apache-hive.tar.gz "$HIVE_URL" \
 && mkdir -p "$HIVE_HOME" \
 && tar -xzf apache-hive.tar.gz -C "$HIVE_HOME" --strip-components=1 \
 && rm apache-hive.tar.gz
# Replaces the HADOOP_VERSION=3 value that was set earlier for the Spark
# download URL; from here on the variable holds 3.3.1.
ENV HADOOP_VERSION=3.3.1
# Download the PostgreSQL JDBC driver into Spark's jars directory, then set the
# working directory to /opt for the remaining instructions.
RUN wget --directory-prefix=/opt/spark/jars https://jdbc.postgresql.org/download/postgresql-42.3.2.jar
WORKDIR /opt
# Additional ports declared after the Hive setup.
# NOTE(review): presumably HiveServer2 (10000) and the Hive metastore (9083) — confirm.
EXPOSE 10000 9083
# Default command: `tail -f /dev/null` blocks forever, so the container stays
# up without starting any service itself.
# NOTE(review): services are presumably started separately (e.g. via docker exec) — confirm.
CMD ["tail", "-f", "/dev/null"]