# Dockerfile for CDH5 images
#
# Copyright (c) 2016 Coho Data Inc.
# The subject matter distributed under this license is or is based on
# information and material generated by Coho Data Inc. It may only be
# acquired, used, modified and distributed under the terms of the Coho
# Data Compute Cluster License v1.0. Except as permitted in the Coho
# Data Compute Cluster License v1.0, all other rights are reserved in
# any copyright or other similar rights which may exist. Execution of
# software distributed under this Coho Data Compute Cluster License
# v1.0 may cause you to acquire third-party software (as described in
# the accompanying documentation) and you agree (a) to comply with the
# applicable licenses thereunder and (b) that Coho is not responsible
# in any way for your compliance or non-compliance with the applicable
# third-party licenses or the consequences of your being subject to
# said licenses or your compliance or non-compliance.
#
# Installs:
# - Oracle JDK 7
# - CDH5 Hadoop
# - some utilities
# Base image: Ubuntu 14.04 (trusty), the same release the Cloudera apt
# repository URLs used later in this file are published for.
FROM ubuntu:14.04.4

# Maintainer contact, expressed as a label because the MAINTAINER instruction
# is deprecated.
# NOTE(review): the value below looks like an e-mail-obfuscation placeholder
# rather than a real address -- restore the actual maintainer contact.
LABEL maintainer="[email protected]"

# CDH release to install. The value is embedded in the image label below and
# substituted into the Cloudera apt source lists by a later sed step.
ENV cdh_version=5.7.0
LABEL com.cohodata.image="{ 'version' : '${cdh_version}' }"
# Bootstrap tooling: curl is needed to download the Cloudera archive key, and
# ca-certificates lets it verify HTTPS endpoints (listed explicitly because
# recommended packages are no longer pulled in). The apt lists are removed in
# the same layer; later install steps run their own `apt-get update`.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends ca-certificates curl && \
    rm -rf /var/lib/apt/lists/*

# Trust the Cloudera archive signing key.
# - Fetched over HTTPS from the trusty path, consistent with the repository
#   lists added below (the key used to come over plain HTTP from the precise
#   path).
# - `-f` makes curl fail on HTTP error statuses, and downloading to a file
#   instead of piping means a failed download aborts the build rather than
#   being masked by the pipeline's exit status.
RUN curl -fsSL -o /tmp/cloudera-archive.key \
        https://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/archive.key && \
    apt-key add /tmp/cloudera-archive.key && \
    rm -f /tmp/cloudera-archive.key
# Register the Cloudera CDH5 and Cloudera Manager 5 apt source lists.
ADD https://archive.cloudera.com/cdh5/ubuntu/trusty/amd64/cdh/cloudera.list /etc/apt/sources.list.d/cloudera-cdh5.list
ADD https://archive.cloudera.com/cm5/ubuntu/trusty/amd64/cm/cloudera.list /etc/apt/sources.list.d/cloudera-cm5.list

# Rewrite every `trusty-cdh<something>` token in both lists to
# `trusty-cdh${cdh_version}` so apt resolves packages from that exact release.
# NOTE(review): the same `trusty-cdh` pattern is applied to the cm5 list; if
# that list names its suite differently (e.g. `trusty-cm5`) the substitution
# leaves it untouched -- confirm that is intended.
RUN sed -i "s/\(trusty-cdh\)[^[:space:]]*/\1$cdh_version/g" \
        /etc/apt/sources.list.d/cloudera-cdh5.list \
        /etc/apt/sources.list.d/cloudera-cm5.list
# Force apt to use the zookeeper package distributed by CDH: append a
# preferences stanza (preceded by a blank separator line) that pins the
# `zookeeper` package to origin archive.cloudera.com with priority 600.
RUN printf '%s\n' \
        '' \
        'Package: zookeeper' \
        'Pin: origin archive.cloudera.com' \
        'Pin-Priority: 600' \
    >> /etc/apt/preferences
# Install general utilities, Python with pip, the Oracle JDK 7 package and the
# CDH Hadoop daemons (YARN, MapReduce v2 and the 0.20 MapReduce services).
# Packages are listed one per line in alphabetical order for readable diffs.
# The apt package lists are deleted in the same layer that downloads them so
# they do not persist in the image.
RUN apt-get update -y && \
    apt-get install -y \
        ant \
        emacs \
        hadoop-0.20-mapreduce-jobtracker \
        hadoop-0.20-mapreduce-tasktracker \
        hadoop-mapreduce \
        hadoop-mapreduce-historyserver \
        hadoop-yarn-nodemanager \
        hadoop-yarn-resourcemanager \
        iputils-ping \
        lbzip2 \
        net-tools \
        oracle-j2sdk1.7 \
        python \
        python-pip \
        rsync \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Python Docker client, pinned to an exact version.
# NOTE(review): `--no-cache-dir` is deliberately not passed; the pip shipped
# with this Ubuntu release is presumably too old to accept it -- confirm
# before adding the flag.
RUN pip install docker-py==1.10.6
# Add Hadoop configurations. They are staged under conf.docker.yarn and moved
# into place by the RUN step at the end of this section.
COPY etc/hadoop/conf.docker.yarn /etc/hadoop/conf.docker.yarn

# Logger setting exported to the container environment (key=value form; the
# space-separated ENV form is legacy).
# NOTE(review): presumably consumed by the Hadoop scripts/configuration copied
# in this section -- verify against those files.
ENV LOGGER_ENV_VAR="INFO,console"

# Add Hadoop scripts: the container entry script plus replacements for the
# packaged YARN and MapReduce job-history daemon launchers.
COPY usr/local/bin/cio-hadoop-run /usr/local/bin/cio-hadoop-run
COPY usr/lib/hadoop-yarn/sbin/yarn-daemon.sh /usr/lib/hadoop-yarn/sbin/yarn-daemon.sh
COPY usr/lib/hadoop-mapreduce/sbin/mr-jobhistory-daemon.sh /usr/lib/hadoop-mapreduce/sbin/mr-jobhistory-daemon.sh

# This image will be used to set up a Yarn cluster: discard whatever the
# packages left at /etc/hadoop/conf and put the staged configuration there.
RUN rm -rf /etc/hadoop/conf && \
    mv /etc/hadoop/conf.docker.yarn /etc/hadoop/conf
# Docker Compose 1.10.0 (Linux x86_64 release binary).
# Downloaded and made executable in a single layer: a bare `ADD <url>`
# followed by a separate `chmod` stores the binary twice. `-f` fails the build
# on an HTTP error and `-L` follows the release download redirect. Relies on
# the curl package installed near the top of this file.
# NOTE(review): no checksum is verified here; add a sha256 check once the
# expected digest for this release has been recorded.
RUN curl -fsSL -o /usr/local/bin/docker-compose \
        https://github.com/docker/compose/releases/download/1.10.0/docker-compose-Linux-x86_64 && \
    chmod 0755 /usr/local/bin/docker-compose
# Driver: install the deploy-cdh-cluster script into /usr/bin.
# NOTE(review): presumably the cluster deployment entry point that uses the
# docker-compose binary and docker-py installed above -- confirm.
COPY usr/bin/deploy-cdh-cluster /usr/bin/deploy-cdh-cluster
# Default command: start cio-hadoop-run. Exec (JSON array) form is used, so
# the script is launched directly rather than through a `/bin/sh -c` wrapper.
CMD ["/usr/local/bin/cio-hadoop-run"]