-
Notifications
You must be signed in to change notification settings - Fork 0
/
docker-run-generate-outputs.sh
executable file
·132 lines (109 loc) · 5.17 KB
/
docker-run-generate-outputs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
# Abort as soon as any command fails, so a later step never runs on the output of a failed one.
set -e

# The Docker image name is "<project name>-generate-outputs", where the project name is the
# contents of the configuration file below (read relative to the current working directory).
PROJECT_NAME="$(<configurations/docker_image_project_name.txt)"
IMAGE_NAME="${PROJECT_NAME}-generate-outputs"
#######################################
# Parse the optional profiling flags that may precede the positional arguments.
# Parsing stops at the first argument that is not a recognised flag, or just after a literal "--".
# Globals:
#   PROFILE_CPU, CPU_PROFILE_OUTPUT_PATH         (written when --profile-cpu is given)
#   PROFILE_MEMORY, MEMORY_PROFILE_OUTPUT_PATH   (written when --profile-memory is given)
#   PROFILING_ARGS_CONSUMED                      (written: number of leading arguments consumed)
# Arguments:
#   The script's full argument list ("$@").
# Returns:
#   0 on success; 1 (with a message on stderr) if a flag is missing its output path.
#######################################
parse_profiling_flags() {
    PROFILING_ARGS_CONSUMED=0
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --profile-cpu)
                # Without this check `shift 2` fails and `set -e` kills the script with no message.
                if [[ $# -lt 2 ]]; then
                    echo "Error: --profile-cpu requires a <profile-output-path> argument" >&2
                    return 1
                fi
                PROFILE_CPU=true
                CPU_PROFILE_OUTPUT_PATH="$2"
                PROFILING_ARGS_CONSUMED=$((PROFILING_ARGS_CONSUMED + 2))
                shift 2;;
            --profile-memory)
                if [[ $# -lt 2 ]]; then
                    echo "Error: --profile-memory requires a <profile-output-path> argument" >&2
                    return 1
                fi
                PROFILE_MEMORY=true
                MEMORY_PROFILE_OUTPUT_PATH="$2"
                PROFILING_ARGS_CONSUMED=$((PROFILING_ARGS_CONSUMED + 2))
                shift 2;;
            --)
                # Explicit end-of-options marker: consume it and stop parsing.
                PROFILING_ARGS_CONSUMED=$((PROFILING_ARGS_CONSUMED + 1))
                shift
                break;;
            *)
                # First positional argument: leave it in place for the caller.
                break;;
        esac
    done
    return 0
}

parse_profiling_flags "$@" || exit 1
# Drop the parsed flags so that only the positional arguments remain in "$@".
shift "$PROFILING_ARGS_CONSUMED"
# Check that the correct number of arguments were provided.
# Exactly 12 positional arguments must remain once the optional profiling flags have been consumed.
if [[ $# -ne 12 ]]; then
    # Usage goes to stderr and the script exits non-zero, so callers can detect the misuse.
    cat >&2 <<EOF
Usage: $0
[--profile-cpu <profile-output-path>] [--profile-memory <profile-output-path>]
<user> <google-cloud-credentials-file-path> <pipeline-configuration-file-path>
<raw-data-dir> <prev-coded-dir> <messages-json-output-path> <individuals-json-output-path>
<icr-output-dir> <coded-output-dir> <messages-output-csv> <individuals-output-csv> <production-output-csv>
EOF
    exit 1
fi

# Assign the program arguments to bash variables.
# Host-side inputs that are copied into the container:
USER=$1
INPUT_GOOGLE_CLOUD_CREDENTIALS=$2
INPUT_PIPELINE_CONFIGURATION=$3
INPUT_RAW_DATA_DIR=$4
PREV_CODED_DIR=$5
# Host-side destinations that results are copied out to once the container has run:
OUTPUT_MESSAGES_JSONL=$6
OUTPUT_INDIVIDUALS_JSONL=$7
OUTPUT_ICR_DIR=$8
OUTPUT_CODED_DIR=$9
# Positional parameters above 9 need braces; $10 would be read as ${1} followed by a literal 0.
OUTPUT_MESSAGES_CSV=${10}
OUTPUT_INDIVIDUALS_CSV=${11}
OUTPUT_PRODUCTION_CSV=${12}
# Work out the optional profiler wrappers first. Each variable is only assigned when the matching
# profiler was requested, so otherwise it expands to nothing in the commands below.
if [[ "$PROFILE_CPU" == true ]]; then
    # CPU profiling wraps the pipeline in pyflame and grants the container SYS_PTRACE.
    PROFILE_CPU_CMD="pyflame -o /data/cpu.prof -t"
    SYS_PTRACE_CAPABILITY="--cap-add SYS_PTRACE"
fi

if [[ "$PROFILE_MEMORY" == true ]]; then
    PROFILE_MEMORY_CMD="mprof run -o /data/memory.prof"
fi

# Build an image for this pipeline stage, telling the build which profilers to install.
docker build \
    --build-arg INSTALL_CPU_PROFILER="$PROFILE_CPU" \
    --build-arg INSTALL_MEMORY_PROFILER="$PROFILE_MEMORY" \
    -t "$IMAGE_NAME" .

# The command line run inside the container. Every path refers to a fixed location inside the
# container; files are copied between those locations and the host paths given on the command line.
CMD="pipenv run $PROFILE_CPU_CMD $PROFILE_MEMORY_CMD python -u generate_outputs.py \
\"$USER\" /credentials/google-cloud-credentials.json /data/pipeline_configuration.json \
/data/raw-data /data/prev-coded \
/data/output-messages.jsonl /data/output-individuals.jsonl /data/output-icr /data/coded \
/data/output-messages.csv /data/output-individuals.csv /data/output-production.csv \
"

# Create (but do not yet start) a container from the image that was just built.
# SYS_PTRACE_CAPABILITY is deliberately unquoted: it must split into two words, or vanish if unset.
container="$(docker container create ${SYS_PTRACE_CAPABILITY} -w /app "$IMAGE_NAME" /bin/bash -c "$CMD")"
echo "Created container $container"
# Abbreviated id (first 7 characters) used to keep the progress messages readable.
container_short_id="${container:0:7}"
# Copy one host path into the container, announcing the copy first.
# Arguments: $1 - path on the host; $2 - absolute destination path inside the container.
# Globals:   container, container_short_id (read).
copy_into_container() {
    local host_path="$1"
    local container_path="$2"
    echo "Copying $host_path -> $container_short_id:$container_path"
    docker cp "$host_path" "$container:$container_path"
}

# Copy input data into the container
copy_into_container "$INPUT_PIPELINE_CONFIGURATION" /data/pipeline_configuration.json
copy_into_container "$INPUT_GOOGLE_CLOUD_CREDENTIALS" /credentials/google-cloud-credentials.json
copy_into_container "$INPUT_RAW_DATA_DIR" /data/raw-data

# The previously-coded directory is only copied when it exists on the host as a directory.
if [[ -d "$PREV_CODED_DIR" ]]; then
    copy_into_container "$PREV_CODED_DIR" /data/prev-coded
fi

# Run the container, attached to this terminal, and wait for it to finish.
echo "Starting container $container_short_id"
docker start -a -i "$container"
# Copy a single file out of the container, creating the destination's parent directory first.
# Arguments: $1 - absolute path of the file inside the container; $2 - destination path on the host.
# Globals:   container, container_short_id (read).
copy_file_from_container() {
    local container_path="$1"
    local host_path="$2"
    echo "Copying $container_short_id:$container_path -> $host_path"
    mkdir -p "$(dirname "$host_path")"
    docker cp "$container:$container_path" "$host_path"
}

# Copy the contents of a container directory into a host directory, creating the latter if needed.
# Arguments: $1 - absolute path of the directory inside the container; $2 - destination directory.
# Globals:   container, container_short_id (read).
copy_dir_from_container() {
    local container_dir="$1"
    local host_dir="$2"
    echo "Copying $container_short_id:$container_dir/. -> $host_dir"
    mkdir -p "$host_dir"
    # The trailing "/." makes docker cp copy the directory's contents rather than the directory.
    docker cp "$container:$container_dir/." "$host_dir"
}

# Copy the output data back out of the container
copy_file_from_container /data/output-messages.jsonl "$OUTPUT_MESSAGES_JSONL"
copy_file_from_container /data/output-individuals.jsonl "$OUTPUT_INDIVIDUALS_JSONL"
copy_dir_from_container /data/output-icr "$OUTPUT_ICR_DIR"
copy_dir_from_container /data/coded "$OUTPUT_CODED_DIR"
copy_file_from_container /data/output-production.csv "$OUTPUT_PRODUCTION_CSV"
copy_file_from_container /data/output-messages.csv "$OUTPUT_MESSAGES_CSV"
copy_file_from_container /data/output-individuals.csv "$OUTPUT_INDIVIDUALS_CSV"

# Profiles exist only when the matching profiler was requested on the command line.
if [[ "$PROFILE_CPU" = true ]]; then
    copy_file_from_container /data/cpu.prof "$CPU_PROFILE_OUTPUT_PATH"
fi

if [[ "$PROFILE_MEMORY" = true ]]; then
    # Goes through the same helper as every other file, so the parent directory is created too.
    copy_file_from_container /data/memory.prof "$MEMORY_PROFILE_OUTPUT_PATH"
fi

# Tear down the container, now that all expected output files have been copied out successfully
docker container rm "$container" >/dev/null