Makefile
{# Copyright 2017 Cargill Incorporated
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. #}
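# NOTE: this Makefile is a Jinja2 template; impala-cmd and source-db-cmd are
# filled in from the Pipewrench configuration (conf) when the template is rendered.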
impala-cmd={{ conf.impala_cmd }}
source-db-cmd={{ conf.source_database.cmd }}
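# Run the full pipeline from a clean state as an integration test.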
integration-test:
	$(MAKE) clean
	$(MAKE) first-run
# $(MAKE) test-rowcount @TODO find a good primary key for integration tests (baseball table)
sqoop-create: ## Create Sqoop job
	./run-with-logging.sh ./sqoop-create.sh $@
sqoop-clean: ## Delete Sqoop job
	./run-with-logging.sh ./sqoop-delete.sh $@
sqoop-exec: ## Execute Sqoop job
	./run-with-logging.sh ./sqoop-exec.sh $@
parquet-table: parquet-table-create.sql ## Create Parquet Impala table
	$(impala-cmd) parquet-table-create.sql
test-rowcount: ## Run row count integration test
	./run-with-logging.sh ./test-rowcount.sh $@
parquet-table-clean: parquet-table-drop.sql ## Drop Parquet Impala table
	$(impala-cmd) parquet-table-drop.sql
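# Archive the Parquet data files in HDFS (details in hdfs-archive.sh).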
parquet-archive: hdfs-archive.sh
	./run-with-logging.sh ./hdfs-archive.sh $@
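# Restore previously archived Parquet data files in HDFS (details in hdfs-unarchive.sh).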
parquet-unarchive: hdfs-unarchive.sh
	./run-with-logging.sh ./hdfs-unarchive.sh $@
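# Refresh Impala's view of the Parquet table (runs parquet-refresh.sql).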
parquet-refresh:
	$(impala-cmd) ./parquet-refresh.sql
hdfs-clean: hdfs-delete.sh ## Delete Parquet files from HDFS
	./run-with-logging.sh ./hdfs-delete.sh $@
kudu-table: kudu-table-create.sql ## Create Impala Kudu table
	$(impala-cmd) kudu-table-create.sql
kudu-insert: tables kudu-table-insert.sql ## Insert data into Kudu table from Parquet
	$(impala-cmd) kudu-table-insert.sql
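# Compute Impala table statistics (runs compute-stats.sql).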
compute-stats: compute-stats.sql
	$(impala-cmd) compute-stats.sql
kudu-table-clean: kudu-table-drop.sql ## Drop Kudu table
	$(impala-cmd) kudu-table-drop.sql
tables-clean: kudu-table-clean parquet-table-clean ## Drop all tables
tables: parquet-table kudu-table ## Create all tables
update: ## Insert data from source db into Kudu
	$(MAKE) sqoop-exec
	$(MAKE) parquet-refresh
	$(MAKE) kudu-insert
	$(MAKE) parquet-archive
first-run: ## Run entire workflow, building all tables and pulling source db data into Kudu
	$(MAKE) sqoop-create
	$(MAKE) tables
	$(MAKE) update
	$(MAKE) compute-stats
clean: sqoop-clean kudu-table-clean parquet-table-clean hdfs-clean ## Remove all tables and data
targets: ## Print out a list of available targets
	@fgrep -h ": " $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/:.*//'
help: ## Print help message
	@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##//'