forked from awslabs/amazon-redshift-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze-vacuum-schema.py
executable file
·131 lines (116 loc) · 6.7 KB
/
analyze-vacuum-schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
from __future__ import print_function
'''
analyze-vacuum-schema.py
* Copyright 2015, Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
The Redshift Analyze Vacuum Utility gives you the ability to automate VACUUM and ANALYZE operations.
When run, it will analyze or vacuum an entire schema or individual tables. This Utility Analyzes
and Vacuums table(s) in a Redshift Database schema, based on certain parameters like unsorted,
stats off and size of the table and system alerts from stl_explain & stl_alert_event_log.
By turning on/off '--analyze-flag' and '--vacuum-flag' parameters, you can run it as 'vacuum-only'
or 'analyze-only' utility. This script can be scheduled to run VACUUM and ANALYZE as part of
regular maintenance/housekeeping activities, when there are less database activities (quiet period).
This script will:
1) Analyze a single table or tables in a schema based on,
a) Alerts from stl_explain & stl_alert_event_log.
b) 'stats off' metrics from SVV_TABLE_INFO.
2) Vacuum a single table or tables in a schema based on,
a) The alerts from stl_alert_event_log.
b) The 'unsorted' and 'size' metrics from SVV_TABLE_INFO.
c) Vacuum reindex to analyze the interleaved sort keys
Srinikri Amazon Web Services (2015)
11/21/2015 : Added support for vacuum reindex to analyze the interleaved sort keys.
09/01/2017 : Fixed issues with interleaved sort key tables per https://github.com/awslabs/amazon-redshift-utils/issues/184
11/09/2017 : Refactored to support running in AWS Lambda
14/12/2017 : Refactored to support a more sensible interface style with kwargs
'''
import os
import sys
import argparse
# add the lib directory to the sys path
try:
sys.path.append(os.path.join(os.path.dirname(__file__), "lib"))
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
except:
pass
import getopt
import analyze_vacuum
import config_constants
__version__ = ".9.2.1"
OK = 0
ERROR = 1
INVALID_ARGS = 2
NO_WORK = 3
TERMINATED_BY_USER = 4
NO_CONNECTION = 5
# setup cli args
parser = argparse.ArgumentParser()
parser.add_argument("--analyze-flag", dest="analyze_flag", default=True, type=bool,
help="Flag to turn ON/OFF ANALYZE functionality (True or False : Default = True ")
parser.add_argument("--max-unsorted-pct", dest="max_unsorted_pct",
help="Maximum unsorted percentage(% to consider a table for vacuum : Default = 50%")
parser.add_argument("--min-interleaved-cnt", dest="min_interleaved_cnt", type=int,
help="Minimum stv_interleaved_counts records to consider a table for vacuum reindex: Default = 0")
parser.add_argument("--min-interleaved-skew", dest="min_interleaved_skew",
help="Minimum index skew to consider a table for vacuum reindex: Default = 1.4")
parser.add_argument("--min-unsorted-pct", dest="min_unsorted_pct",
help="Minimum unsorted percentage(% to consider a table for vacuum : Default = 5%")
parser.add_argument("--stats-off-pct ", dest="stats_off_pct",
help="Minimum stats off percentage(% to consider a table for analyze : Default = 10%")
parser.add_argument("--table-name", dest="table_name",
help="A specific table to be Analyzed or Vacuumed if analyze-schema is not desired")
parser.add_argument("--vacuum-flag", dest="vacuum_flag", default=True, type=bool,
help="Flag to turn ON/OFF VACUUM functionality (True or False : Default = True")
parser.add_argument("--vacuum-parameter", dest="vacuum_parameter",
help="Vacuum parameters [ FULL | SORT ONLY | DELETE ONLY | REINDEX ] Default = FULL")
parser.add_argument("--blacklisted-tables", dest="blacklisted_tables", help="The tables we do not want to Vacuum")
parser.add_argument("--db-conn-opts", dest="db_conn_opts",
help="Additional connection options. name1=opt1[ name2=opt2]..")
parser.add_argument("--db-host", dest="db_host", required=True, help="The Cluster endpoint")
parser.add_argument("--db-port", dest="db_port", type=int, required=True,
help="The Cluster endpoint port : Default = 5439")
parser.add_argument("--db-pwd", dest="db_pwd", help="The Password for the Database User to connect to")
parser.add_argument("--db-user", dest="db_user", required=True, help="The Database User to connect to")
parser.add_argument("--debug ", dest="debug", default=False,
help="Generate Debug Output including SQL Statements being run")
parser.add_argument("--ignore-errors", dest="ignore_errors", default=True,
help="Ignore errors raised when running and continue processing")
parser.add_argument("--max-table-size-mb", dest="max_table_size_mb", type=int,
help="Maximum table size in MB : Default = 700*1024 MB")
parser.add_argument("--output-file", dest="output_file", help="The full path to the output file to be generated")
parser.add_argument("--predicate-cols", dest="predicate_cols", help="Analyze predicate columns only")
parser.add_argument("--query-group", dest="query_group", help="Set the query_group for all queries")
parser.add_argument("--require-ssl", dest="require_ssl", default=False,
help="Does the connection require SSL? (True | False")
parser.add_argument("--schema-name", dest="schema_name",
help="The Schema to be Analyzed or Vacuumed (REGEX: Default = public")
parser.add_argument("--slot-count", dest="slot_count", help="Modify the wlm_query_slot_count : Default = 1")
parser.add_argument("--suppress-cloudwatch", dest="suppress_cw",
help="Don't emit CloudWatch metrics for analyze or vacuum when set to True")
parser.add_argument("--db", dest="db", help="The Database to Use")
full_args = parser.parse_args()
parse_args = {}
# remove args that end up as None
for k, v in vars(full_args).items():
if v is not None:
parse_args[k] = v
def main(argv):
# get environmental args
args = {config_constants.DB_NAME: os.environ.get('PGDATABASE', None),
config_constants.DB_USER: os.environ.get('PGUSER', None),
config_constants.DB_PASSWORD: os.environ.get('PGPASSWORD', None),
config_constants.DB_HOST: os.environ.get('PGHOST', None),
config_constants.DB_PORT: os.environ.get('PGPORT', 5439)}
# add argparse args
args.update(parse_args)
if args.get(config_constants.OUTPUT_FILE) is not None:
sys.stdout = open(args.get(config_constants.OUTPUT_FILE), 'w')
# invoke the main method of the utility
result = analyze_vacuum.run_analyze_vacuum(**args)
if result is not None:
sys.exit(result)
else:
sys.exit(0)
if __name__ == "__main__":
main(sys.argv)