From 02aec8b1a2c68dd270e107b12fc8f002c4729bc2 Mon Sep 17 00:00:00 2001 From: "David W. Streever" Date: Thu, 17 Sep 2020 21:59:41 -0400 Subject: [PATCH] Formatting. Started Analyze check --- .../streever/hive/sre/CheckCalculation.java | 33 +++- .../java/com/streever/hive/sre/DbPaths.java | 160 +++++++++++----- .../com/streever/hive/sre/DbSetProcess.java | 177 ++++++++++-------- .../java/com/streever/hive/sre/Utils.java | 15 +- .../resources/MYSQL/hive_sre_queries.yaml | 29 ++- .../resources/POSTGRES/hive_sre_queries.yaml | 23 ++- .../main/resources/procs/h3_upg_procs.yaml | 63 +------ .../main/resources/procs/hive_sre_procs.yaml | 55 ++++-- 8 files changed, 349 insertions(+), 206 deletions(-) diff --git a/hive-sre/src/main/java/com/streever/hive/sre/CheckCalculation.java b/hive-sre/src/main/java/com/streever/hive/sre/CheckCalculation.java index a6a783e..f745289 100644 --- a/hive-sre/src/main/java/com/streever/hive/sre/CheckCalculation.java +++ b/hive-sre/src/main/java/com/streever/hive/sre/CheckCalculation.java @@ -1,7 +1,13 @@ package com.streever.hive.sre; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; +import java.io.PrintStream; import java.util.Arrays; +@JsonIgnoreProperties({"scriptEngine"}) public class CheckCalculation implements Cloneable { // Needs to return a boolean. @@ -15,6 +21,15 @@ public class CheckCalculation implements Cloneable { // passed to the engine. 
private String[] params; + /** + * allows stdout to be captured if necessary + */ + public PrintStream successStream = System.out; + /** + * allows stderr to be captured if necessary + */ + public PrintStream errorStream = System.err; + public String getTest() { return test; } @@ -47,9 +62,25 @@ public void setParams(String[] params) { this.params = params; } + public PrintStream getSuccessStream() { + return successStream; + } + + public void setSuccessStream(PrintStream successStream) { + this.successStream = successStream; + } + + public PrintStream getErrorStream() { + return errorStream; + } + + public void setErrorStream(PrintStream errorStream) { + this.errorStream = errorStream; + } + @Override protected Object clone() throws CloneNotSupportedException { - CheckCalculation rtn = (CheckCalculation)super.clone(); + CheckCalculation rtn = (CheckCalculation) super.clone(); rtn.setFail(this.fail); rtn.setTest(this.test); rtn.setPass(this.pass); diff --git a/hive-sre/src/main/java/com/streever/hive/sre/DbPaths.java b/hive-sre/src/main/java/com/streever/hive/sre/DbPaths.java index 9df1266..bb9eeca 100644 --- a/hive-sre/src/main/java/com/streever/hive/sre/DbPaths.java +++ b/hive-sre/src/main/java/com/streever/hive/sre/DbPaths.java @@ -2,18 +2,19 @@ import com.streever.hadoop.HadoopSession; import com.streever.hadoop.shell.command.CommandReturn; +import com.streever.hive.reporting.ReportingConf; import com.streever.sql.JDBCUtils; import com.streever.sql.QueryDefinition; import com.streever.sql.ResultArray; +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; +import javax.script.ScriptException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import java.util.UUID; +import java.util.*; import static com.streever.hive.reporting.ReportCounter.*; @@ -22,7 +23,9 @@ public class DbPaths extends 
SRERunnable { private DbSetProcess parent; private HadoopSession cliSession; - private List checks = new ArrayList(); + private List commandChecks = new ArrayList(); + private CheckCalculation calculationCheck = null; + private ScriptEngine scriptEngine = null; public DbSetProcess getParent() { return parent; @@ -36,37 +39,61 @@ public HadoopSession getCliSession() { return cliSession; } - public List getChecks() { - return checks; + public List getCommandChecks() { + return commandChecks; } - public void setChecks(List checks) { - this.checks = checks; + public void setCommandChecks(List commandChecks) { + this.commandChecks = commandChecks; + } + + public CheckCalculation getCalculationCheck() { + return calculationCheck; + } + + public void setCalculationCheck(CheckCalculation calculationCheck) { + this.calculationCheck = calculationCheck; } public DbPaths(String name, DbSetProcess dbSet) { setDisplayName(name); setParent(dbSet); + if (scriptEngine == null) { + ScriptEngineManager sem = new ScriptEngineManager(); + scriptEngine = sem.getEngineByName("nashorn"); + } + } @Override public Boolean init() { Boolean rtn = Boolean.FALSE; - for (CommandReturnCheck check : parent.getChecks()) { - try { - CommandReturnCheck newCheck = (CommandReturnCheck) check.clone(); - checks.add(newCheck); - // Connect CommandReturnCheck counter to this counter as a child. - // TODO: Need to set Counters name from the 'check' - getCounter().addChild(newCheck.getCounter()); - // Redirect Output. - if (newCheck.getErrorFilename() == null) { - newCheck.setErrorStream(this.error); - } - if (newCheck.getSuccessFilename() == null) { - newCheck.setSuccessStream(this.success); + if (parent.getCommandChecks() != null) { + for (CommandReturnCheck check : parent.getCommandChecks()) { + try { + CommandReturnCheck newCheck = (CommandReturnCheck) check.clone(); + commandChecks.add(newCheck); + // Connect CommandReturnCheck counter to this counter as a child. 
+ // TODO: Need to set Counters name from the 'check' + getCounter().addChild(newCheck.getCounter()); + // Redirect Output. + if (newCheck.getErrorFilename() == null) { + newCheck.setErrorStream(this.error); + } + if (newCheck.getSuccessFilename() == null) { + newCheck.setSuccessStream(this.success); + } + // TODO: Set success and error printstreams to output files. + } catch (CloneNotSupportedException e) { + e.printStackTrace(); } - // TODO: Set success and error printstreams to output files. + } + } + if (parent.getCalculationCheck() != null) { + try { + this.calculationCheck = (CheckCalculation) parent.getCalculationCheck().clone(); + this.calculationCheck.setSuccessStream(this.success); + this.calculationCheck.setErrorStream(this.error); } catch (CloneNotSupportedException e) { e.printStackTrace(); } @@ -90,6 +117,7 @@ public Boolean init() { public void run() { this.setStatus(STARTED); QueryDefinition queryDefinition = null; + try (Connection conn = getParent().getParent().getConnectionPools(). getMetastoreDirectConnection()) { @@ -117,38 +145,82 @@ public void run() { String[] args = new String[columnsArray.length]; for (int a = 0; a < columnsArray.length; a++) { if (columnsArray[a][i] != null) - args[a] = columnsArray[a][i]; + args[a] = columnsArray[a][i]; else args[a] = " "; // Prevent null in array. Messes up String.format when array has nulls. } - for (CommandReturnCheck lclCheck : getChecks()) { + if (getCommandChecks() != null) { + for (CommandReturnCheck lclCheck : getCommandChecks()) { + try { + String rcmd = lclCheck.getFullCommand(args); + if (rcmd != null) { + CommandReturn cr = getCliSession().processInput(rcmd); + lclCheck.incProcessed(1); + if (!cr.isError() || (lclCheck.getInvertCheck() && cr.isError())) { + lclCheck.onSuccess(cr); + lclCheck.incSuccess(1); + this.incSuccess(1); + } else { + lclCheck.onError(cr); + lclCheck.incError(1); + this.incError(1); + } + } + } catch (RuntimeException t) { + // Malformed cli request. 
Input is missing an element required to complete call. + // Unusual, but not an expection. + } + } + incProcessed(1); + } + + if (calculationCheck != null) { +// for (int j = 0; j < metastoreQueryDefinition.getListingColumns().length; j++) { +// record[j] = metastoreRecords[j][i]; +// } + + List combined = new LinkedList(Arrays.asList(args)); + + // Configured Params +// if (metastoreQueryDefinition.getCheck().getParams() != null) +// combined.addAll(Arrays.asList(metastoreQueryDefinition.getCheck().getParams())); try { - String rcmd = lclCheck.getFullCommand(args); - if (rcmd != null) { - CommandReturn cr = getCliSession().processInput(rcmd); - lclCheck.incProcessed(1); - if (!cr.isError() || (lclCheck.getInvertCheck() && cr.isError())) { - lclCheck.onSuccess(cr); - lclCheck.incSuccess(1); - this.incSuccess(1); - } else { - lclCheck.onError(cr); - lclCheck.incError(1); - this.incError(1); + String testStr = String.format(calculationCheck.getTest(), combined.toArray()); + Boolean checkTest = null; + checkTest = (Boolean) scriptEngine.eval(testStr); + if (checkTest) { + if (calculationCheck.getPass() != null) { + String passStr = String.format(calculationCheck.getPass(), combined.toArray()); + String passResult = (String) scriptEngine.eval(passStr); + success.println(passResult); + } + + } else { + if (calculationCheck.getFail() != null) { + String failStr = String.format(calculationCheck.getFail(), combined.toArray()); + String failResult = (String) scriptEngine.eval(failStr); + success.println(failResult); } } - } catch (RuntimeException t) { - // Malformed cli request. Input is missing an element required to complete call. - // Unusual, but not an expection. 
+ incSuccess(1); + + incProcessed(1); + } catch (ScriptException e) { + e.printStackTrace(); + System.err.println("Issue with script eval: " + this.getDisplayName()); + } catch (MissingFormatArgumentException mfa) { + mfa.printStackTrace(); + System.err.println("Bad Argument Match up for PATH check rule: " + this.getDisplayName()); } + + } - incProcessed(1); } } } catch (SQLException e) { - if (getChecks().size() > 0) { - getChecks().get(0).errorStream.println((queryDefinition != null) ? queryDefinition.getStatement() : "Unknown"); - getChecks().get(0).errorStream.println("Failure in DbPaths" + e.getMessage()); + if (getCommandChecks().size() > 0) { + getCommandChecks().get(0).errorStream.println((queryDefinition != null) ? queryDefinition.getStatement() : "Unknown"); + getCommandChecks().get(0).errorStream.println("Failure in DbPaths" + e.getMessage()); } else { error.println((queryDefinition != null) ? queryDefinition.getStatement() : "Unknown"); error.println("Failure in DbPaths" + e.getMessage()); diff --git a/hive-sre/src/main/java/com/streever/hive/sre/DbSetProcess.java b/hive-sre/src/main/java/com/streever/hive/sre/DbSetProcess.java index 255a47e..568dcf7 100644 --- a/hive-sre/src/main/java/com/streever/hive/sre/DbSetProcess.java +++ b/hive-sre/src/main/java/com/streever/hive/sre/DbSetProcess.java @@ -24,18 +24,27 @@ public class DbSetProcess extends SreProcessBase { // private List dbPaths; - private List checks; + private List commandChecks; + private CheckCalculation calculationCheck; private String dbListingQuery; private String[] listingColumns; private String pathsListingQuery; - public List getChecks() { - return checks; + public List getCommandChecks() { + return commandChecks; } - public void setChecks(List checks) { - this.checks = checks; + public void setCommandChecks(List commandChecks) { + this.commandChecks = commandChecks; + } + + public CheckCalculation getCalculationCheck() { + return calculationCheck; + } + + public void 
setCalculationCheck(CheckCalculation calculationCheck) { + this.calculationCheck = calculationCheck; } public String getDbListingQuery() { @@ -73,71 +82,77 @@ public void setPathsListingQuery(String pathsListingQuery) { protected void initHeader() { if (getTitle() != null) this.success.println(ReportingConf.substituteVariables(getTitle())); + if (getNote() != null) + this.success.println(ReportingConf.substituteVariables(getNote())); + if (getHeader() != null) + this.success.println(ReportingConf.substituteVariables(getHeader())); - for (CommandReturnCheck check : getChecks()) { - if (getTitle() != null) { - check.successStream.println(ReportingConf.substituteVariables(getTitle())); - check.errorStream.println(ReportingConf.substituteVariables(getTitle())); - } - if (getHeader() != null) { - check.successStream.println(getHeader()); - check.errorStream.println(getHeader()); - } - if (getNote() != null) { - check.successStream.println(getNote()); - check.errorStream.println(getNote()); - } - - // If details for stream output are available in the check definition. - // Set the Header if defined. 
- if (check.getInvertCheck() && check.getTitle() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(ReportingConf.substituteVariables(check.getTitle())); - } - if (check.getProcessOnSuccess()) { - check.successStream.println(ReportingConf.substituteVariables(check.getTitle())); - } - } - if (check.getInvertCheck() && check.getNote() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(check.getNote()); - } - if (check.getProcessOnSuccess()) { - check.successStream.println(check.getNote()); + if (getCommandChecks() != null) { + for (CommandReturnCheck check : getCommandChecks()) { + if (getTitle() != null) { + check.successStream.println(ReportingConf.substituteVariables(getTitle())); + check.errorStream.println(ReportingConf.substituteVariables(getTitle())); } - } - if (check.getInvertCheck() && check.getHeader() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(check.getHeader()); + if (getNote() != null) { + check.successStream.println(getNote()); + check.errorStream.println(getNote()); } - if (check.getProcessOnSuccess()) { - check.successStream.println(check.getHeader()); + if (getHeader() != null) { + check.successStream.println(getHeader()); + check.errorStream.println(getHeader()); } - } - // TODO: Validate inversion. - if (!check.getInvertCheck() && check.getTitle() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(ReportingConf.substituteVariables(check.getTitle())); + // If details for stream output are available in the check definition. + // Set the Header if defined. 
+ if (check.getInvertCheck() && check.getTitle() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(ReportingConf.substituteVariables(check.getTitle())); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(ReportingConf.substituteVariables(check.getTitle())); + } } - if (check.getProcessOnSuccess()) { - check.successStream.println(ReportingConf.substituteVariables(check.getTitle())); + if (check.getInvertCheck() && check.getNote() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(check.getNote()); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(check.getNote()); + } } - } - if (!check.getInvertCheck() && check.getNote() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(check.getNote()); + if (check.getInvertCheck() && check.getHeader() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(check.getHeader()); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(check.getHeader()); + } } - if (check.getProcessOnSuccess()) { - check.successStream.println(check.getNote()); + + // TODO: Validate inversion. 
+ if (!check.getInvertCheck() && check.getTitle() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(ReportingConf.substituteVariables(check.getTitle())); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(ReportingConf.substituteVariables(check.getTitle())); + } } - } - if (!check.getInvertCheck() && check.getHeader() != null) { - if (check.getProcessOnError()) { - check.errorStream.println(check.getHeader()); + if (!check.getInvertCheck() && check.getNote() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(check.getNote()); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(check.getNote()); + } } - if (check.getProcessOnSuccess()) { - check.successStream.println(check.getHeader()); + if (!check.getInvertCheck() && check.getHeader() != null) { + if (check.getProcessOnError()) { + check.errorStream.println(check.getHeader()); + } + if (check.getProcessOnSuccess()) { + check.successStream.println(check.getHeader()); + } } } } @@ -148,17 +163,19 @@ protected void initHeader() { public void setOutputDirectory(String outputDirectory) throws FileNotFoundException { // Allow each Check to have its own output stream. super.setOutputDirectory(outputDirectory); - for (CommandReturnCheck check : getChecks()) { - // If details for stream output are available in the check definition. - if (check.getErrorFilename() != null) { - check.errorStream = outputFile(outputDirectory + System.getProperty("file.separator") + check.getErrorFilename()); - } else { - check.errorStream = this.error; - } - if (check.getSuccessFilename() != null) { - check.successStream = outputFile(outputDirectory + System.getProperty("file.separator") + check.getSuccessFilename()); - } else { - check.successStream = this.success; + if (getCommandChecks() != null) { + for (CommandReturnCheck check : getCommandChecks()) { + // If details for stream output are available in the check definition. 
+ if (check.getErrorFilename() != null) { + check.errorStream = outputFile(outputDirectory + System.getProperty("file.separator") + check.getErrorFilename()); + } else { + check.errorStream = this.error; + } + if (check.getSuccessFilename() != null) { + check.successStream = outputFile(outputDirectory + System.getProperty("file.separator") + check.getSuccessFilename()); + } else { + check.successStream = this.success; + } } } } @@ -238,14 +255,16 @@ public String getOutputDetails() { StringBuilder sb = new StringBuilder(); if (defaultReturnInfo.length() > 0) sb.append(defaultReturnInfo).append("\n"); - for (CommandReturnCheck check : getChecks()) { - if (check.getSuccessFilename() != null) { - sb.append("\t" + check.getSuccessDescription() + " -> " + getOutputDirectory() + System.getProperty("file.separator") + - check.getSuccessFilename()).append("\n"); - } - if (check.getErrorFilename() != null) { - sb.append("\t" + check.getErrorDescription() + " -> " + getOutputDirectory() + System.getProperty("file.separator") + - check.getErrorFilename()); + if (getCommandChecks() != null) { + for (CommandReturnCheck check : getCommandChecks()) { + if (check.getSuccessFilename() != null) { + sb.append("\t" + check.getSuccessDescription() + " -> " + getOutputDirectory() + System.getProperty("file.separator") + + check.getSuccessFilename()).append("\n"); + } + if (check.getErrorFilename() != null) { + sb.append("\t" + check.getErrorDescription() + " -> " + getOutputDirectory() + System.getProperty("file.separator") + + check.getErrorFilename()); + } } } return sb.toString(); diff --git a/hive-sre/src/main/java/com/streever/hive/sre/Utils.java b/hive-sre/src/main/java/com/streever/hive/sre/Utils.java index 2e4d7d1..108e9a8 100644 --- a/hive-sre/src/main/java/com/streever/hive/sre/Utils.java +++ b/hive-sre/src/main/java/com/streever/hive/sre/Utils.java @@ -13,11 +13,16 @@ public static String dirToPartitionSpec(String directoryPart) throws Unsupported String[] directories = 
directoryPart.split("\\/"); String[] partitionSpecs = new String[directories.length]; int loc = 0; - for (String directory: directories) { - String[] specParts = directory.split("="); - String partDir = null; - partDir = URLDecoder.decode(specParts[1], StandardCharsets.UTF_8.toString()); - partitionSpecs[loc++] = specParts[0] + "=\"" + partDir + "\""; + try { + for (String directory : directories) { + String[] specParts = directory.split("="); + String partDir = null; + partDir = URLDecoder.decode(specParts[1], StandardCharsets.UTF_8.toString()); + partitionSpecs[loc++] = specParts[0] + "=\"" + partDir + "\""; + } + } catch (Throwable t) { + System.err.println("Issue with partition directory spec: " + directoryPart); +// throw t; } StringBuilder rtn = new StringBuilder(); rtn.append(StringUtils.join(partitionSpecs, ",")); diff --git a/hive-sre/src/main/resources/MYSQL/hive_sre_queries.yaml b/hive-sre/src/main/resources/MYSQL/hive_sre_queries.yaml index 1a4e23d..88446b8 100644 --- a/hive-sre/src/main/resources/MYSQL/hive_sre_queries.yaml +++ b/hive-sre/src/main/resources/MYSQL/hive_sre_queries.yaml @@ -260,6 +260,25 @@ query_definitions: statement: " SELECT COUNT(*) as count FROM FUNCS " + db_tbl_count: + statement: "SELECT + name, + COUNT(tbl_name) tbl_count + FROM + DBS dbs + INNER JOIN + TBLS tbls + ON + dbs.db_id = tbls.db_id + WHERE + dbs.name LIKE ? + GROUP BY + name" + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 acid_table_list: statement: " SELECT @@ -281,7 +300,13 @@ FROM ON T.TBL_ID = P.TBL_ID WHERE - T.TBL_TYPE = 'MANAGED_TABLE' + D.NAME = ? 
+ AND T.TBL_TYPE = 'MANAGED_TABLE' AND PARAMS.PARAM_KEY = 'transactional' AND PARAMS.PARAM_VALUE in ('true','True','TRUE') - " \ No newline at end of file + " + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 diff --git a/hive-sre/src/main/resources/POSTGRES/hive_sre_queries.yaml b/hive-sre/src/main/resources/POSTGRES/hive_sre_queries.yaml index 85700e9..3de8098 100644 --- a/hive-sre/src/main/resources/POSTGRES/hive_sre_queries.yaml +++ b/hive-sre/src/main/resources/POSTGRES/hive_sre_queries.yaml @@ -283,6 +283,21 @@ SELECT FROM \"FUNCS\" " + db_tbl_count: + statement: "SELECT + \"DBS\".\"NAME\", + COUNT(\"TBLS\".\"TBL_NAME\") AS \"TBL_COUNT\" + FROM + \"DBS\" \"DBS\" + INNER JOIN + \"TBLS\" \"TBLS\" + ON + \"DBS\".\"DB_ID\" = \"TBLS\".\"DB_ID\" + WHERE + \"DBS\".\"NAME\" LIKE ? + GROUP BY + \"DBS\".\"NAME\" + " acid_table_list: statement: " SELECT @@ -304,7 +319,13 @@ SELECT ON \"T\".\"TBL_ID\" = \"P\".\"TBL_ID\" WHERE - \"T\".\"TBL_TYPE\" = 'MANAGED_TABLE' + \"D\".\"NAME\" = ? 
+ AND \"T\".\"TBL_TYPE\" = 'MANAGED_TABLE' AND \"PARAMS\".\"PARAM_KEY\" = 'transactional' AND \"PARAMS\".\"PARAM_VALUE\" IN ('true', 'True', 'TRUE') " + parameters: + dbs: + initial: "%" + sqlType: 12 + location: 1 diff --git a/hive-sre/src/main/resources/procs/h3_upg_procs.yaml b/hive-sre/src/main/resources/procs/h3_upg_procs.yaml index 1baa454..779c3b7 100644 --- a/hive-sre/src/main/resources/procs/h3_upg_procs.yaml +++ b/hive-sre/src/main/resources/procs/h3_upg_procs.yaml @@ -7,7 +7,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location" ] pathsListingQuery: "tbl_part_locations" - checks: + commandChecks: - displayName: "Hive 3 Upgrade Check - Missing Directory Locations Scan" title: "#Hive 3 Upgrade Check (v.${Implementation-Version})\n\n## Missing Directory Locations Scan" note: " @@ -52,7 +52,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "db_name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_check" ] pathsListingQuery: "tbl_mngd_non_acid_locations" - checks: + commandChecks: - displayName: "Hive 3 Upgrade Check - Bad Filename Format for ACID ORC Conversion" title: "# Hive 3 Upgrade Check (v.${Implementation-Version})\n\n## Bad Filename Format for ACID ORC Conversion" header: "| DB.Table:Partition | Path | Filename |\n|:---|:---|:---|" @@ -61,7 +61,7 @@ processes: Tables that will be converted to ACID tables, require filenames to match a very specific pattern. If files in the table(partition) directories do not meet this pattern, the conversion WILL fail.\n\n There are two options for handling this:\n - - Convert the table to 'EXTERNAL' so the will NOT be converted. These tables are listed in the *Managed Non-ACID to ACID Table Migrations* + - Convert the table to 'EXTERNAL' so they will NOT be converted. These tables are listed in the *Managed Non-ACID to ACID Table Migrations* report.\n - Perform a classic 'INSERT OVERWRITE TABLE FROM '. 
This exercise will force 'Hive' to rewrite the table data back into the same directories. The process of _rewriting_ will ensure the filenames match the expected patterns.\n @@ -77,11 +77,6 @@ processes: reportOnPath: true processOnError: true processOnSuccess: false - # checkCalculations: - # managedCheck: - # RECORDS: - # test: "if ( \"%3$s\".equals(\"MANAGED\\_TABLE\")) true; else false;" - # pass: "\"| %1$s.%2$s:%4$s | %5$s | %7$s |\"" - type: "dbSet" id: 3 active: true @@ -94,7 +89,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "db_name" ,"tbl_name" , "tbl_type" ,"tbl_location" ] pathsListingQuery: "managed_2_acid" - checks: + commandChecks: - displayName: "Hive 3 Upgrade Check - Potential ACID Conversions" title: "-- Managed Non - ACID to ACID Table Migrations (v.${Implementation-Version})\n" note: " @@ -142,7 +137,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name", "path_location" ] pathsListingQuery: "managed_tbl_locations" - checks: + commandChecks: - displayName: "-- Hive 3 Upgrade Checks -- Compaction Check" title: "-- Hive 3 Upgrades - Compaction Check (v.${Implementation-Version})\n" note: " @@ -174,24 +169,6 @@ processes: pass: "\"ALTER TABLE %1$s.%2$s COMPACT 'MAJOR';\"" fail: "\"ALTER TABLE %1$s.%2$s PARTITION (\" + com.streever.hive.sre.Utils.dirToPartitionSpec('%4$s') + \") COMPACT \\\"MAJOR\\\"\\;\"" params: [ "hive" ] - - # - type: "metastore.query" - # id: 5 - # active: true - # name: "Questionable Serde's Check" - # queryDefinitionReference: "/hive_u3_queries.yaml" - # errorDescription: "Issues" - # successDescription: "Tables using non-standard SERDE's" - # errorFilename: "hive_questionable_serde_issues.txt" - # successFilename: "hive_questionable_serde.txt" - # metastoreQueryDefinition: - # query: "questionable_serdes" - # listingColumns: ["db_name" ,"tbl_name", "tbl_serde_slib"] - # resultMessageHeader: "***********************************************************\n 
- # Listed tables should be review to ensure the Serde is still available.\n - # Missing Serde's can disrupt a Hive Upgrade/Migration Process\n - # ***********************************************************" - # resultMessageDetailTemplate: "%1$s.%2$s is using a non-base serde '%3$s'" - type: "metastore.report" id: 5 active: true @@ -221,33 +198,3 @@ processes: resultMessageHeader: "\n## Table Partition Count" resultMessageDetailHeader: "| Database | Tables | Partitions |\n|:---|:---|:---|" resultMessageDetailTemplate: "| %1$s | %2$s | %3$s |" -# -# -# - type: "metastore.query" -# id: 6 -# active: true -# name: "Managed Table Shadows" -# queryDefinitionReference: "/hive_u3_queries.yaml" -# errorDescription: "Issues" -# successDescription: "Shadow Tables" -# errorFilename: "managed_tbl_shadows_issue.txt" -# successFilename: "managed_tbl_shadows.txt" -# metastoreQueryDefinition: -# query: "managed_tbl_shadows" -# listingColumns: ["db_name" ,"tbl_name", "tbl_location"] -# resultMessageHeader: "***********************************************************" -# resultMessageDetailTemplate: "Table %1$s.%2$s at location %3$s is sharing location with another managed table." 
-# - type: "metastore.query" -# id: 7 -# active: true -# name: "Database / Table / Partition Count" -# queryDefinitionReference: "/hive_u3_queries.yaml" -# errorDescription: "Issues" -# successDescription: "Database / Table / Partition Counts" -# errorFilename: "db_counts_issue.txt" -# successFilename: "db_counts.txt" -# metastoreQueryDefinition: -# query: "db_tbl_part_count" -# listingColumns: ["name" ,"tbl_count", "part_count"] -# resultMessageHeader: "***********************************************************" -# resultMessageDetailTemplate: "%1$s\t%2$s\t%3$s" \ No newline at end of file diff --git a/hive-sre/src/main/resources/procs/hive_sre_procs.yaml b/hive-sre/src/main/resources/procs/hive_sre_procs.yaml index dd4de53..0632485 100644 --- a/hive-sre/src/main/resources/procs/hive_sre_procs.yaml +++ b/hive-sre/src/main/resources/procs/hive_sre_procs.yaml @@ -44,59 +44,59 @@ processes: metastoreQueryDefinitions: - query: "dbs_parameters" listingColumns: [ "name" ,"param_key", "param_value" ] - resultMessageHeader: "\n## Database Parameters" + resultMessageHeader: "\n## Database Parameters\n" resultMessageDetailHeader: "| DB | Key | Value |\n|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|" - query: "partition_count_by_type" listingColumns: [ "tbl_type", "count" ] - resultMessageHeader: "\n## Partition Count by Table Type" + resultMessageHeader: "\n## Partition Count by Table Type\n" resultMessageDetailHeader: "| Table Type | Count |\n|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|" - query: "partition_count_by_table" listingColumns: [ "db_name" ,"tbl_name", "tbl_type", "num_of_partitions" ] - resultMessageHeader: "\n## Partition Count by Table" + resultMessageHeader: "\n## Partition Count by Table\n" resultMessageDetailHeader: "| Database | Table | Type | Num of Partitions|\n|:---|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|%4$s|" - query: "tbl_param_summary" listingColumns: [ "param_key", "count" ] - 
resultMessageHeader: "\n## Table Parameter Summary" + resultMessageHeader: "\n## Table Parameter Summary\n" resultMessageDetailHeader: "| Parameter | Count |\n|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|" - query: "transactional_param_use" listingColumns: [ "tbl_type" ,"param_value", "count" ] - resultMessageHeader: "\n## Table Parameter Use Summary" + resultMessageHeader: "\n## Table Parameter Use Summary\n" resultMessageDetailHeader: "| Table Type | Parameter | Count |\n|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|" - query: "transactional_tables" listingColumns: [ "name" ,"tbl_type", "param_value", "count" ] - resultMessageHeader: "\n## Transactional Table Parameter Summary" + resultMessageHeader: "\n## Transactional Table Parameter Summary\n" resultMessageDetailHeader: "| Database | Table Type | Transaction Flag | Count|\n|:---|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|%4$s|" - query: "serde_table_type_use_summary" listingColumns: [ "tbl_type" ,"input_format", "output_format", "count" ] - resultMessageHeader: "\n## SERDE Table Type Use Summary" + resultMessageHeader: "\n## SERDE Table Type Use Summary\n" resultMessageDetailHeader: "| Table Type | Input Format | Output Format | Count |\n|:---|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|%4$s|" - query: "serde_use_by_db_tbl_summary" listingColumns: [ "name" , "tbl_type" ,"input_format", "output_format", "count" ] - resultMessageHeader: "\n## Serde by Database / Table Type Summary" + resultMessageHeader: "\n## Serde by Database / Table Type Summary\n" resultMessageDetailHeader: "| Database | Table Type | Input Format | Output Format | Count |\n|:---|:---|:---|:---|:---|" resultMessageDetailTemplate: "|%1$s|%2$s|%3$s|%4$s|%5$s|" - type: "dbSet" id: 3 active: true displayName: "Table and Partition Scan for Small Files" - title: "# Hive Table and Partition Scan (v.${Implementation-Version})" + title: "# Hive Table and Partition Scan 
(v.${Implementation-Version})\n" queryDefinitionReference: "/hive_u3_queries.yaml" dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location" ] pathsListingQuery: "tbl_part_locations" - checks: + commandChecks: - displayName: "Small Files" title: "## Small Files Report (<64Mg/File Average Size Threshold)\n" note: "Small files are the #1 cause of poor table performance in Hive. Causes can range from ingestion techniques, insert append operations, and excessive partition strategies. Addressing table with small files that are *READ OFTEN* will - reduce compute resources, query times, and most likely space. + reduce compute resources, query times, and most likely space.\n " header: "| Database | Table | Partition | Path | Dir. Count | File Count | Total Size | Avg. Size(MB) |\n|:---|:---|:---|:---|---:|---:|---:|---:|" invertCheck: false @@ -125,9 +125,9 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location" ] pathsListingQuery: "tbl_part_locations" - checks: + commandChecks: - displayName: "Volume Report" - title: "## Volume Report" + title: "## Volume Report\n" header: "| Database | Table | Type | Partition | Path | Dir. 
Count | File Count | Total Size | \n|:---|:---|:---|:---|:---|---:|---:|---:|" invertCheck: false pathCommand: "count -h %5$s" @@ -155,7 +155,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location" ] pathsListingQuery: "tbl_part_locations" - checks: + commandChecks: - displayName: "Empty Tables / Partitions" title: "## Empty Tables / Partitions\n" note: "Empty Tables, usually the case with abandoned efforts that were cleanup on HDFS but not in the Hive @@ -187,7 +187,7 @@ processes: dbListingQuery: "db_tbl_count" listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name", "path_location" ] pathsListingQuery: "managed_tbl_locations" - checks: + commandChecks: - displayName: "Compaction Check" note: "-- Hive will compact managed tables under normal HIVE Standard Operations. Those compactions are triggered on tables/partitions once they've reached certain thresholds. But sometimes, tables/partitions that haven't reached @@ -213,7 +213,7 @@ processes: params: [ "hive" ] - type: "metastore.report" id: 7 - active: true + active: false displayName: "Hive ACID Tables" title: "# Hive ACID Tables (v.${Implementation-Version})" note: "Post Upgrade from Hive 1/2 to Hive 3 may result in statistics that are wrong. Actually, the statistics were @@ -235,3 +235,26 @@ processes: test: "if (\"%3$s\".equals(\"null\")) true; else false;" pass: "\"| %1$s | %2$s | %3$s | ALTER TABLE `%1$s`.`%2$s` COMPUTE STATISTICS FOR COLUMNS;|\"" fail: "\"| %1$s | %2$s | %3$s | ALTER TABLE `%1$s`.`%2$s` PARTITION (\" + com.streever.hive.sre.Utils.dirToPartitionSpec('%3$s') + \") COMPUTE STATISTICS FOR COLUMNS;\"" + - type: "dbSet" + id: 8 + active: false + displayName: "Hive ACID Tables" + title: "# Hive ACID Tables (v.${Implementation-Version})\n" + note: "Post Upgrade from Hive 1/2 to Hive 3 may result in statistics that are wrong. 
Actually, the statistics were + wrong in Hive 1/2, but the CBO wasn't using them as it should and recalculating plans. In Hive 3, these statistics when + wrong and picked up by the CBO, could cause datasets to be disregarded. If you are experiencing data issues with any + tables that were ACID tables BEFORE the upgrade, you should run 'ANALYZE' on them to fix those statistics.\n + " + header: "| DB | Table | Partitions | Hive SQL |\n|:---|:---|:---|:---|" + queryDefinitionReference: "/hive_sre_queries.yaml" + errorDescription: "Processing Issues" + successDescription: "Hive Metastore" + errorFilename: "acid_analyze_tables_err.txt" + successFilename: "acid_analyze_tables.md" + dbListingQuery: "db_tbl_count" + listingColumns: [ "name" ,"tbl_name", "part_name" ] + pathsListingQuery: "acid_table_list" + calculationCheck: + test: "if (\"%3$s\" == null || \"%3$s\".trim().length() == 0) true; else false;" + pass: "\"| %1$s | %2$s | %3$s | ALTER TABLE `%1$s`.`%2$s` COMPUTE STATISTICS FOR COLUMNS;|\"" + fail: "\"| %1$s | %2$s | %3$s | ALTER TABLE `%1$s`.`%2$s` PARTITION (\" + com.streever.hive.sre.Utils.dirToPartitionSpec('%3$s') + \") COMPUTE STATISTICS FOR COLUMNS;\""