From 513d4372880b48ff58d9e37f3323f905195d5a95 Mon Sep 17 00:00:00 2001 From: Partho Sarthi Date: Fri, 3 May 2024 10:40:12 -0700 Subject: [PATCH] Remove support for cloud based event logs Signed-off-by: Partho Sarthi --- ...rk] Profiling Tool Notebook Template.ipynb | 76 +----------------- ...Qualification Tool Notebook Template.ipynb | 79 ++----------------- ...fication User Tool Notebook Template.ipynb | 59 +------------- 3 files changed, 14 insertions(+), 200 deletions(-) diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb index ae9feda2f..5ed2de146 100644 --- a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Profiling Tool Notebook Template.ipynb @@ -45,14 +45,10 @@ }, "outputs": [], "source": [ - "import warnings\n", "DEFAULT_TOOLS_VER = \"24.02.4\"\n", "dbutils.widgets.text(\"Tools Version\", DEFAULT_TOOLS_VER)\n", "TOOLS_VER=dbutils.widgets.get(\"Tools Version\")\n", - "\n", "print(f\"Using Tools Version: {TOOLS_VER}\")\n", - "if TOOLS_VER != DEFAULT_TOOLS_VER:\n", - " print(f\"Warning: Requested tools version ({TOOLS_VER}) differs from the default version ({DEFAULT_TOOLS_VER}). This may require different versions of depedencies for compatibility.\")\n", "\n", "dbutils.widgets.dropdown(\"CSP\", \"aws\", [\"aws\", \"azure\"])\n", "CSP=dbutils.widgets.get(\"CSP\")\n", @@ -110,24 +106,10 @@ "TOOL_JAR_LOCAL_PATH = f'{OUTPUT_PATH}/rapids-4-spark-tools.jar'\n", "download_file(TOOL_JAR_URL, TOOL_JAR_LOCAL_PATH)\n", "\n", - "# Download dependency jars\n", - "HADOOP_JAR_VER = '3.3.4'\n", - "HADOOP_JAR_URL = f'https://repo.maven.apache.org/maven2/org/apache/hadoop/hadoop-{CSP}/{HADOOP_JAR_VER}/hadoop-{CSP}-{HADOOP_JAR_VER}.jar'\n", - "HADOOP_JAR_LOCAL_PATH = f'{OUTPUT_PATH}/hadoop-{CSP}-{HADOOP_JAR_VER}.jar'\n", - "download_file(HADOOP_JAR_URL, HADOOP_JAR_LOCAL_PATH)\n", - "\n", - "if CSP=='aws':\n", - " AWS_JAVA_VER = '1.12.262'\n", - " AWS_JAVA_URL = f'https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk/{AWS_JAVA_VER}/aws-java-sdk-{AWS_JAVA_VER}.jar'\n", - " AWS_JAVA_LOCAL_PATH = f'{OUTPUT_PATH}/aws-java-sdk-{AWS_JAVA_VER}.jar'\n", - " download_file(AWS_JAVA_URL, AWS_JAVA_LOCAL_PATH)\n", - " os.environ[\"AWS_JAVA_LOCAL_PATH\"] = AWS_JAVA_LOCAL_PATH\n", - "\n", "os.environ[\"CSP\"] = CSP\n", "os.environ[\"EVENTLOG_PATH\"] = EVENTLOG_PATH\n", "os.environ[\"OUTPUT_PATH\"] = OUTPUT_PATH\n", "os.environ[\"TOOL_JAR_LOCAL_PATH\"] = TOOL_JAR_LOCAL_PATH\n", - "os.environ[\"HADOOP_JAR_LOCAL_PATH\"] = HADOOP_JAR_LOCAL_PATH\n", "\n", "WORKER_INFO_PATH = os.path.join(OUTPUT_PATH, \"worker_info.yaml\")\n", "os.environ[\"WORKER_INFO_PATH\"] = WORKER_INFO_PATH" @@ -176,56 +158,6 @@ " f.write(worker_info)" ] }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "476b1278-afb7-43c3-95d8-99a4821bc20b", - "showTitle": true, - "title": "CSP Credentials" - } - }, - "outputs": [], - "source": [ - "# Secrets required if accessing from s3\n", - "os.environ[\"AWS_ACCESS_KEY_ID\"] = 'your_aws_access_key_id'\n", - "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'your_aws_secret_access_key'\n", - "# Secrets required if accessing from abfs\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"] = 'your_azure_storage_account_name'\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"] = 'your_azure_storage_account_key'" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "73f07b7f-41f4-4751-a97d-074c55cbb2f1", - "showTitle": true, - "title": "Set Java Options" - }, - "jupyter": { - "source_hidden": true - } - }, - "outputs": [], - "source": [ - "if CSP=='azure':\n", - " JAVA_OPTS=f' -Drapids.tools.hadoop.fs.azure.account.key.{os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"]}.dfs.core.windows.net={os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"]}'\n", - " os.environ[\"_JAVA_OPTIONS\"] = JAVA_OPTS" - ] - }, { "cell_type": "code", "execution_count": 0, @@ -269,7 +201,7 @@ "outputs": [], "source": [ "%sh\n", - "java -Xmx10g -cp $TOOL_JAR_LOCAL_PATH:$HADOOP_JAR_LOCAL_PATH:$AWS_JAVA_LOCAL_PATH:/databricks/jars/* com.nvidia.spark.rapids.tool.profiling.ProfileMain --platform databricks-$CSP --csv --worker-info $WORKER_INFO_PATH --auto-tuner -o $OUTPUT_PATH $EVENTLOG_PATH > $CONSOLEOUTPUT_PATH" + "java -Xmx10g -cp $TOOL_JAR_LOCAL_PATH:/databricks/jars/* com.nvidia.spark.rapids.tool.profiling.ProfileMain --platform databricks-$CSP --csv --worker-info $WORKER_INFO_PATH --auto-tuner -o $OUTPUT_PATH $EVENTLOG_PATH > $CONSOLEOUTPUT_PATH" ] }, { @@ -425,7 +357,7 @@ "stack": true }, "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc", - "origId": 451087074559338, + "origId": 2466908328410530, "title": "Executive View", "version": "DashboardViewV1", "width": 1600 @@ -439,7 +371,7 @@ "stack": true }, "nuid": "62243296-4562-4f06-90ac-d7a609f19c16", - "origId": 451087074559340, + "origId": 2466908328410531, "title": "App View", "version": "DashboardViewV1", "width": 1920 @@ -448,7 +380,7 @@ "language": "python", "notebookMetadata": { "mostRecentlyExecutedCommandWithImplicitDF": { - "commandId": 451087074559324, + "commandId": 2466908328410506, "dataframes": [ "_sqldf" ] diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb index 863c499b9..2f71a5442 100644 --- a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template.ipynb @@ -73,10 +73,7 @@ "DEFAULT_TOOLS_VER = \"24.02.4\"\n", "dbutils.widgets.text(\"Tools Version\", DEFAULT_TOOLS_VER)\n", "TOOLS_VER=dbutils.widgets.get(\"Tools Version\")\n", - "\n", "print(f\"Using Tools Version: {TOOLS_VER}\")\n", - "if TOOLS_VER != DEFAULT_TOOLS_VER:\n", - " print(f\"Warning: Requested tools version ({TOOLS_VER}) differs from the default version ({DEFAULT_TOOLS_VER}). This may require different versions of depedencies for compatibility.\")\n", "\n", "dbutils.widgets.dropdown(\"CSP\", \"aws\", [\"aws\", \"azure\"])\n", "CSP=dbutils.widgets.get(\"CSP\")\n", @@ -134,74 +131,10 @@ "TOOL_JAR_LOCAL_PATH = f'{OUTPUT_PATH}/rapids-4-spark-tools.jar'\n", "download_file(TOOL_JAR_URL, TOOL_JAR_LOCAL_PATH)\n", "\n", - "# Download dependency jars\n", - "HADOOP_JAR_VER = '3.3.4'\n", - "HADOOP_JAR_URL = f'https://repo.maven.apache.org/maven2/org/apache/hadoop/hadoop-{CSP}/{HADOOP_JAR_VER}/hadoop-{CSP}-{HADOOP_JAR_VER}.jar'\n", - "HADOOP_JAR_LOCAL_PATH = f'{OUTPUT_PATH}/hadoop-{CSP}-{HADOOP_JAR_VER}.jar'\n", - "download_file(HADOOP_JAR_URL, HADOOP_JAR_LOCAL_PATH)\n", - "\n", - "if CSP=='aws':\n", - " AWS_JAVA_VER = '1.12.262'\n", - " AWS_JAVA_URL = f'https://repo.maven.apache.org/maven2/com/amazonaws/aws-java-sdk/{AWS_JAVA_VER}/aws-java-sdk-{AWS_JAVA_VER}.jar'\n", - " AWS_JAVA_LOCAL_PATH = f'{OUTPUT_PATH}/aws-java-sdk-{AWS_JAVA_VER}.jar'\n", - " download_file(AWS_JAVA_URL, AWS_JAVA_LOCAL_PATH)\n", - " os.environ[\"AWS_JAVA_LOCAL_PATH\"] = AWS_JAVA_LOCAL_PATH\n", - "\n", "os.environ[\"CSP\"] = CSP\n", "os.environ[\"EVENTLOG_PATH\"] = EVENTLOG_PATH\n", "os.environ[\"OUTPUT_PATH\"] = OUTPUT_PATH\n", - "os.environ[\"TOOL_JAR_LOCAL_PATH\"] = TOOL_JAR_LOCAL_PATH\n", - "os.environ[\"HADOOP_JAR_LOCAL_PATH\"] = HADOOP_JAR_LOCAL_PATH" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "429e0ce8-961b-4b53-9878-82d2fdc56426", - "showTitle": true, - "title": "CSP Credentials" - } - }, - "outputs": [], - "source": [ - "# Secrets required if accessing from s3\n", - "os.environ[\"AWS_ACCESS_KEY_ID\"] = 'your_aws_access_key_id'\n", - "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'your_aws_secret_access_key'\n", - "# Secrets required if accessing from abfs\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"] = 'your_azure_storage_account_name'\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"] = 'your_azure_storage_account_key'" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "c09f8d10-2f60-45cd-843b-ef82eab9a097", - "showTitle": true, - "title": "Set Java Options" - }, - "jupyter": { - "source_hidden": true - } - }, - "outputs": [], - "source": [ - "if CSP=='azure':\n", - " JAVA_OPTS=f' -Drapids.tools.hadoop.fs.azure.account.key.{os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"]}.dfs.core.windows.net={os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"]}'\n", - " os.environ[\"_JAVA_OPTIONS\"] = JAVA_OPTS" + "os.environ[\"TOOL_JAR_LOCAL_PATH\"] = TOOL_JAR_LOCAL_PATH" ] }, { @@ -247,7 +180,7 @@ "outputs": [], "source": [ "%sh\n", - "java -Xmx10g -cp $TOOL_JAR_LOCAL_PATH:$HADOOP_JAR_LOCAL_PATH:$AWS_JAVA_LOCAL_PATH:/databricks/jars/* com.nvidia.spark.rapids.tool.qualification.QualificationMain --platform databricks-$CSP -o $OUTPUT_PATH $EVENTLOG_PATH > $CONSOLEOUTPUT_PATH" + "java -Xmx10g -cp $TOOL_JAR_LOCAL_PATH:/databricks/jars/* com.nvidia.spark.rapids.tool.qualification.QualificationMain --platform databricks-$CSP -o $OUTPUT_PATH $EVENTLOG_PATH > $CONSOLEOUTPUT_PATH" ] }, { @@ -431,7 +364,7 @@ "stack": true }, "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc", - "origId": 451087074559309, + "origId": 2466908328410528, "title": "Executive View", "version": "DashboardViewV1", "width": 1600 @@ -445,7 +378,7 @@ "stack": true }, "nuid": "62243296-4562-4f06-90ac-d7a609f19c16", - "origId": 451087074559310, + "origId": 2466908328410529, "title": "App View", "version": "DashboardViewV1", "width": 1920 @@ -454,7 +387,7 @@ "language": "python", "notebookMetadata": { "mostRecentlyExecutedCommandWithImplicitDF": { - "commandId": 451087074559302, + "commandId": 2466908328410490, "dataframes": [ "_sqldf" ] @@ -483,7 +416,7 @@ } ] }, - "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template-2", + "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification Tool Notebook Template", "widgets": { "CSP": { "currentValue": "aws", diff --git a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb index 0fc089680..77cbc5d50 100644 --- a/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb +++ b/tools/databricks/[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template.ipynb @@ -76,7 +76,6 @@ "DEFAULT_TOOLS_VER = \"24.02.4\"\n", "dbutils.widgets.text(\"Tools Version\", DEFAULT_TOOLS_VER)\n", "TOOLS_VER=dbutils.widgets.get(\"Tools Version\")\n", - "\n", "print(f\"Using Tools Version: {TOOLS_VER}\")" ] }, @@ -158,56 +157,6 @@ "os.environ[\"OUTPUT_PATH\"] = OUTPUT_PATH" ] }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "f24a9f4d-9021-4658-ab8f-451206978257", - "showTitle": true, - "title": "CSP Credentials" - } - }, - "outputs": [], - "source": [ - "# Secrets required if accessing from s3\n", - "os.environ[\"AWS_ACCESS_KEY_ID\"] = 'your_aws_access_key_id'\n", - "os.environ[\"AWS_SECRET_ACCESS_KEY\"] = 'your_aws_secret_access_key'\n", - "# Secrets required if accessing from abfs\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"] = 'your_azure_storage_account_name'\n", - "os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"] = 'your_azure_storage_account_key'" - ] - }, - { - "cell_type": "code", - "execution_count": 0, - "metadata": { - "application/vnd.databricks.v1+cell": { - "cellMetadata": { - "byteLimit": 2048000, - "rowLimit": 10000 - }, - "inputWidgets": {}, - "nuid": "c83817b1-afcc-4321-b3d6-3f6ac6a5affe", - "showTitle": true, - "title": "Set Java Options" - }, - "jupyter": { - "source_hidden": true - } - }, - "outputs": [], - "source": [ - "if CSP=='azure':\n", - " JAVA_OPTS=f' -Drapids.tools.hadoop.fs.azure.account.key.{os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"]}.dfs.core.windows.net={os.environ[\"AZURE_STORAGE_ACCOUNT_KEY\"]}'\n", - " os.environ[\"_JAVA_OPTIONS\"] = JAVA_OPTS" - ] - }, { "cell_type": "code", "execution_count": 0, @@ -465,7 +414,7 @@ "stack": true }, "nuid": "91c1bfb2-695a-4e5c-8a25-848a433108dc", - "origId": 451087074559339, + "origId": 2466908328410532, "title": "Executive View", "version": "DashboardViewV1", "width": 1600 @@ -479,7 +428,7 @@ "stack": true }, "nuid": "62243296-4562-4f06-90ac-d7a609f19c16", - "origId": 451087074559341, + "origId": 2466908328410533, "title": "App View", "version": "DashboardViewV1", "width": 1920 @@ -488,7 +437,7 @@ "language": "python", "notebookMetadata": { "mostRecentlyExecutedCommandWithImplicitDF": { - "commandId": 451087074559330, + "commandId": 2466908328410520, "dataframes": [ "_sqldf" ] @@ -517,7 +466,7 @@ } ] }, - "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template-2", + "notebookName": "[RAPIDS Accelerator for Apache Spark] Qualification User Tool Notebook Template", "widgets": { "CSP": { "currentValue": "aws",