From 6a472ba8eef628fa6efb0540b9f97a04fdff092c Mon Sep 17 00:00:00 2001 From: liyuan Date: Thu, 23 Nov 2023 11:33:39 +0800 Subject: [PATCH 1/5] add markdown link checker Signed-off-by: liyuan --- .github/workflows/markdown-links-check.yml | 37 +++++++++++++++++++ .../markdown-links-check-config.json | 17 +++++++++ 2 files changed, 54 insertions(+) create mode 100644 .github/workflows/markdown-links-check.yml create mode 100644 .github/workflows/markdown-links-check/markdown-links-check-config.json diff --git a/.github/workflows/markdown-links-check.yml b/.github/workflows/markdown-links-check.yml new file mode 100644 index 000000000..1287f4af6 --- /dev/null +++ b/.github/workflows/markdown-links-check.yml @@ -0,0 +1,37 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# A workflow to check whether a PR contains broken hyperlinks +name: Check Markdown links + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + markdown-link-check: + runs-on: ubuntu-latest + steps: + - name: work around permission issue + run: git config --global --add safe.directory /github/workspace + - uses: actions/checkout@master + - uses: gaurav-nelson/github-action-markdown-link-check@v1 + with: + max-depth: -1 + use-verbose-mode: 'yes' + check-modified-files-only: 'yes' + config-file: '.github/workflows/markdown-links-check/markdown-links-check-config.json' + base-branch: 'main' + + \ No newline at end of file diff --git a/.github/workflows/markdown-links-check/markdown-links-check-config.json b/.github/workflows/markdown-links-check/markdown-links-check-config.json new file mode 100644 index 000000000..ec4af8ca8 --- /dev/null +++ b/.github/workflows/markdown-links-check/markdown-links-check-config.json @@ -0,0 +1,17 @@ +{ + "ignorePatterns": [ + { + "pattern": "https://github.com/NVIDIA/spark-rapids-tools/issues/*" + }, + { + "pattern": "http://localhost*" + }, + { + "pattern": "https://www.nvidia.com/en-us/security/pgp-key" + } + ], + "timeout": "15s", + "retryOn429": true, + "retryCount":30, + "aliveStatusCodes": [200, 403] +} \ No newline at end of file From 3b2649733a9ac20ad4c15aded366d8d6e998ad04 Mon Sep 17 00:00:00 2001 From: liyuan Date: Thu, 23 Nov 2023 14:22:12 +0800 Subject: [PATCH 2/5] test markdown link Signed-off-by: liyuan --- data_validation/docs/validation-tools.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_validation/docs/validation-tools.md b/data_validation/docs/validation-tools.md index 651d10740..994095801 100644 --- a/data_validation/docs/validation-tools.md +++ b/data_validation/docs/validation-tools.md @@ -8,7 +8,7 @@ whether the Spark job using RAPIDS Accelerator(aka GPU Spark job) returns the sa ### 1.gcloud CLI -- Install the gcloud CLI. 
Follow the instructions on [gcloud-sdk-install](https://cloud.google.com/sdk/docs/install) +- Install the gcloud CLI. Follow the instructions on [gcloud-sdk-install](https://cloud.google.com/sdk/docs/installs) - Set the configuration settings and credentials of the gcloud CLI: - Initialize the gcloud CLI by following [these instructions](https://cloud.google.com/sdk/docs/initializing#initialize_the) - Grant authorization to the gcloud CLI [with a user account](https://cloud.google.com/sdk/docs/authorizing#authorize_with_a_user_account) From bc236e77336a341f8ce34a22b81725cfd074ffd0 Mon Sep 17 00:00:00 2001 From: liyuan Date: Thu, 23 Nov 2023 14:24:20 +0800 Subject: [PATCH 3/5] revert test markdown link Signed-off-by: liyuan --- data_validation/docs/validation-tools.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_validation/docs/validation-tools.md b/data_validation/docs/validation-tools.md index 994095801..651d10740 100644 --- a/data_validation/docs/validation-tools.md +++ b/data_validation/docs/validation-tools.md @@ -8,7 +8,7 @@ whether the Spark job using RAPIDS Accelerator(aka GPU Spark job) returns the sa ### 1.gcloud CLI -- Install the gcloud CLI. Follow the instructions on [gcloud-sdk-install](https://cloud.google.com/sdk/docs/installs) +- Install the gcloud CLI. 
Follow the instructions on [gcloud-sdk-install](https://cloud.google.com/sdk/docs/install) - Set the configuration settings and credentials of the gcloud CLI: - Initialize the gcloud CLI by following [these instructions](https://cloud.google.com/sdk/docs/initializing#initialize_the) - Grant authorization to the gcloud CLI [with a user account](https://cloud.google.com/sdk/docs/authorizing#authorize_with_a_user_account) From c7681c109e628beaf7034dd56cf67c780a834c15 Mon Sep 17 00:00:00 2001 From: liyuan Date: Wed, 29 Nov 2023 10:52:11 +0800 Subject: [PATCH 4/5] update base-branch and check all files Signed-off-by: liyuan --- .github/workflows/markdown-links-check.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/markdown-links-check.yml b/.github/workflows/markdown-links-check.yml index 1287f4af6..833b99ec8 100644 --- a/.github/workflows/markdown-links-check.yml +++ b/.github/workflows/markdown-links-check.yml @@ -30,8 +30,7 @@ jobs: with: max-depth: -1 use-verbose-mode: 'yes' - check-modified-files-only: 'yes' config-file: '.github/workflows/markdown-links-check/markdown-links-check-config.json' - base-branch: 'main' + base-branch: 'dev' \ No newline at end of file From c629ec24bd54558bc3aa7cfff5c9f7b99ee36a4b Mon Sep 17 00:00:00 2001 From: liyuan Date: Wed, 29 Nov 2023 11:22:33 +0800 Subject: [PATCH 5/5] fix dead links Signed-off-by: liyuan --- README.md | 9 ++++++--- user_tools/docs/user-tools-databricks-aws.md | 2 +- user_tools/docs/user-tools-databricks-azure.md | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 42f142176..ac1a5dcfb 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,15 @@ This repo provides the tools to use [RAPIDS Accelerator for Apache Spark](https: ## Catalog -- [RAPIDS core tools](/core): Tools that help developers getting the most out of their Apache Spark applications +- [RAPIDS core tools](./core): Tools that help developers get the most out of their 
Apache + Spark applications without any code change: - Report acceleration potential of RAPIDS Accelerator for Apache Spark on a set of Spark applications. - Generate comprehensive profiling analysis for Apache Sparks executing on accelerated GPU instances. This information can be used to further tune and optimize the application. -- [spark-rapids-user-tools](/user_tools): A simple wrapper process around cloud service providers to run - [RAPIDS core tools](/core) across multiple cloud platforms. In addition, the output educates the users on +- [spark-rapids-user-tools](./user_tools): A simple wrapper process around cloud service + providers to run + [RAPIDS core tools](./core) across multiple cloud platforms. In addition, the output educates + the users on the cost savings and acceleration potential of RAPIDS Accelerator for Apache Spark and makes recommendations to tune the application performance based on the cluster shape. diff --git a/user_tools/docs/user-tools-databricks-aws.md b/user_tools/docs/user-tools-databricks-aws.md index 8e94e654d..2e9198af4 100644 --- a/user_tools/docs/user-tools-databricks-aws.md +++ b/user_tools/docs/user-tools-databricks-aws.md @@ -43,7 +43,7 @@ Before running any command, you can set environment variables to specify configu - RAPIDS variables have a naming pattern `RAPIDS_USER_TOOLS_*`: - `RAPIDS_USER_TOOLS_CACHE_FOLDER`: specifies the location of a local directory that the RAPIDS-cli uses to store and cache the downloaded resources. The default is `/var/tmp/spark_rapids_user_tools_cache`. Note that caching the resources locally has an impact on the total execution time of the command. - `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY`: specifies the location of a local directory that the RAPIDS-cli uses to generate the output. The wrapper CLI arguments override that environment variable (`--local_folder` for Qualification). 
-- For Databricks CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth.html#environment-variables-and-fields-for-client-unified-authentication). +- For Databricks CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth/index.html#environment-variables-and-fields-for-client-unified-authentication). - For AWS CLI, some environment variables can be set and picked by the RAPIDS-user tools such as: `AWS_SHARED_CREDENTIALS_FILE`, `AWS_CONFIG_FILE`, `AWS_REGION`, `AWS_DEFAULT_REGION`, `AWS_PROFILE` and `AWS_DEFAULT_OUTPUT`. See the full list of variables in [aws-cli-configure-envvars](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html). ## Qualification command diff --git a/user_tools/docs/user-tools-databricks-azure.md b/user_tools/docs/user-tools-databricks-azure.md index 2605b70e8..96cf6888e 100644 --- a/user_tools/docs/user-tools-databricks-azure.md +++ b/user_tools/docs/user-tools-databricks-azure.md @@ -47,7 +47,7 @@ Before running any command, you can set environment variables to specify configu - RAPIDS variables have a naming pattern `RAPIDS_USER_TOOLS_*`: - `RAPIDS_USER_TOOLS_CACHE_FOLDER`: specifies the location of a local directory that the RAPIDS-cli uses to store and cache the downloaded resources. The default is `/var/tmp/spark_rapids_user_tools_cache`. Note that caching the resources locally has an impact on the total execution time of the command. - `RAPIDS_USER_TOOLS_OUTPUT_DIRECTORY`: specifies the location of a local directory that the RAPIDS-cli uses to generate the output. 
The wrapper CLI arguments override that environment variable (`--local_folder` for Qualification). -- For Databricks CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth.html#environment-variables-and-fields-for-client-unified-authentication). +- For Databricks CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `DATABRICKS_CONFIG_FILE`, `DATABRICKS_HOST` and `DATABRICKS_TOKEN`. See the description of the variables in [Environment variables](https://docs.databricks.com/en/dev-tools/auth/index.html#environment-variables-and-fields-for-client-unified-authentication). - For Azure CLI, some environment variables can be set and picked up by the RAPIDS-user tools such as: `AZURE_CONFIG_FILE` and `AZURE_DEFAULTS_LOCATION`. ## Qualification command