Support running Databricks CI_PART2 integration tests with JARs built by CI_PART1

To fix: #11838

The CI_PART1 job uploads the built Spark Rapids tar file to Databricks DBFS storage.

The CI_PART2 job retrieves the built tar file from DBFS storage and runs integration tests against it.

The CI_PART2 job then no longer needs to duplicate the Spark Rapids jar build, saving about 1 hour of Databricks time.
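A minimal sketch of the hand-off as it could look from a Jenkins stage (illustrative only; in this change the actual copy and restore are done by jenkins/databricks/build.sh and test.sh on the cluster, where dbfs:/ is FUSE-mounted at /dbfs/):

    // Hypothetical sketch, not the actual scripts:
    // CI_PART1, after the build, backs the tar up through the /dbfs FUSE mount
    sh "mkdir -p /dbfs/cicd/${BUILD_TAG}/${DB_RUNTIME}"
    sh "cp spark-rapids-built.tgz /dbfs/cicd/${BUILD_TAG}/${DB_RUNTIME}/"
    // CI_PART2, before the tests, restores it from the same DBFS path
    sh "tar -zxf /dbfs/cicd/${BUILD_TAG}/${DB_RUNTIME}/spark-rapids-built.tgz -C ."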

Signed-off-by: timl <[email protected]>
NvTimLiu committed Dec 9, 2024
1 parent 0dbef90 commit 730b28b
Showing 2 changed files with 60 additions and 13 deletions.
37 changes: 35 additions & 2 deletions jenkins/Jenkinsfile-blossom.premerge
@@ -39,6 +39,8 @@ def skipped = false
def db_build = false
def sourcePattern = 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,' +
'sql-plugin/src/main/java/,sql-plugin/src/main/scala/'
// The path where the CI_PART1 job shares the built rapids plugin tars with the CI_PART2 job
def plugin_built_dir = "dbfs:/cicd/$BUILD_TAG"
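// e.g. dbfs:/cicd/jenkins-<job-name>-<build-number>, since BUILD_TAG is Jenkins' built-in jenkins-${JOB_NAME}-${BUILD_NUMBER}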

pipeline {
agent {
@@ -281,12 +283,14 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true"""
steps {
script {
githubHelper.updateCommitStatus("", "Running - includes databricks", GitHubCommitState.PENDING)
// CI_PART1 uploads the built plugin tars to PLUGIN_BUILT_DIR for CI_PART2
def DBJob = build(job: 'rapids-databricks_premerge-github',
propagate: false, wait: true,
parameters: [
string(name: 'REF', value: params.REF),
string(name: 'GITHUB_DATA', value: params.GITHUB_DATA),
string(name: 'TEST_MODE', value: 'CI_PART1')
string(name: 'TEST_MODE', value: 'CI_PART1'),
string(name: 'PLUGIN_BUILT_DIR', value: "$plugin_built_dir"),
])
if ( DBJob.result != 'SUCCESS' ) {
// Output Databricks failure logs to be uploaded onto the pre-merge PR
@@ -304,13 +308,18 @@ }
}
steps {
script {
container('cpu') {
waitForPluginBuiltTar(plugin_built_dir)
}

githubHelper.updateCommitStatus("", "Running - includes databricks", GitHubCommitState.PENDING)
def DBJob = build(job: 'rapids-databricks_premerge-github',
propagate: false, wait: true,
parameters: [
string(name: 'REF', value: params.REF),
string(name: 'GITHUB_DATA', value: params.GITHUB_DATA),
string(name: 'TEST_MODE', value: 'CI_PART2')
string(name: 'TEST_MODE', value: 'CI_PART2'),
string(name: 'PLUGIN_BUILT_DIR', value: "$plugin_built_dir"),
])
if ( DBJob.result != 'SUCCESS' ) {
// Output Databricks failure logs to be uploaded onto the pre-merge PR
@@ -435,3 +444,27 @@ boolean databricksCodeChanged() {
}
return false
}

// Wait for the rapids plugin built tars to be ready
def waitForPluginBuiltTar(String tar_path, String db_type='aws', int timeout=60) {
// get DB runtimes from Jenkinsfile: "DB_RUNTIME [space]+ values 'x.y', 'a.b'"
def dbJenkinsfile = 'jenkins/Jenkinsfile-blossom.premerge-databricks'
def DBRuntimes = sh(script: "grep -Pzo 'DB_RUNTIME.*\\n\\s*values.*' $dbJenkinsfile", returnStdout: true)
// DB_RUNTIME\n values 'x.y', 'a.b' --> x.y,a.b
DBRuntimes = DBRuntimes.replaceAll("DB_RUNTIME.*|.*values|[\"']|\\s+", '').trim()
// x.y,a.b --> /path/x.y/,/path/a.b/

def DBFiles = DBRuntimes.split(',').collect { "$tar_path/$it/" }.join(',')
def databricks_host = DbUtils.getHost("$db_type")
def dbTokenId = DbUtils.getToken("$db_type")
withCredentials([string(credentialsId: dbTokenId, variable: 'DATABRICKS_TOKEN')]) {
withEnv(["DATABRICKS_HOST=$databricks_host"]) {
// wait for all the rapids plugin tars built in CI_PART1 to be ready
if (DbUtils.allFilesExist(this, DBFiles, timeout)) {
println('Rapids plugin built tars are ready for CI_PART2')
} else {
error "Timeout, rapids plugin built tars are not ready for CI_PART2"
}
}
}
}
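
DbUtils.allFilesExist comes from the shared Jenkins library rather than this diff; a rough Groovy sketch of the polling it implies (the method body and CLI usage below are assumptions, not the library's actual code):

    // Hypothetical stand-in for the shared-library helper: returns true once every
    // DBFS path in the comma-separated list exists, polling once a minute.
    boolean allFilesExist(def pipeline, String csvPaths, int timeoutMin) {
        def paths = csvPaths.split(',')
        for (int i = 0; i < timeoutMin; i++) {
            // `databricks fs ls` exits non-zero while the path does not exist yet
            def missing = paths.findAll { p ->
                pipeline.sh(script: "databricks fs ls $p", returnStatus: true) != 0
            }
            if (!missing) { return true }
            pipeline.sleep(time: 1, unit: 'MINUTES')
        }
        return false
    }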
36 changes: 25 additions & 11 deletions jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -53,6 +53,8 @@ pipeline {
description: 'Json-formatted github data from upstream blossom-ci')
choice(name: 'TEST_MODE', choices: ['CI_PART1', 'CI_PART2'],
description: 'Separate integration tests into 2 parts, and run each part in parallel')
string(name: 'PLUGIN_BUILT_DIR', defaultValue: 'dbfs:/cicd',
description: 'CI_PART1 uploads spark-rapids built tgz for CI_PART2')
}

environment {
@@ -77,7 +79,7 @@ script {
script {
githubHelper = GithubHelper.getInstance("${GITHUB_TOKEN}", params.GITHUB_DATA)
// desc contains the PR ID and can be accessed from different builds
currentBuild.description = githubHelper.getBuildDescription()
currentBuild.description = githubHelper.getBuildDescription() + " | $TEST_MODE"
checkoutCode(githubHelper.getCloneUrl(), githubHelper.getMergedSHA())
}
}
@@ -138,6 +140,11 @@ void databricksBuild() {
def CLUSTER_ID = ''
def SPARK_MAJOR = BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS.replace('.', '')
def dbStep = ''
def pluginBuiltTar = "$PLUGIN_BUILT_DIR/$DB_RUNTIME/spark-rapids-built.tgz"
// Map DBFS path to the local path into the cluster
def buildArgs = (params.TEST_MODE == 'CI_PART1') ? pluginBuiltTar.replace('dbfs:/', '/dbfs/') : ''
def testArgs = (params.TEST_MODE == 'CI_PART2') ? pluginBuiltTar.replace('dbfs:/', '/dbfs/') : ''
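// Illustrative values (assumed): with PLUGIN_BUILT_DIR='dbfs:/cicd/jenkins-premerge-1234' and
// DB_RUNTIME='13.3', CI_PART1 gets buildArgs='/dbfs/cicd/jenkins-premerge-1234/13.3/spark-rapids-built.tgz'
// and an empty testArgs; for CI_PART2 the two are swapped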

try {
stage("Create $SPARK_MAJOR DB") {
dbStep = 'CREATE'
@@ -148,17 +155,21 @@
echo CLUSTER_ID
}

stage("Build against $SPARK_MAJOR DB") {
sh "rm -rf spark-rapids-ci.tgz"
sh "tar -zcf spark-rapids-ci.tgz * .git"
dbStep = 'BUILD'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def BUILD_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
retry(3) {
sh "python3 ./jenkins/databricks/run-build.py $BUILD_PARAMS"
if (params.TEST_MODE == 'CI_PART1') {
stage("Build against $SPARK_MAJOR DB") {
sh "rm -rf spark-rapids-ci.tgz"
sh "tar -zcf spark-rapids-ci.tgz * .git"
dbStep = 'BUILD'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def BUILD_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
retry(3) {
// Back-up built tar to the path "$buildArgs" on Databricks cluster
// Refer to https://github.com/NvTimLiu/spark-rapids/blob/d030630c1/jenkins/databricks/build.sh#L190-L194
sh "python3 ./jenkins/databricks/run-build.py $BUILD_PARAMS $buildArgs"
}
}
sh "rm spark-rapids-ci.tgz"
}
sh "rm spark-rapids-ci.tgz"
}

// TODO: Temporarily skip tests on Databricks 14.3 until the test failures are fixed
@@ -167,14 +178,17 @@
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS"
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NvTimLiu/spark-rapids/blob/d030630c1/jenkins/databricks/test.sh#L52-L55
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
}
}
}
} finally {
if (CLUSTER_ID) {
(dbStep == 'TEST') ? common.publishPytestResult(this, "Test against $SPARK_MAJOR DB") : ''
retry(3) {
params.TEST_MODE == 'CI_PART1' ? DbUtils.cleanUp(this, "$PLUGIN_BUILT_DIR/$DB_RUNTIME") : ''
env.INIT_SCRIPTS ? DbUtils.cleanUp(this, env.INIT_SCRIPTS_DIR) : ''
sh "python3 ./jenkins/databricks/shutdown.py -s $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -d"
}
