Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support running Databricks CI_PART2 integration tests with JARs built by CI_PART1 [databricks] #11840

Merged
merged 5 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions jenkins/Jenkinsfile-blossom.premerge
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def skipped = false
def db_build = false
def sourcePattern = 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,' +
'sql-plugin/src/main/java/,sql-plugin/src/main/scala/'
// The path where the CI_PART1 job shares the rapids plugin built tars with the CI_PART2 job
def plugin_built_dir = "dbfs:/cicd/$BUILD_TAG"

pipeline {
agent {
Expand Down Expand Up @@ -281,12 +283,14 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true"""
steps {
script {
githubHelper.updateCommitStatus("", "Running - includes databricks", GitHubCommitState.PENDING)
// CI_PART1 uploads the plugin built tars to PLUGIN_BUILT_DIR for CI_PART2
def DBJob = build(job: 'rapids-databricks_premerge-github',
propagate: false, wait: true,
parameters: [
string(name: 'REF', value: params.REF),
string(name: 'GITHUB_DATA', value: params.GITHUB_DATA),
string(name: 'TEST_MODE', value: 'CI_PART1')
string(name: 'TEST_MODE', value: 'CI_PART1'),
string(name: 'PLUGIN_BUILT_DIR', value: "$plugin_built_dir"),
])
if ( DBJob.result != 'SUCCESS' ) {
// Output Databricks failure logs to be uploaded onto the pre-merge PR
Expand All @@ -310,7 +314,8 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true"""
parameters: [
string(name: 'REF', value: params.REF),
string(name: 'GITHUB_DATA', value: params.GITHUB_DATA),
string(name: 'TEST_MODE', value: 'CI_PART2')
string(name: 'TEST_MODE', value: 'CI_PART2'),
string(name: 'PLUGIN_BUILT_DIR', value: "$plugin_built_dir"),
])
if ( DBJob.result != 'SUCCESS' ) {
// Output Databricks failure logs to be uploaded onto the pre-merge PR
Expand Down
46 changes: 35 additions & 11 deletions jenkins/Jenkinsfile-blossom.premerge-databricks
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ pipeline {
description: 'Json-formatted github data from upstream blossom-ci')
choice(name: 'TEST_MODE', choices: ['CI_PART1', 'CI_PART2'],
description: 'Separate integration tests into 2 parts, and run each part in parallell')
string(name: 'PLUGIN_BUILT_DIR', defaultValue: 'dbfs:/cicd',
description: 'CI_PART1 uploads spark-rapids built tgz for CI_PART2')
}

environment {
Expand All @@ -77,7 +79,7 @@ pipeline {
script {
githubHelper = GithubHelper.getInstance("${GITHUB_TOKEN}", params.GITHUB_DATA)
// desc contains the PR ID and can be accessed from different builds
currentBuild.description = githubHelper.getBuildDescription()
currentBuild.description = githubHelper.getBuildDescription() + " | $TEST_MODE"
checkoutCode(githubHelper.getCloneUrl(), githubHelper.getMergedSHA())
}
}
Expand Down Expand Up @@ -138,7 +140,22 @@ void databricksBuild() {
def CLUSTER_ID = ''
def SPARK_MAJOR = BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS.replace('.', '')
def dbStep = ''
def pluginBuiltTar ="$PLUGIN_BUILT_DIR/$DB_RUNTIME/spark-rapids-built.tgz"
// Map the DBFS path ('dbfs:/...') to the corresponding local path ('/dbfs/...') on the cluster
def buildArgs = (params.TEST_MODE == 'CI_PART1') ? pluginBuiltTar.replace('dbfs:/', '/dbfs/') : ''
def testArgs = (params.TEST_MODE == 'CI_PART2') ? pluginBuiltTar.replace('dbfs:/', '/dbfs/') : ''

try {
// wait for all the rapids plugin tars built in CI_PART1 to be ready
if (params.TEST_MODE == 'CI_PART2') {
// Check if the comma-separated files exist in the Databricks DBFS path within timeout minutes
if (DbUtils.filesExist(this, "$pluginBuiltTar", 60)) {
println('Rapids plugin built tars are ready for CI_PART2')
} else {
error "Timeout, rapids plugin built tars are not ready for CI_PART2"
NvTimLiu marked this conversation as resolved.
Show resolved Hide resolved
}
}

stage("Create $SPARK_MAJOR DB") {
dbStep = 'CREATE'
// Add the init_script parameter, e.g. foo.sh,bar.sh --> /path/foo.sh,/path/bar.sh
Expand All @@ -148,17 +165,21 @@ void databricksBuild() {
echo CLUSTER_ID
}

stage("Build against $SPARK_MAJOR DB") {
sh "rm -rf spark-rapids-ci.tgz"
sh "tar -zcf spark-rapids-ci.tgz * .git"
dbStep = 'BUILD'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def BUILD_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
retry(3) {
sh "python3 ./jenkins/databricks/run-build.py $BUILD_PARAMS"
if (params.TEST_MODE == 'CI_PART1') {
stage("Build against $SPARK_MAJOR DB") {
sh "rm -rf spark-rapids-ci.tgz"
sh "tar -zcf spark-rapids-ci.tgz * .git"
dbStep = 'BUILD'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def BUILD_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
retry(3) {
// Back up the built tar to the path "$buildArgs" on the Databricks cluster
// Refer to https://github.com/NvTimLiu/spark-rapids/blob/d030630c1/jenkins/databricks/build.sh#L190-L194
NvTimLiu marked this conversation as resolved.
Show resolved Hide resolved
sh "python3 ./jenkins/databricks/run-build.py $BUILD_PARAMS $buildArgs"
}
}
sh "rm spark-rapids-ci.tgz"
}
sh "rm spark-rapids-ci.tgz"
}

// TODO: Temporarily skip tests on Databricks 14.3 until the test failures are fixed
Expand All @@ -167,14 +188,17 @@ void databricksBuild() {
dbStep = 'TEST'
withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) {
def TEST_PARAMS = DbUtils.getParameters(this, dbStep, "-c $CLUSTER_ID")
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS"
// Get built tar from the path "$testArgs" on Databricks cluster
// Refer to https://github.com/NvTimLiu/spark-rapids/blob/d030630c1/jenkins/databricks/test.sh#L52-L55
NvTimLiu marked this conversation as resolved.
Show resolved Hide resolved
sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS $testArgs"
}
}
}
} finally {
if (CLUSTER_ID) {
(dbStep == 'TEST') ? common.publishPytestResult(this, "Test against $SPARK_MAJOR DB") : ''
retry(3) {
params.TEST_MODE == 'CI_PART1' ? DbUtils.cleanUp(this, "$PLUGIN_BUILT_DIR/$DB_RUNTIME") : ''
NvTimLiu marked this conversation as resolved.
Show resolved Hide resolved
env.INIT_SCRIPTS ? DbUtils.cleanUp(this, env.INIT_SCRIPTS_DIR) : ''
sh "python3 ./jenkins/databricks/shutdown.py -s $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -d"
}
Expand Down
Loading