Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing autotuner memory error to warning in comments #1418

Merged
merged 3 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -539,12 +539,12 @@ class AutoTuner(
// For now just throw so we don't get any tunings and it's obvious to user this isn't a good
// setup. In the future we may just recommend them to use larger nodes. This would be more
// ideal once we hook up actual executor heap from an eventlog vs what user passes in.
throwNotEnoughMemException(minExecHeapMem + minOverhead)
warnNotEnoughMem(minExecHeapMem + minOverhead)
(0, 0, 0, false)
} else {
val leftOverMemUsingMinHeap = containerMem - minExecHeapMem
if (leftOverMemUsingMinHeap < 0) {
throwNotEnoughMemException(minExecHeapMem + minOverhead)
warnNotEnoughMem(minExecHeapMem + minOverhead)
}
// Pinned memory uses any unused space up to 4GB. Spill memory is same size as pinned.
val pinnedMem = Math.min(MAX_PINNED_MEMORY_MB, (leftOverMemUsingMinHeap / 2)).toLong
Expand All @@ -556,13 +556,12 @@ class AutoTuner(
}
}

private def throwNotEnoughMemException(minSize: Long): Unit = {
private def warnNotEnoughMem(minSize: Long): Unit = {
// in the future it would be nice to enhance the error message with a recommendation of size
val msg = "This node/worker configuration is not ideal for using the Spark Rapids " +
"Accelerator because it doesn't have enough memory for the executors. " +
val msg = "This node/worker configuration is not ideal for using the Spark Rapids \n" +
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: trailing space before the \n

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated in latest commit

"Accelerator because it doesn't have enough memory for the executors. \n" +
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: trailing space before the \n

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated in latest commit

s"We recommend using nodes/workers with more memory. Need at least ${minSize}MB memory."
logError(msg)
throw new IllegalArgumentException(msg)
appendComment(msg)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,15 @@ class AutoTunerSuite extends FunSuite with BeforeAndAfterEach with Logging {
val platform = PlatformFactory.createInstance(PlatformNames.DATAPROC, clusterPropsOpt)
val autoTuner = AutoTuner.buildAutoTunerFromProps(dataprocWorkerInfo,
getGpuAppMockInfoProvider, platform)
assertThrows[IllegalArgumentException](autoTuner.getRecommendedProperties())
val (properties, comments) = autoTuner.getRecommendedProperties()
val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
// scalastyle:off line.size.limit
val expectedComment =
s"""This node/worker configuration is not ideal for using the Spark Rapids
Accelerator because it doesn't have enough memory for the executors.
We recommend using nodes/workers with more memory. Need at least 7796MB memory.""".stripMargin.replaceAll("\n", "")
// scalastyle:on line.size.limit
assert(autoTunerOutput.replaceAll("\n", "").contains(expectedComment))
}

test("Load cluster properties with CPU memory missing") {
Expand Down