Skip to content

Commit

Permalink
Add logging for sketches on workers (#16697)
Browse files Browse the repository at this point in the history
Improve the logging of sketches on workers.
  • Loading branch information
adarshsanjeev authored Jul 9, 2024
1 parent af5399c commit 7c62535
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1772,6 +1772,7 @@ private ResultAndChannels<?> gatherResultKeyStatistics(final OutputChannels chan
@Override
public void onSuccess(final ClusterByStatisticsCollector result)
{
result.logSketches();
kernelManipulationQueue.add(
holder ->
holder.getStageKernelMap().get(stageDefinition.getId())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ public Either<Long, ClusterByPartitions> generatePartitionsForGlobalSort(
} else if (maxPartitions > maxNumPartitions) {
return Either.error((long) maxPartitions);
} else {
collector.logSketches();
final ClusterByPartitions generatedPartitions = collector.generatePartitionsWithMaxCount(maxPartitions);
if (generatedPartitions.size() <= maxNumPartitions) {
return Either.value(generatedPartitions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public Either<Long, ClusterByPartitions> generatePartitionsForGlobalSort(
if (expectedPartitions > maxNumPartitions) {
return Either.error(expectedPartitions);
} else {
collector.logSketches();
final ClusterByPartitions generatedPartitions = collector.generatePartitionsWithTargetWeight(targetSize);
if (generatedPartitions.size() <= maxNumPartitions) {
return Either.value(generatedPartitions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ public interface ClusterByStatisticsCollector
*/
ClusterByPartitions generatePartitionsWithMaxCount(int maxNumPartitions);

/**
* Logs some information regarding the collector. This is useful for seeing which sketches were downsampled the most.
*
* NOTE(review): the implementation shown with this change only emits output when debug logging is enabled; confirm
* whether every implementation is expected to apply the same guard.
*/
void logSketches();

/**
* Returns an immutable, JSON-serializable snapshot of this collector.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,6 @@ public ClusterByStatisticsCollector clear()
@Override
public ClusterByPartitions generatePartitionsWithTargetWeight(final long targetWeight)
{
logSketches();

if (targetWeight < 1) {
throw new IAE("Target weight must be positive");
}
Expand Down Expand Up @@ -288,8 +286,6 @@ public ClusterByPartitions generatePartitionsWithTargetWeight(final long targetW
@Override
public ClusterByPartitions generatePartitionsWithMaxCount(final int maxNumPartitions)
{
logSketches();

if (maxNumPartitions < 1) {
throw new IAE("Must have at least one partition");
} else if (buckets.isEmpty()) {
Expand Down Expand Up @@ -331,7 +327,8 @@ public ClusterByPartitions generatePartitionsWithMaxCount(final int maxNumPartit
return ranges;
}

private void logSketches()
@Override
public void logSketches()
{
if (log.isDebugEnabled()) {
// Log all sketches
Expand Down

0 comments on commit 7c62535

Please sign in to comment.