Skip to content

Commit

Permalink
Allocate pending segments at latest committed version (apache#15459)
Browse files Browse the repository at this point in the history
The segment allocation algorithm reuses an already allocated pending segment if the new allocation request is made for the same parameters:

- same datasource
- same sequence name
- same interval
- same value of skipSegmentLineageCheck (false for batch append, true for streaming append)
- same previous segment id (used only when skipSegmentLineageCheck = false)
The above parameters thus uniquely identify a pending segment (enforced by the UNIQUE constraint on the sequence_name_prev_id_sha1 column of the druid_pendingSegments metadata table).

This reuse is done in order to:

- allow replica tasks (in case of streaming ingestion) to use the same set of segment IDs.
- allow a re-run of a failed batch task to use the same segment ID and prevent unnecessary allocations.
  • Loading branch information
kfaraz authored Dec 14, 2023
1 parent e43bb74 commit feeb4f0
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 230 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -70,9 +70,6 @@
@RunWith(Parameterized.class)
public class SegmentAllocateActionTest
{
@Rule
public ExpectedException thrown = ExpectedException.none();

@Rule
public TaskActionTestKit taskActionTestKit = new TaskActionTestKit();

Expand Down Expand Up @@ -403,6 +400,72 @@ public void testResumeSequence()
assertSameIdentifier(id2, id7);
}

/**
 * Checks that allocations made for one sequence name over the same hour pick up the
 * version of the most recently committed segment for that hour, and that repeating an
 * allocation request without an intervening commit yields the same identifier.
 */
@Test
public void testSegmentIsAllocatedForLatestUsedSegmentVersion() throws IOException
{
  final Task noopTask = NoopTask.create();
  taskActionTestKit.getTaskLockbox().add(noopTask);

  final String sequence = "sequence_1";

  // Phase 0: nothing has been committed yet; two identical requests must agree
  final SegmentIdWithShardSpec firstBeforeCommit = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  final SegmentIdWithShardSpec secondBeforeCommit = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  assertSameIdentifier(firstBeforeCommit, secondBeforeCommit);

  // Phase 1: commit a segment whose version is PARTY_TIME + 1 day
  final String versionV1 = PARTY_TIME.plusDays(1).toString();
  final DataSegment segmentV1 = DataSegment
      .builder()
      .dataSource(DATA_SOURCE)
      .interval(Granularities.HOUR.bucket(PARTY_TIME))
      .version(versionV1)
      .shardSpec(new LinearShardSpec(0))
      .size(100)
      .build();
  taskActionTestKit.getMetadataStorageCoordinator()
                   .commitSegments(Collections.singleton(segmentV1));

  // Requests made after the commit must carry the committed V1 version
  final SegmentIdWithShardSpec firstAfterV1 = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  final SegmentIdWithShardSpec secondAfterV1 = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  assertSameIdentifier(firstAfterV1, secondAfterV1);
  Assert.assertEquals(segmentV1.getVersion(), firstAfterV1.getVersion());

  // The pre-commit pending segment must not be handed out again
  Assert.assertNotEquals(firstBeforeCommit, firstAfterV1);

  // Phase 2: commit a segment with a later version (PARTY_TIME + 2 days) over the same hour
  final String versionV2 = PARTY_TIME.plusDays(2).toString();
  final DataSegment segmentV2 = DataSegment
      .builder()
      .dataSource(DATA_SOURCE)
      .interval(Granularities.HOUR.bucket(PARTY_TIME))
      .version(versionV2)
      .shardSpec(new LinearShardSpec(0))
      .size(100)
      .build();
  taskActionTestKit.getMetadataStorageCoordinator()
                   .commitSegments(Collections.singleton(segmentV2));

  // Sanity check: V2 sorts strictly after V1
  Assert.assertTrue(segmentV1.getVersion().compareTo(segmentV2.getVersion()) < 0);

  // Requests made after the second commit must carry the V2 version
  final SegmentIdWithShardSpec firstAfterV2 = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  final SegmentIdWithShardSpec secondAfterV2 = allocate(
      noopTask,
      PARTY_TIME,
      Granularities.NONE,
      Granularities.HOUR,
      sequence,
      null
  );
  assertSameIdentifier(firstAfterV2, secondAfterV2);
  Assert.assertEquals(segmentV2.getVersion(), firstAfterV2.getVersion());

  // Neither of the earlier pending segments may be reused
  Assert.assertNotEquals(firstAfterV2, firstBeforeCommit);
  Assert.assertNotEquals(firstAfterV2, firstAfterV1);
}

@Test
public void testMultipleSequences()
{
Expand Down
Loading

0 comments on commit feeb4f0

Please sign in to comment.