Skip to content

Commit

Permalink
Remove EBS Attach for RFS in GovCloud (#1216)
Browse files Browse the repository at this point in the history
Signed-off-by: Andre Kurait <[email protected]>
  • Loading branch information
AndreKurait authored Jan 6, 2025
1 parent 6871136 commit 9781a9b
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export class MigrationServiceCore extends Stack {
props.taskRolePolicies?.forEach(policy => this.serviceTaskRole.addToPolicy(policy))

const serviceTaskDef = new FargateTaskDefinition(this, "ServiceTaskDef", {
ephemeralStorageGiB: props.ephemeralStorageGiB ? props.ephemeralStorageGiB : 75,
ephemeralStorageGiB: Math.max(props.ephemeralStorageGiB ? props.ephemeralStorageGiB : 75, 21), // valid values 21 - 200
runtimePlatform: {
operatingSystemFamily: OperatingSystemFamily.LINUX,
cpuArchitecture: props.cpuArchitecture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import {
createOpenSearchServerlessIAMAccessPolicy,
getSecretAccessPolicy,
getMigrationStringParameterValue,
ClusterAuth, parseArgsToDict, appendArgIfNotInExtraArgs
ClusterAuth, parseArgsToDict, appendArgIfNotInExtraArgs, isStackInGovCloud
} from "../common-utilities";
import { RFSBackfillYaml, SnapshotYaml } from "../migration-services-yaml";
import { OtelCollectorSidecar } from "./migration-otel-collector-sidecar";
Expand Down Expand Up @@ -116,7 +116,9 @@ export class ReindexFromSnapshotStack extends MigrationServiceCore {
const extraArgsDict = parseArgsToDict(props.extraArgs)
const storagePath = "/storage"
const planningSize = props.maxShardSizeGiB ?? 80;
const maxShardSizeBytes = planningSize * 1024 * 1024 * 1024 * 1.10 // Add 10% buffer
const planningSizeBuffer = 1.10
const maxShardSizeGiB = planningSize * planningSizeBuffer
const maxShardSizeBytes = maxShardSizeGiB * (1024 ** 3)
command = appendArgIfNotInExtraArgs(command, extraArgsDict, "--s3-local-dir", `"${storagePath}/s3_files"`)
command = appendArgIfNotInExtraArgs(command, extraArgsDict, "--s3-repo-uri", `"${s3Uri}"`)
command = appendArgIfNotInExtraArgs(command, extraArgsDict, "--s3-region", this.region)
Expand Down Expand Up @@ -170,40 +172,60 @@ export class ReindexFromSnapshotStack extends MigrationServiceCore {

// Calculate the volume size based on the max shard size
// Have space for the snapshot and an unpacked copy, with buffer
const volumeSizeGB = Math.max(
Math.ceil(maxShardSizeBytes/(1000**3) * 2 * 1.15),
const shardVolumeSizeGiBBufferMultiple = 1.10
const shardVolumeSizeGiB = Math.max(
Math.ceil(maxShardSizeGiB * 2 * shardVolumeSizeGiBBufferMultiple),
1
)

if (volumeSizeGB > 16000) {
if (shardVolumeSizeGiB > (16*1024)) {
// 16 TiB is the maximum volume size for GP3
throw new Error(`"Your max shard size of ${props.maxShardSizeGiB} GiB is too large to migrate."`)
}

// Volume we'll use to download and unpack the snapshot
const snapshotVolume = new ServiceManagedVolume(this, 'SnapshotVolume', {
name: 'snapshot-volume',
managedEBSVolume: {
size: Size.gibibytes(volumeSizeGB),
volumeType: EbsDeviceVolumeType.GP3,
fileSystemType: FileSystemType.XFS,
throughput: props.reindexFromSnapshotWorkerSize === "maximum" ? 450 : 125,
tagSpecifications: [{
tags: {
Name: `rfs-snapshot-volume-${props.stage}`,
},
propagateTags: EbsPropagatedTagSource.SERVICE,
}],
encrypted: true,
},
});
// Reserve 5 GiB of storage for system
const systemStorageGiB = 5
let ephemeralStorageGiB = systemStorageGiB
if (isStackInGovCloud(this)) {
// ECS EBS attachment is not supported in GovCloud
// https://docs.aws.amazon.com/govcloud-us/latest/UserGuide/govcloud-ecs.html#govcloud-ecs-diffs
// Use Ephemeral Storage instead, adding size for shard
ephemeralStorageGiB = Math.ceil(shardVolumeSizeGiB + systemStorageGiB)
const maxSupportedEphemeralStorageGiB = 200
if (ephemeralStorageGiB > maxSupportedEphemeralStorageGiB) {
// Reverse calculations above for max size
const maxGovCloudSupportedShardSizeGiB = Math.floor((maxSupportedEphemeralStorageGiB-systemStorageGiB)
/2/shardVolumeSizeGiBBufferMultiple/planningSizeBuffer)
throw new Error(`Your max shard size of ${props.maxShardSizeGiB} GiB is too large to migrate ` +
`in GovCloud, the max supported is ${maxGovCloudSupportedShardSizeGiB} GiB.`)
}
}
else {
// Volume we'll use to download and unpack the snapshot
const snapshotVolume = new ServiceManagedVolume(this, 'SnapshotVolume', {
name: 'snapshot-volume',
managedEBSVolume: {
size: Size.gibibytes(shardVolumeSizeGiB),
volumeType: EbsDeviceVolumeType.GP3,
fileSystemType: FileSystemType.XFS,
throughput: props.reindexFromSnapshotWorkerSize === "maximum" ? 450 : 125,
tagSpecifications: [{
tags: {
Name: `rfs-snapshot-volume-${props.stage}`,
},
propagateTags: EbsPropagatedTagSource.SERVICE,
}],
encrypted: true,
},
});

volumes.push(snapshotVolume);
mountPoints.push({
containerPath: storagePath,
readOnly: false,
sourceVolume: snapshotVolume.name,
});
volumes.push(snapshotVolume);
mountPoints.push({
containerPath: storagePath,
readOnly: false,
sourceVolume: snapshotVolume.name,
});
}

this.createService({
serviceName: 'reindex-from-snapshot',
Expand All @@ -217,6 +239,7 @@ export class ReindexFromSnapshotStack extends MigrationServiceCore {
cpuArchitecture: props.fargateCpuArch,
taskCpuUnits: props.reindexFromSnapshotWorkerSize === "maximum" ? 16 * 1024 : 2 * 1024,
taskMemoryLimitMiB: props.reindexFromSnapshotWorkerSize === "maximum" ? 32 * 1024 : 4 * 1024,
ephemeralStorageGiB: ephemeralStorageGiB,
environment: {
"RFS_COMMAND": command,
"RFS_TARGET_USER": targetUser,
Expand Down
16 changes: 8 additions & 8 deletions deployment/cdk/opensearch-service-migration/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ In all other cases, the required components of each cluster object are:

### Reindex from Snapshot (RFS) Service Options

| Name | Type | Example | Description |
| ----------------------------------- | ------- | -------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| reindexFromSnapshotServiceEnabled | boolean | true | Create resources for deploying and configuring the RFS ECS service |
| reindexFromSnapshotExtraArgs | string | "--target-aws-region us-east-1 --target-aws-service-signing-name es" | Extra arguments to provide to the Document Migration command with space separation. See [RFS Arguments](../../../DocumentsFromSnapshotMigration/README.md#Arguments). [^1] |
| sourceClusterEndpoint | string | `"https://source-cluster.elb.us-east-1.endpoint.com"` | The endpoint for the source cluster from which RFS will take a snapshot |
| managedServiceSourceSnapshotEnabled | boolean | true | Create the necessary roles and trust relationships to take a snapshot of a managed service source cluster. This is only compatible with SigV4 auth. Default as true if not specified and source cluster is set with sigv4 auth. |
| reindexFromSnapshotMaxShardSizeGiB | integer | 80 | OPTIONAL: The size, in whole GiB, of the largest shard you want to migrate across all indices; used to ensure we have enough disk space reserved to perform the migration. Default: 80 GiB |
| reindexFromSnapshotWorkerSize | enum | default | maximum | OPTIONAL: default provisions a 2vCPU worker balancing speed with cost efficiency designed for most migrations with horizontal scaling, maximum provisions a 16vCPU worker for high throughput migrations when parallelization is limited (low source shard count). Default: default |
| Name | Type | Example | Description |
|-------------------------------------|---------|----------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| reindexFromSnapshotServiceEnabled | boolean | true | Create resources for deploying and configuring the RFS ECS service |
| reindexFromSnapshotExtraArgs | string | "--target-aws-region us-east-1 --target-aws-service-signing-name es" | Extra arguments to provide to the Document Migration command with space separation. See [RFS Arguments](../../../DocumentsFromSnapshotMigration/README.md#Arguments). [^1] |
| sourceClusterEndpoint | string | `"https://source-cluster.elb.us-east-1.endpoint.com"` | The endpoint for the source cluster from which RFS will take a snapshot |
| managedServiceSourceSnapshotEnabled | boolean | true | Create the necessary roles and trust relationships to take a snapshot of a managed service source cluster. This is only compatible with SigV4 auth. Defaults to true if not specified and the source cluster is configured with SigV4 auth. |
| reindexFromSnapshotMaxShardSizeGiB | integer | 80 | OPTIONAL: The size, in whole GiB, of the largest shard you want to migrate across all indices; used to ensure we have enough disk space reserved to perform the migration. Must be <= 80 when deploying in a GovCloud region, where the service uses ephemeral storage (200 GiB maximum) instead of an attached EBS volume. Default: 80 GiB |
| reindexFromSnapshotWorkerSize | enum | default or maximum | OPTIONAL: default provisions a 2vCPU worker balancing speed with cost efficiency designed for most migrations with horizontal scaling, maximum provisions a 16vCPU worker for high throughput migrations when parallelization is limited (low source shard count). Default: default |

### VPC Options

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,16 +382,88 @@ describe('ReindexFromSnapshotStack Tests', () => {
VolumeConfigurations: volumesCapture,
});
const volumes = volumesCapture.asArray();
expect(volumes).toEqual(
expect.arrayContaining([
expect.objectContaining({
ManagedEBSVolume: expect.objectContaining({
Encrypted: true,
SizeInGiB: 218,
Throughput: 450,
}),
expect(volumes).toEqual(
expect.arrayContaining([
expect.objectContaining({
ManagedEBSVolume: expect.objectContaining({
Encrypted: true,
SizeInGiB: 194,
Throughput: 450,
}),
])
);
}),
])
);
const volumeCapture = new Capture();
template.hasResourceProperties('AWS::ECS::TaskDefinition', {
Volumes: volumeCapture,
});
// Ensure there are 2 volumes, ebs and ephemeral
expect(volumeCapture.asArray().length).toBe(2);
});

test('ReindexFromSnapshotStack configures ephemeral storage in GovCloud', () => {
  // Minimal context that enables the RFS service; no max shard size is supplied here.
  const govCloudContext = {
    vpcEnabled: true,
    reindexFromSnapshotServiceEnabled: true,
    stage: 'unit-test',
    sourceCluster: {
      "endpoint": "https://test-cluster",
      "auth": {"type": "none"},
      "version": "ES_7.10"
    },
    migrationAssistanceEnabled: true,
  };
  const govCloudRegion = 'us-gov-west-1';

  const composer = createStackComposer(govCloudContext, undefined, govCloudRegion);
  const rfsStack = composer.stacks.find(candidate => candidate instanceof ReindexFromSnapshotStack) as ReindexFromSnapshotStack;
  expect(rfsStack).toBeDefined();
  expect(rfsStack.region).toEqual(govCloudRegion);
  const rfsTemplate = Template.fromStack(rfsStack);

  // Asserts that a task definition carrying the given property exists and hands back
  // the Capture holding that property's synthesized value.
  const captureTaskDefinitionProperty = (propertyName: string): Capture => {
    const captured = new Capture();
    rfsTemplate.hasResourceProperties('AWS::ECS::TaskDefinition', {
      [propertyName]: captured,
    });
    return captured;
  };

  // A single container is expected, launched through the RFS entrypoint script.
  const containers = captureTaskDefinitionProperty('ContainerDefinitions').asArray();
  expect(containers.length).toBe(1);
  expect(containers[0].Command).toEqual([
    '/bin/sh',
    '-c',
    '/rfs-app/entrypoint.sh'
  ]);

  // 199 GiB is consistent with an 80 GiB max shard size under the stack's sizing rules:
  // ceil(80 * 1.10 * 2 * 1.10) = 194 GiB for the shard, plus 5 GiB reserved for the system.
  const ephemeralStorageConfig = captureTaskDefinitionProperty('EphemeralStorage').asObject();
  expect(ephemeralStorageConfig.SizeInGiB).toBe(199);

  // No EBS snapshot volume in GovCloud: the only volume left is the ephemeral storage.
  const taskVolumes = captureTaskDefinitionProperty('Volumes').asArray();
  expect(taskVolumes.length).toBe(1);
});

test('ReindexFromSnapshotStack throws error for large shard size in GovCloud', () => {
  // RFS-enabled context whose max shard size is 1 GiB above the 80 GiB limit
  // that the expected error message reports for GovCloud.
  const oversizedShardContext = {
    vpcEnabled: true,
    reindexFromSnapshotServiceEnabled: true,
    stage: 'unit-test',
    reindexFromSnapshotMaxShardSizeGiB: 81, // Exceeding the limit
    sourceCluster: {
      "endpoint": "https://test-cluster",
      "auth": {"type": "none"},
      "version": "ES_7.10"
    },
    migrationAssistanceEnabled: true,
  };
  const expectedFailure = /Your max shard size of 81 GiB is too large to migrate in GovCloud, the max supported is 80 GiB/;

  // The error is expected while the composer is being built, so the call is
  // deferred in a thunk for expect() to invoke.
  const composeInGovCloud = () => createStackComposer(oversizedShardContext, undefined, 'us-gov-west-1');

  expect(composeInGovCloud).toThrowError(expectedFailure);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {StackComposer} from "../lib/stack-composer";
import {App} from "aws-cdk-lib";

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function createStackComposer(contextBlock: Record<string, any>, migrationsUserAgent?: string) {
export function createStackComposer(contextBlock: Record<string, any>, migrationsUserAgent?: string, region?: string): StackComposer {
contextBlock.stage = "unit-test"
const app = new App({
context: {
Expand All @@ -11,7 +11,7 @@ export function createStackComposer(contextBlock: Record<string, any>, migration
}
})
return new StackComposer(app, {
env: {account: "test-account", region: "us-east-1"},
env: {account: "test-account", region: region ?? "us-east-1"},
migrationsSolutionVersion: "1.0.0",
migrationsUserAgent: migrationsUserAgent
})
Expand Down

0 comments on commit 9781a9b

Please sign in to comment.