From aeecbc874fba5d6422d20f630a1fd4cbece8fb8a Mon Sep 17 00:00:00 2001 From: Michael Wallace Date: Tue, 4 Jul 2023 22:23:15 +0100 Subject: [PATCH] Add support for recovery of AWS snapshot backups Adds the additional functionality required for barman and barman-cloud-restore to be able to recover AWS snapshot backups. This is achieved by: - Adding the `--aws-region` argument to `barman recover` and `barman-cloud-restore`. This is used to find the recovery instance and attached disks during the verification phase of the recovery. - Passing the source snapshot name from the volume metadata to the AwsVolumeMetadata constructor so that it can be used by the recovery executor to verify that a disk cloned from that snapshot is attached to the recovery instance and correctly mounted. Closes BAR-24. --- barman/cli.py | 6 +++ barman/clients/cloud_restore.py | 9 +++- barman/cloud_providers/__init__.py | 9 ++++ barman/cloud_providers/aws_s3.py | 16 ++++-- doc/barman-cloud-restore.1 | 5 +- doc/barman-cloud-restore.1.md | 5 +- doc/barman.1 | 8 +++ doc/barman.1.d/50-recover.md | 5 ++ doc/manual/28-snapshots.en.md | 2 +- doc/manual/50-feature-details.en.md | 17 ++++++ doc/manual/55-barman-cli.en.md | 4 ++ tests/test_barman_cloud_restore.py | 19 +++++++ tests/test_cli.py | 1 + tests/test_cloud_snapshot_interface.py | 73 ++++++++++++++++++++++++-- 14 files changed, 167 insertions(+), 12 deletions(-) diff --git a/barman/cli.py b/barman/cli.py index 1664f70ab..de1062721 100644 --- a/barman/cli.py +++ b/barman/cli.py @@ -796,6 +796,11 @@ def rebuild_xlogdb(args): help="Azure resource group containing the instance and disks for recovery " "of a snapshot backup", ), + argument( + "--aws-region", + help="The name of the AWS region containing the EC2 VM and storage " + "volumes for recovery of a snapshot backup", + ), ] ) def recover(args): @@ -966,6 +971,7 @@ def recover(args): args.gcp_zone = args.snapshot_recovery_zone # Override provider-specific options in the config for arg in ( + 
"aws_region", "azure_resource_group", "gcp_zone", ): diff --git a/barman/clients/cloud_restore.py b/barman/clients/cloud_restore.py index 3234772cd..38033426f 100644 --- a/barman/clients/cloud_restore.py +++ b/barman/clients/cloud_restore.py @@ -131,7 +131,7 @@ def parse_arguments(args=None): :return: The options parsed """ - parser, _, azure_arguments = create_argument_parser( + parser, s3_arguments, azure_arguments = create_argument_parser( description="This script can be used to download a backup " "previously made with barman-cloud-backup command." "Currently AWS S3, Azure Blob Storage and Google Cloud Storage are supported.", @@ -157,6 +157,13 @@ def parse_arguments(args=None): ), dest="gcp_zone", ) + s3_arguments.add_argument( + "--aws-region", + help=( + "Name of the AWS region where the instance and disks for snapshot " + "recovery are located" + ), + ) gcs_arguments = parser.add_argument_group( "Extra options for google-cloud-storage cloud provider" ) diff --git a/barman/cloud_providers/__init__.py b/barman/cloud_providers/__init__.py index 286b6f66d..2a77bae3a 100644 --- a/barman/cloud_providers/__init__.py +++ b/barman/cloud_providers/__init__.py @@ -304,6 +304,15 @@ def get_snapshot_interface_from_backup_info(backup_info, config=None): resource_group=resource_group, credential=_get_azure_credential(config.azure_credential), ) + elif backup_info.snapshots_info.provider == "aws": + from barman.cloud_providers.aws_s3 import AwsCloudSnapshotInterface + + region = None + profile = None + if config is not None and hasattr(config, "aws_region"): + region = config.aws_region + profile = config.aws_profile + return AwsCloudSnapshotInterface(profile, region) else: raise CloudProviderUnsupported( "Unsupported snapshot provider in backup info: %s" diff --git a/barman/cloud_providers/aws_s3.py b/barman/cloud_providers/aws_s3.py index e52247721..d5f879cbe 100644 --- a/barman/cloud_providers/aws_s3.py +++ b/barman/cloud_providers/aws_s3.py @@ -657,10 +657,14 @@ def 
_get_requested_volumes(self, instance_metadata, disks=None): == instance_metadata["RootDeviceName"] ): continue + snapshot_id = None + if "SnapshotId" in volume and volume["SnapshotId"] != "": + snapshot_id = volume["SnapshotId"] requested_volumes.append( { "identifier": volume_identifier, "attachment_metadata": attachment_metadata, + "source_snapshot": snapshot_id, } ) return requested_volumes @@ -819,6 +823,7 @@ def get_attached_volumes( attached_volumes[requested_volume["identifier"]] = AwsVolumeMetadata( requested_volume["attachment_metadata"], virtualization_type=instance_metadata["VirtualizationType"], + source_snapshot=requested_volume["source_snapshot"], ) if disks is not None and fail_on_missing: @@ -861,7 +866,9 @@ class AwsVolumeMetadata(VolumeMetadata): mount point and mount options for the volume. """ - def __init__(self, attachment_metadata=None, virtualization_type=None): + def __init__( + self, attachment_metadata=None, virtualization_type=None, source_snapshot=None + ): """ Creates an AwsVolumeMetadata instance using metadata obtained from the AWS API. @@ -869,6 +876,8 @@ def __init__(self, attachment_metadata=None, virtualization_type=None): metadata for this volume. :param str virtualization_type: The type of virtualzation used by the VM to which this volume is attached - either "hvm" or "paravirtual". + :param str source_snapshot: The snapshot ID of the source snapshot from which + the volume was created. 
""" super(AwsVolumeMetadata, self).__init__() # The `id` property is used to store the volume ID so that we always have a @@ -877,6 +886,7 @@ def __init__(self, attachment_metadata=None, virtualization_type=None): self.id = None self._device_name = None self._virtualization_type = virtualization_type + self._source_snapshot = source_snapshot if attachment_metadata: if "Device" in attachment_metadata: self._device_name = attachment_metadata["Device"] @@ -940,9 +950,9 @@ def source_snapshot(self): An identifier which can reference the snapshot via the cloud provider. :rtype: str - :return: The snapshot short name. + :return: The snapshot ID """ - raise NotImplementedError() + return self._source_snapshot class AwsSnapshotMetadata(SnapshotMetadata): diff --git a/doc/barman-cloud-restore.1 b/doc/barman-cloud-restore.1 index 86ad62c50..f7b71d2e9 100644 --- a/doc/barman-cloud-restore.1 +++ b/doc/barman-cloud-restore.1 @@ -35,7 +35,7 @@ usage:\ barman\-cloud\-restore\ [\-V]\ [\-\-help]\ [\-v\ |\ \-q]\ [\-t] \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-tablespace\ NAME:LOCATION] \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-snapshot\-recovery\-instance\ SNAPSHOT_RECOVERY_INSTANCE] \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-snapshot\-recovery\-zone\ GCP_ZONE] -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-gcp\-zone\ GCP_ZONE] +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-aws\-region\ AWS_REGION]\ [\-\-gcp\-zone\ GCP_ZONE] \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ [\-\-azure\-resource\-group\ AZURE_RESOURCE_GROUP] \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ source_url\ server_name\ backup_id\ recovery_dir @@ -79,6 +79,9 @@ Extra\ options\ for\ the\ aws\-s3\ cloud\ provider: \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ time\ in\ seconds\ until\ a\ timeout\ is\ raised\ when \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ waiting\ to\ read\ from\ a\ connection\ 
(defaults\ to\ 60 \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ seconds) +\ \ \-\-aws\-region\ AWS_REGION +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Name\ of\ the\ AWS\ region\ where\ the\ instance\ and\ disks +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ for\ snapshot\ recovery\ are\ located Extra\ options\ for\ the\ azure\-blob\-storage\ cloud\ provider: \ \ \-\-azure\-credential\ {azure\-cli,managed\-identity},\ \-\-credential\ {azure\-cli,managed\-identity} diff --git a/doc/barman-cloud-restore.1.md b/doc/barman-cloud-restore.1.md index b2d68f736..eb0a0a882 100644 --- a/doc/barman-cloud-restore.1.md +++ b/doc/barman-cloud-restore.1.md @@ -37,7 +37,7 @@ usage: barman-cloud-restore [-V] [--help] [-v | -q] [-t] [--tablespace NAME:LOCATION] [--snapshot-recovery-instance SNAPSHOT_RECOVERY_INSTANCE] [--snapshot-recovery-zone GCP_ZONE] - [--gcp-zone GCP_ZONE] + [--aws-region AWS_REGION] [--gcp-zone GCP_ZONE] [--azure-resource-group AZURE_RESOURCE_GROUP] source_url server_name backup_id recovery_dir @@ -81,6 +81,9 @@ Extra options for the aws-s3 cloud provider: the time in seconds until a timeout is raised when waiting to read from a connection (defaults to 60 seconds) + --aws-region AWS_REGION + Name of the AWS region where the instance and disks + for snapshot recovery are located Extra options for the azure-blob-storage cloud provider: --azure-credential {azure-cli,managed-identity}, --credential {azure-cli,managed-identity} diff --git a/doc/barman.1 b/doc/barman.1 index 2fc22ad7a..49e3d0239 100644 --- a/doc/barman.1 +++ b/doc/barman.1 @@ -646,6 +646,14 @@ This option can be used to override the value of \f[C]azure_resource_group\f[] in the Barman config. .RS .RE +.TP +.B \-\-aws\-region \f[I]REGION_NAME\f[] +Name of the AWS region where the instance and disks for snapshot +recovery are located. +This option can be used to override the value of \f[C]aws_region\f[] in +the Barman config. 
+.RS +.RE .RE .TP .B replication\-status \f[I][OPTIONS]\f[] \f[I]SERVER_NAME\f[] diff --git a/doc/barman.1.d/50-recover.md b/doc/barman.1.d/50-recover.md index 45decd788..965d55eea 100644 --- a/doc/barman.1.d/50-recover.md +++ b/doc/barman.1.d/50-recover.md @@ -131,3 +131,8 @@ recover *\[OPTIONS\]* *SERVER_NAME* *BACKUP_ID* *DESTINATION_DIRECTORY* : Name of the Azure resource group containing the instance and disks for snapshot recovery. This option can be used to override the value of `azure_resource_group` in the Barman config. + + --aws-region *REGION_NAME* + : Name of the AWS region where the instance and disks for snapshot + recovery are located. This option can be used to override the value of + `aws_region` in the Barman config. diff --git a/doc/manual/28-snapshots.en.md b/doc/manual/28-snapshots.en.md index 9cac2b038..ba26376be 100644 --- a/doc/manual/28-snapshots.en.md +++ b/doc/manual/28-snapshots.en.md @@ -106,7 +106,7 @@ snapshot_provider = gcp ``` Currently Google Cloud Platform (`gcp`) and Microsoft Azure (`azure`) are fully supported. -Snapshot backups are supported using AWS however *support for recovery/restore and deletion of AWS snapshot backups is not yet implemented*. +Snapshot backups are supported using AWS however *support for deletion of AWS snapshot backups is not yet implemented*. The following parameters must be set regardless of cloud provider: diff --git a/doc/manual/50-feature-details.en.md b/doc/manual/50-feature-details.en.md index 5c88f0ba1..75e68ac59 100644 --- a/doc/manual/50-feature-details.en.md +++ b/doc/manual/50-feature-details.en.md @@ -1104,6 +1104,10 @@ The following additional `barman recover` arguments are available with the `azur - `--azure-resource-group`: The resource group to which the recovery instance belongs. If not provided then Barman will use the value of `azure_resource_group` set in the server config. 
+The following additional `barman recover` arguments are available with the `aws` provider: + +- `--aws-region`: The AWS region in which the recovery instance is located. If not provided then Barman will use the value of `aws_region` set in the server config. + Note the following `barman recover` arguments / config variables are unavailable when recovering snapshot backups: | **Command argument** | **Config variable** . | @@ -1211,3 +1215,16 @@ The following fields are available in `snapshots_info/snapshots/*/provider`: - `location`: The Azure location of the disk from which the snapshot was taken. - `lun`: The LUN identifying the disk from which the snapshot was taken at the time of the backup. - `snapshot_name`: The name of the snapshot. + +#### AWS provider-specific metadata + +The following fields are available in `snapshots_info/provider_info`: + +- `account_id`: The ID of the AWS account which owns the resources used to make the backup. +- `region`: The AWS region in which the resources involved in backup are located. + +The following fields are available in `snapshots_info/snapshots/*/provider`: + +- `device_name`: The device to which the source disk was mapped on the backup VM at the time of the backup. +- `snapshot_id`: The ID of the snapshot as assigned by AWS. +- `snapshot_name`: The name of the snapshot. 
diff --git a/doc/manual/55-barman-cli.en.md b/doc/manual/55-barman-cli.en.md index be5f2a20f..984c54f78 100644 --- a/doc/manual/55-barman-cli.en.md +++ b/doc/manual/55-barman-cli.en.md @@ -252,4 +252,8 @@ The following additional arguments are required with the `azure` provider: - `--azure-resource-group` +The following additional argument is available with the `aws-s3` provider: + +- `--aws-region` + The `--tablespace` option cannot be used with `barman-cloud-restore` when restoring a cloud snapshot backup: diff --git a/tests/test_barman_cloud_restore.py b/tests/test_barman_cloud_restore.py index 460ca00dc..7d0754e95 100644 --- a/tests/test_barman_cloud_restore.py +++ b/tests/test_barman_cloud_restore.py @@ -187,6 +187,21 @@ def test_restore_calls_backup_downloader_with_parsed_id( "relocation rules cannot be used." ), ], + [ + "aws", + [ + "--snapshot-recovery-instance", + "test_instance", + "--aws-profile", + "test_profile", + "--tablespace", + "tbs1:/path/to/tbs1", + ], + ( + "Backup {backup_id} is a snapshot backup therefore tablespace " + "relocation rules cannot be used." 
+ ), + ], ), ) @mock.patch("barman.clients.cloud_restore.CloudBackupCatalog") @@ -242,6 +257,10 @@ def test_unsupported_snapshot_args( "azure", ["--azure-resource-group", "test_resource_group"], ), + ( + "aws", + ["--aws-region", "test_region"], + ), ), ) @mock.patch("barman.clients.cloud_restore.get_snapshot_interface_from_backup_info") diff --git a/tests/test_cli.py b/tests/test_cli.py index f307ef1bb..dd7c79635 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -754,6 +754,7 @@ def test_recover_recovery_instance_kwarg_not_passed( ( ("gcp_zone", "snapshot_recovery_zone"), ("azure_resource_group", None), + ("aws_region", None), ), ) @patch("barman.cli.parse_backup_id") diff --git a/tests/test_cloud_snapshot_interface.py b/tests/test_cloud_snapshot_interface.py index 083e47b61..ce38645dc 100644 --- a/tests/test_cloud_snapshot_interface.py +++ b/tests/test_cloud_snapshot_interface.py @@ -133,11 +133,12 @@ def test_from_config_azure_no_subscription_id(self): @pytest.mark.parametrize( ("snapshot_provider", "interface_cls"), [ - ("aws", None), + ("aws", AwsCloudSnapshotInterface), ("azure", AzureCloudSnapshotInterface), ("gcp", GcpCloudSnapshotInterface), ], ) + @mock.patch("barman.cloud_providers.aws_s3.boto3") @mock.patch("barman.cloud_providers._get_azure_credential") @mock.patch("barman.cloud_providers.azure_blob_storage.import_azure_mgmt_compute") @mock.patch( @@ -148,6 +149,7 @@ def test_from_backup_info_cloud_provider( _mock_google_cloud_compute, _mock_azure_mgmt_compute, _mock_get_azure_credential, + _mock_boto3, snapshot_provider, interface_cls, ): @@ -2991,6 +2993,40 @@ def test_get_attached_volumes(self, mock_ec2_client): # AND the root volume was not included assert root_disk["id"] not in volumes + def test_get_attached_volumes_with_source_snapshots(self, mock_ec2_client): + """ + Verify that attached volumes contain snapshot IDs when the AWS response + includes a snapshot ID for that volume. 
+ """ + # GIVEN a mock snapshots interface + snapshot_interface = AwsCloudSnapshotInterface(region=self.aws_region) + # AND a mock EC2 client which returns an instance with the required disks + # attached + mock_ec2_client.describe_instances.return_value = ( + self._get_mock_describe_instances_resp( + self.aws_disks, + ) + ) + # AND the mock EC2 client returns describe_volume_responses for these disks + mock_ec2_client.describe_volumes.return_value = ( + self._get_mock_describe_volumes_resp(self.aws_disks) + ) + # AND one of those disks has a SnapshotId + mock_ec2_client.describe_volumes.return_value["Volumes"][0][ + "SnapshotId" + ] = "snap-0123" + + # WHEN get_attached_volumes is called + volumes = snapshot_interface.get_attached_volumes(self.aws_instance_id) + + # THEN the source snapshot is set on the volume which had a SnapshotId + assert volumes[self.aws_disks[0]["id"]].source_snapshot == "snap-0123" + + # AND the source snapshot is not set on the other volumes + assert all( + volumes[disk["id"]].source_snapshot is None for disk in self.aws_disks[1:] + ) + def test_get_attached_volumes_for_disks(self, mock_ec2_client): """ Verify that the requested disks are returned as a dict keyed by the expected @@ -3203,18 +3239,40 @@ class TestAwsVolumeMetadata(object): ( "attachment_metadata", "virtualization_type", + "source_snapshot", "expected_virtualization_type", + "expected_source_snapshot", "expected_device_name", "expected_id", ), ( - (None, None, None, None, None), - ({}, None, None, None, None), - ({}, "hvm", "hvm", None, None), + (None, None, None, None, None, None, None), + ({}, None, None, None, None, None, None), + ({}, "hvm", None, "hvm", None, None, None), ( {"Device": "/dev/xvdf", "VolumeId": "vol-0123"}, "hvm", + None, "hvm", + None, + "/dev/xvdf", + "vol-0123", + ), + ( + {"Device": "/dev/xvdf", "VolumeId": "vol-0123"}, + None, + "snap-0123", + None, + "snap-0123", + "/dev/xvdf", + "vol-0123", + ), + ( + {"Device": "/dev/xvdf", "VolumeId": 
"vol-0123"}, + "hvm", + "snap-0123", + "hvm", + "snap-0123", "/dev/xvdf", "vol-0123", ), @@ -3224,15 +3282,20 @@ def test_init( self, attachment_metadata, virtualization_type, + source_snapshot, expected_virtualization_type, + expected_source_snapshot, expected_device_name, expected_id, ): """Verify AwsVolumeMetadata is created from the supplied data.""" # WHEN an AwsVolumeMetadata is created - volume = AwsVolumeMetadata(attachment_metadata, virtualization_type) + volume = AwsVolumeMetadata( + attachment_metadata, virtualization_type, source_snapshot + ) # THEN the resulting objecth as the expected properties assert volume._virtualization_type == expected_virtualization_type + assert volume.source_snapshot == expected_source_snapshot assert volume._device_name == expected_device_name assert volume.id == expected_id