From 00580463fa456d91a04f826daaf1ee867c184cae Mon Sep 17 00:00:00 2001
From: Kanstantsin Kuzmin
Date: Tue, 10 Dec 2024 16:35:13 +0000
Subject: [PATCH 1/3] Count supported managed DB resources.

---
 aws_resource_count.py | 83 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/aws_resource_count.py b/aws_resource_count.py
index 0bed6f1..10680cc 100644
--- a/aws_resource_count.py
+++ b/aws_resource_count.py
@@ -181,6 +181,54 @@ def get_region_cluster_nodes(session: CoveSession, service_name: str, region_nam
 
     return count
 
+
+@retry
+def get_region_rds_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+    supported_db_engines = ["postgres", "mysql", "mongodb"]
+    if hasattr(session, "session_information"):
+        region_name = session.session_information['Region']
+    client = session.client("rds", region_name=region_name)
+    paginator = client.get_paginator("describe_db_instances")
+    count = 0
+    clusters_set = set()
+    for page in paginator.paginate():
+        for db_instance in page["DBInstances"]:
+            db_engine = db_instance["Engine"]
+            db_engine_normalized = db_engine.replace("aurora-", "").replace("postgresql", "postgres")
+            if db_engine_normalized in supported_db_engines:
+                if not db_instance.get('ReadReplicaSourceDBInstanceIdentifier'):
+                    if cluster_name := db_instance.get('DBClusterIdentifier'):
+                        clusters_set.add(cluster_name)
+                    else:
+                        count += 1
+    count += len(clusters_set)
+    return count
+
+
+@retry
+def get_region_dynamodb_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+    if hasattr(session, "session_information"):
+        region_name = session.session_information['Region']
+    client = session.client("dynamodb", region_name=region_name)
+    paginator = client.get_paginator("list_tables")
+    count = 0
+    for page in paginator.paginate():
+        count += len(page["TableNames"])
+    return count
+
+
+@retry
+def get_region_redshift_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+    if hasattr(session, "session_information"):
+        region_name = session.session_information['Region']
+    client = session.client("redshift", region_name=region_name)
+    paginator = client.get_paginator("describe_clusters")
+    count = 0
+    for page in paginator.paginate():
+        count += len(page["Clusters"])
+    return count
+
+
 SERVICES_CONF: Dict[str, Any] = {
     "ec2": {
         "function": get_region_instances,
@@ -211,7 +259,22 @@ def get_region_cluster_nodes(session: CoveSession, service_name: str, region_nam
         "function": get_region_cluster_nodes,
         "display_name": "Container Hosts",
         "workload_units": 1
-    }
+    },
+    "rds": {
+        "function": get_region_rds_resources,
+        "display_name": "RDS Instances and Clusters",
+        "workload_units": 1
+    },
+    "dynamodb": {
+        "function": get_region_dynamodb_resources,
+        "display_name": "DynamoDB Tables",
+        "workload_units": 50
+    },
+    "redshift": {
+        "function": get_region_redshift_resources,
+        "display_name": "Redshift Clusters",
+        "workload_units": 50
+    },
 }
 
 ALL_REGIONS = [r["RegionName"] for r in boto3.client("ec2").describe_regions()["Regions"]]
@@ -281,6 +344,15 @@ def set_skip_resources(args: argparse.Namespace) -> None:
     if args.skip_container_hosts:
         skipped_resources.append(SERVICES_CONF["eks"]['display_name'])
         SERVICES_CONF.pop("eks")
+    if args.skip_rds:
+        skipped_resources.append(SERVICES_CONF["rds"]['display_name'])
+        SERVICES_CONF.pop("rds")
+    if args.skip_dynamodb:
+        skipped_resources.append(SERVICES_CONF["dynamodb"]['display_name'])
+        SERVICES_CONF.pop("dynamodb")
+    if args.skip_redshift:
+        skipped_resources.append(SERVICES_CONF["redshift"]['display_name'])
+        SERVICES_CONF.pop("redshift")
     if skipped_resources:
         logger.info(f"Skip counting the following resources: {', '.join(skipped_resources)}.")
 
@@ -311,6 +383,15 @@ def main():
     _parser.add_argument("--skip-container-hosts", action="store_true",
                          help=f"Skip counting {SERVICES_CONF['eks']['display_name']}")
 
+    _parser.add_argument("--skip-rds", action="store_true",
+                         help=f"Skip counting {SERVICES_CONF['rds']['display_name']}")
+
+    _parser.add_argument("--skip-dynamodb", action="store_true",
+                         help=f"Skip counting {SERVICES_CONF['dynamodb']['display_name']}")
+
+    _parser.add_argument("--skip-redshift", action="store_true",
+                         help=f"Skip counting {SERVICES_CONF['redshift']['display_name']}")
+
     _parser.add_argument("--show-logs-per-account", action="store_true",
                          help=f"Log resource count per AWS account")
 
From 7e5331252224d1d22e8470483ec5de3c137f0d2e Mon Sep 17 00:00:00 2001
From: Kanstantsin Kuzmin
Date: Wed, 11 Dec 2024 12:39:46 +0000
Subject: [PATCH 2/3] Address comments

---
Review notes on this revision of the patch:

* get_region_rds_resources: every counter now takes
  `args: Optional[argparse.Namespace] = None`, but the size limit was read
  as `args.max_db_size_gb` unconditionally, which raises AttributeError
  whenever a caller omits `args`. The lookup is now guarded with
  `args is not None` and falls back to MAX_DB_SIZE_GB.
* `MAX_DB_SIZE_GB=1024` is spaced as `MAX_DB_SIZE_GB = 1024` (PEP 8).
* Both changes keep the per-hunk line counts, so the hunk headers and the
  diffstat below are unchanged. The blob ids on the `index` lines were
  computed for the earlier revision of this series.

 aws_resource_count.py | 58 ++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/aws_resource_count.py b/aws_resource_count.py
index 10680cc..eddd19b 100644
--- a/aws_resource_count.py
+++ b/aws_resource_count.py
@@ -24,6 +24,8 @@
 
 has_enumeration_errors: bool = False
 
+MAX_DB_SIZE_GB = 1024
+
 
 @dataclass
 class VmImage:
@@ -70,7 +72,7 @@ def wrapper(*args, **kwargs):
 
 
 @retry
-def get_region_serverless_containers(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_serverless_containers(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("ecs", region_name=region_name)
@@ -87,7 +89,7 @@ def get_region_serverless_containers(session: CoveSession, service_name: str, re
 
 
 @retry
-def get_region_instances(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_instances(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("ec2", region_name=region_name)
@@ -100,7 +102,7 @@ def get_region_instances(session: CoveSession, service_name: str, region_name: O
 
 
 @retry
-def get_region_functions(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_functions(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("lambda", region_name=region_name)
@@ -112,7 +114,7 @@ def get_region_functions(session: CoveSession, service_name: str, region_name: O
 
 
 @retry
-def get_region_ecr_repos(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_ecr_repos(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("ecr", region_name=region_name)
@@ -147,7 +149,7 @@ def get_image_last_used_time(vm_image: VmImage) -> Optional[datetime.datetime]:
 
 
 @retry
-def get_region_vm_images(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_vm_images(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("ec2", region_name=region_name)
@@ -161,7 +163,7 @@ def get_region_vm_images(session: CoveSession, service_name: str, region_name: O
 
 
 @retry
-def get_region_cluster_nodes(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_cluster_nodes(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     eks_client = session.client("eks", region_name=region_name)
@@ -183,7 +185,7 @@ def get_region_cluster_nodes(session: CoveSession, service_name: str, region_nam
 
 
 @retry
-def get_region_rds_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_rds_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     supported_db_engines = ["postgres", "mysql", "mongodb"]
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
@@ -191,11 +193,13 @@ def get_region_rds_resources(session: CoveSession, service_name: str, region_nam
     paginator = client.get_paginator("describe_db_instances")
     count = 0
     clusters_set = set()
+    max_db_size_gb = int(args.max_db_size_gb) if args is not None and args.max_db_size_gb else MAX_DB_SIZE_GB
     for page in paginator.paginate():
         for db_instance in page["DBInstances"]:
             db_engine = db_instance["Engine"]
+            db_size = int(db_instance.get("AllocatedStorage", 0))
             db_engine_normalized = db_engine.replace("aurora-", "").replace("postgresql", "postgres")
-            if db_engine_normalized in supported_db_engines:
+            if db_engine_normalized in supported_db_engines and db_size <= max_db_size_gb:
                 if not db_instance.get('ReadReplicaSourceDBInstanceIdentifier'):
                     if cluster_name := db_instance.get('DBClusterIdentifier'):
                         clusters_set.add(cluster_name)
@@ -206,7 +210,7 @@ def get_region_rds_resources(session: CoveSession, service_name: str, region_nam
 
 
 @retry
-def get_region_dynamodb_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_dynamodb_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("dynamodb", region_name=region_name)
@@ -218,7 +222,7 @@ def get_region_dynamodb_resources(session: CoveSession, service_name: str, regio
 
 
 @retry
-def get_region_redshift_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None) -> int:
+def get_region_redshift_resources(session: CoveSession, service_name: str, region_name: Optional[str] = None, args: Optional[argparse.Namespace] = None) -> int:
     if hasattr(session, "session_information"):
         region_name = session.session_information['Region']
     client = session.client("redshift", region_name=region_name)
@@ -262,17 +266,17 @@ def get_region_redshift_resources(session: CoveSession, service_name: str, regio
     },
     "rds": {
         "function": get_region_rds_resources,
-        "display_name": "RDS Instances and Clusters",
+        "display_name": "Managed Databases - RDS",
         "workload_units": 1
     },
-    "dynamodb": {
+    "ddb": {
         "function": get_region_dynamodb_resources,
-        "display_name": "DynamoDB Tables",
+        "display_name": "Data Warehouses - DynamoDB",
         "workload_units": 50
     },
     "redshift": {
         "function": get_region_redshift_resources,
-        "display_name": "Redshift Clusters",
+        "display_name": "Data Warehouses - Redshift",
         "workload_units": 50
     },
 }
@@ -287,13 +291,13 @@ def get_cove_region_resources(session: CoveSession) -> Dict[str, int]:
     return results
 
 
-def current_account_resources_count(session: boto3.Session) -> Dict[str, int]:
+def current_account_resources_count(session: boto3.Session, args: argparse.Namespace) -> Dict[str, int]:
     logger.info(f"Counting resources for the current account...")
     total_results: Dict[str, int] = defaultdict(int)
     for i, region in enumerate(ALL_REGIONS):
         logger.info(f"Region: {region} ({i + 1}/{len(ALL_REGIONS)})")
         for service_name, conf in SERVICES_CONF.items():
-            total_results[service_name] += conf["function"](session, service_name, region)
+            total_results[service_name] += conf["function"](session, service_name, region, args)
     return total_results
 
 
@@ -344,13 +348,12 @@ def set_skip_resources(args: argparse.Namespace) -> None:
     if args.skip_container_hosts:
         skipped_resources.append(SERVICES_CONF["eks"]['display_name'])
         SERVICES_CONF.pop("eks")
-    if args.skip_rds:
+    if args.skip_managed_dbs:
         skipped_resources.append(SERVICES_CONF["rds"]['display_name'])
         SERVICES_CONF.pop("rds")
-    if args.skip_dynamodb:
-        skipped_resources.append(SERVICES_CONF["dynamodb"]['display_name'])
-        SERVICES_CONF.pop("dynamodb")
-    if args.skip_redshift:
+    if args.skip_data_warehouses:
+        skipped_resources.append(SERVICES_CONF["ddb"]['display_name'])
+        SERVICES_CONF.pop("ddb")
         skipped_resources.append(SERVICES_CONF["redshift"]['display_name'])
         SERVICES_CONF.pop("redshift")
     if skipped_resources:
@@ -383,18 +386,17 @@ def main():
     _parser.add_argument("--skip-container-hosts", action="store_true",
                          help=f"Skip counting {SERVICES_CONF['eks']['display_name']}")
 
-    _parser.add_argument("--skip-rds", action="store_true",
+    _parser.add_argument("--skip-managed-dbs", action="store_true",
                          help=f"Skip counting {SERVICES_CONF['rds']['display_name']}")
 
-    _parser.add_argument("--skip-dynamodb", action="store_true",
-                         help=f"Skip counting {SERVICES_CONF['dynamodb']['display_name']}")
-
-    _parser.add_argument("--skip-redshift", action="store_true",
-                         help=f"Skip counting {SERVICES_CONF['redshift']['display_name']}")
+    _parser.add_argument("--skip-data-warehouses", action="store_true",
+                         help=f"Skip counting {SERVICES_CONF['ddb']['display_name']} and {SERVICES_CONF['redshift']['display_name']}")
 
     _parser.add_argument("--show-logs-per-account", action="store_true",
                          help=f"Log resource count per AWS account")
 
+    _parser.add_argument("--max-db-size-gb", help=f"List managed DBs up to a certain size", required=False)
+
     args = _parser.parse_args()
     set_skip_resources(args)
     if not SERVICES_CONF:
@@ -402,7 +404,7 @@ def main():
         return
     show_logs_per_account: bool = args.show_logs_per_account
     session = boto3.Session()
-    total_results: Dict[str, int] = current_account_resources_count(session)
+    total_results: Dict[str, int] = current_account_resources_count(session, args)
     accounts_list: List[str] = args.accounts_list.strip().split(",") if args.accounts_list else []
     if args.only_current_account:
         if show_logs_per_account:
From 3d5d09c4bbc579840257a158049bb4461deeeb7f Mon Sep 17 00:00:00 2001
From: kanstantsink-orca <126084426+kanstantsink-orca@users.noreply.github.com>
Date: Mon, 16 Dec 2024 10:13:42 +0000
Subject: [PATCH 3/3] Update aws_resource_count.py

Change workload units for DDB to 4
---
 aws_resource_count.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aws_resource_count.py b/aws_resource_count.py
index eddd19b..40c46b3 100644
--- a/aws_resource_count.py
+++ b/aws_resource_count.py
@@ -272,7 +272,7 @@ def get_region_redshift_resources(session: CoveSession, service_name: str, regio
     "ddb": {
         "function": get_region_dynamodb_resources,
         "display_name": "Data Warehouses - DynamoDB",
-        "workload_units": 50
+        "workload_units": 4
     },
     "redshift": {
         "function": get_region_redshift_resources,