From 3067ae0f62294bd865315be42f5f04bde4ceb30b Mon Sep 17 00:00:00 2001 From: Komal Thareja Date: Tue, 14 May 2024 09:35:18 -0400 Subject: [PATCH] add verify ssh --- fabric_am/config/switch_handler_config.yml | 3 - fabric_am/handlers/switch_handler.py | 107 ++++-------- fabric_am/handlers/vm_handler.py | 156 ++++++------------ .../head_switch_provisioning/tasks/main.yml | 6 +- fabric_am/util/am_constants.py | 1 + fabric_am/util/utils.py | 105 ++++++++++++ 6 files changed, 183 insertions(+), 195 deletions(-) diff --git a/fabric_am/config/switch_handler_config.yml b/fabric_am/config/switch_handler_config.yml index adace5d..2b06e08 100644 --- a/fabric_am/config/switch_handler_config.yml +++ b/fabric_am/config/switch_handler_config.yml @@ -22,9 +22,6 @@ # # # Author: Komal Thareja (kthare10@renci.org) -ansible: - ansible_python_interpreter: /usr/bin/python3.6 -runtime: playbooks: location: /etc/fabric/actor/playbooks inventory_location: /etc/fabric/actor/playbooks/inventory diff --git a/fabric_am/handlers/switch_handler.py b/fabric_am/handlers/switch_handler.py index b8eb932..003ee4e 100644 --- a/fabric_am/handlers/switch_handler.py +++ b/fabric_am/handlers/switch_handler.py @@ -36,6 +36,7 @@ from fabric_am.util.am_constants import AmConstants from fabric_am.util.ansible_helper import AnsibleHelper +from fabric_am.util.utils import Utils class SwitchHandlerException(Exception): @@ -71,8 +72,8 @@ def clean_restart(self): cleanup_playbook = f"{playbook_path}/{cleanup_section[AmConstants.CLEAN_ALL]}" inventory_path = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.PB_INVENTORY] extra_vars = {AmConstants.OPERATION: AmConstants.OP_DELETE_ALL} - self.__execute_ansible(inventory_path=inventory_path, playbook_path=cleanup_playbook, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=cleanup_playbook, + extra_vars=extra_vars, logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Failure to clean up existing VMs: {e}") self.get_logger().error(traceback.format_exc()) @@ -121,11 +122,31 @@ def create(self, unit: ConfigToken) -> Tuple[dict, ConfigToken]: # create switch extra_vars = { - AmConstants.OPERATION: AmConstants.OP_CREATE, + AmConstants.OPERATION: AmConstants.OP_CREATE + } + + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=f"{playbook_path}/{playbook}", + extra_vars=extra_vars, logger=self.get_logger()) + + from ansible.inventory.manager import InventoryManager + from ansible.parsing.dataloader import DataLoader + data_loader = DataLoader() + inventory = InventoryManager(loader=data_loader, + sources=[inventory_path]) + host = inventory.get_host(hostname=f"{sliver.get_site()}-p4.fabric-testbed.net") + ansible_host = host.get_vars().get('ansible_host') + ansible_ssh_user = host.get_vars().get('ansible_ssh_user') + ansible_ssh_pwd = host.get_vars().get('ansible_ssh_pwd') + + Utils.verify_ssh(mgmt_ip=ansible_host, user=ansible_ssh_user, pwd=ansible_ssh_pwd, + logger=self.get_logger(), retry=10) + extra_vars = { + AmConstants.OPERATION: AmConstants.OP_CONFIG, AmConstants.SSH_KEY: ssh_key } - self.__execute_ansible(inventory_path=inventory_path, playbook_path=f"{playbook_path}/{playbook}", - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=f"{playbook_path}/{playbook}", + extra_vars=extra_vars, logger=self.get_logger()) + except Exception as e: self.get_logger().error(e) self.get_logger().error(traceback.format_exc()) @@ -163,9 +184,7 @@ def delete(self, unit: ConfigToken) -> Tuple[dict, ConfigToken]: f"resource_type: {sliver.get_type()}") unit_id = str(unit.get_reservation_id()) - unit_properties = unit.get_properties() - project_id = unit_properties.get(Constants.PROJECT_ID, None) - self.__cleanup(sliver=sliver, unit_id=unit_id, project_id=project_id) + self.__cleanup(sliver=sliver, unit_id=unit_id) except Exception as e: result = {Constants.PROPERTY_TARGET_NAME: Constants.TARGET_DELETE, Constants.PROPERTY_TARGET_RESULT_CODE: Constants.RESULT_CODE_EXCEPTION, @@ -232,7 +251,6 @@ def __cleanup(self, *, sliver: NodeSliver, unit_id: str, raise_exception: bool = Cleanup VM and detach PCI devices :param sliver: Sliver :param unit_id: Unit Id - :param project_id: Project Id :param raise_exception: Raise exception if raise_exception flag is True :return: """ @@ -246,8 +264,8 @@ def __cleanup(self, *, sliver: NodeSliver, unit_id: str, raise_exception: bool = extra_vars = { AmConstants.OPERATION: AmConstants.OP_DELETE } - self.__execute_ansible(inventory_path=inventory_path, playbook_path=f"{playbook_path}/{playbook}", - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=f"{playbook_path}/{playbook}", + extra_vars=extra_vars, logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Exception occurred in cleanup {unit_id} error: {e}") @@ -255,70 +273,3 @@ def __cleanup(self, *, sliver: NodeSliver, unit_id: str, raise_exception: bool = if raise_exception: raise e - def __execute_command(self, *, mgmt_ip: str, user: str, command: str, timeout: int = 60, retry: int = 3): - """ - Execute a command on the VM - :param mgmt_ip Management IP to access the VM - :param user Default Linux user to use for SSH/Ansible - :param command Command to execute - :param timeout Timeout in seconds - :param retry Number of retries - :return: - """ - for i in range(retry): - try: - # Construct the SSH client - ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - key_file = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] - pkey = paramiko.RSAKey.from_private_key_file(key_file) - ssh.connect(mgmt_ip, username=user, timeout=timeout, pkey=pkey) - - # Execute the command - stdin, stdout, stderr = ssh.exec_command(command) - output = stdout.readlines() - ssh.close() - return output - except Exception as e: - self.get_logger().error(f"Exception : {e}") - self.get_logger().error(traceback.format_exc()) - if i < retry - 1: - time.sleep(timeout) - self.get_logger().info(f"Retrying command {command} on VM {mgmt_ip}") - else: - self.get_logger().error(f"Failed to execute command {command} on VM {mgmt_ip}") - raise e - - def __verify_ssh(self, *, mgmt_ip: str, user: str, timeout: int = 60, retry: int = 10): - """ - Verify that the VM is accessible via SSH - :param mgmt_ip Management IP to access the VM - :param user Default Linux user to use for SSH/Ansible - :param retry Number of retries - :param retry_interval Timeout in seconds - - """ - command = f"echo test ssh from {mgmt_ip} > /tmp/fabric_execute_script.sh; " \ - f"chmod +x /tmp/fabric_execute_script.sh; /tmp/fabric_execute_script.sh" - - try: - output = self.__execute_command(mgmt_ip=mgmt_ip, user=user, command=command, - timeout=timeout, retry=retry) - self.get_logger().info(f"Output: {output}") - except Exception as e: - pass - - def __execute_ansible(self, *, inventory_path: str, playbook_path: str, extra_vars: dict, sources: str = None, - private_key_file: str = None, host_vars: dict = None, host: str = None, user: str = None): - ansible_helper = AnsibleHelper(inventory_path=inventory_path, logger=self.get_logger(), - sources=sources) - - ansible_helper.set_extra_vars(extra_vars=extra_vars) - - if host is not None and host_vars is not None and len(host_vars) > 0: - for key, value in host_vars.items(): - ansible_helper.add_vars(host=host, var_name=key, value=value) - - self.get_logger().info(f"Executing playbook {playbook_path} extra_vars: {extra_vars} host_vars: {host_vars}") - ansible_helper.run_playbook(playbook_path=playbook_path, private_key_file=private_key_file, user=user) - return ansible_helper.get_result_callback().get_json_result_ok() diff --git a/fabric_am/handlers/vm_handler.py b/fabric_am/handlers/vm_handler.py index b47af3e..f9dac66 100644 --- a/fabric_am/handlers/vm_handler.py +++ b/fabric_am/handlers/vm_handler.py @@ -29,7 +29,6 @@ import traceback from typing import Tuple, List, Dict -import paramiko from fabric_cf.actor.core.common.constants import Constants from fabric_cf.actor.core.plugins.handlers.config_token import ConfigToken from fabric_cf.actor.handlers.handler_base import HandlerBase @@ -39,7 +38,6 @@ from jinja2 import Environment from fabric_am.util.am_constants import AmConstants -from fabric_am.util.ansible_helper import AnsibleHelper from fabric_am.util.utils import Utils @@ -80,8 +78,8 @@ def clean_restart(self): cleanup_playbook = f"{playbook_path}/{cleanup_section[AmConstants.CLEAN_ALL]}" inventory_path = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.PB_INVENTORY] extra_vars = {AmConstants.OPERATION: AmConstants.OP_DELETE_ALL} - self.__execute_ansible(inventory_path=inventory_path, playbook_path=cleanup_playbook, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=cleanup_playbook, + extra_vars=extra_vars, logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Failure to clean up existing VMs: {e}") self.get_logger().error(traceback.format_exc()) @@ -184,7 +182,9 @@ def create(self, unit: ConfigToken) -> Tuple[dict, ConfigToken]: # Verify SSH connectivity ssh_retries = self.get_config()[AmConstants.RUNTIME_SECTION][AmConstants.RT_SSH_RETRIES] - self.__verify_ssh(mgmt_ip=fip, user=user, retry=ssh_retries) + admin_ssh_key = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] + Utils.verify_ssh(mgmt_ip=fip, user=user, retry=ssh_retries, ssh_key_file=admin_ssh_key, + logger=self.get_logger()) sliver.label_allocations.instance = instance_props.get(AmConstants.SERVER_INSTANCE_NAME, None) @@ -200,7 +200,8 @@ def create(self, unit: ConfigToken) -> Tuple[dict, ConfigToken]: self.__perform_os_server_action(playbook_path=playbook_path_full, inventory_path=inventory_path, vm_name=vmname, unit_id=unit_id, action=AmConstants.OP_REBOOT) - self.__verify_ssh(mgmt_ip=fip, user=user, retry=ssh_retries) + Utils.verify_ssh(mgmt_ip=fip, user=user, retry=ssh_retries, ssh_key_file=admin_ssh_key, + logger=self.get_logger()) sliver.management_ip = fip # Configure Components - only gets triggered via Portal for now @@ -387,7 +388,8 @@ def modify(self, unit: ConfigToken) -> Tuple[dict, ConfigToken]: self.get_logger().info(f"Modify completed") return result, unit - def __build_user_data(self, *, default_user: str, ssh_key: str, init_script: str = None): + @staticmethod + def __build_user_data(*, default_user: str, ssh_key: str, init_script: str = None): user_data = "#!/bin/bash\n" ssh_keys = ssh_key.split(",") for key in ssh_keys: @@ -436,7 +438,8 @@ def __create_vm(self, *, playbook_path: str, inventory_path: str, vm_name: str, AmConstants.INIT_SCRIPT: init_script, AmConstants.USER_DATA: user_data } - ok = self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars) + ok = Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars, + logger=self.get_logger()) server = ok.get(AmConstants.SERVER, None) # Added this code for enabling test suite @@ -475,8 +478,8 @@ def __delete_vm(self, *, playbook_path: str, inventory_path: str, sliver: NodeSl for i in range(delete_retries): try: self.get_logger().debug(f"Delete attempt # {i}") - self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) time.sleep(5) break except Exception as e: @@ -499,8 +502,8 @@ def __delete_vm(self, *, playbook_path: str, inventory_path: str, sliver: NodeSl for i in range(delete_retries): try: self.get_logger().debug(f"Delete via libvirt attempt # {i}") - self.__execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) time.sleep(5) break except Exception as e: @@ -516,8 +519,8 @@ def __delete_vm(self, *, playbook_path: str, inventory_path: str, sliver: NodeSl AmConstants.KVM_GUEST_NAME: sliver.get_label_allocations().instance } try: - self.__execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) except Exception as e: self.get_logger().warning(f'List VM post deletion failed') self.get_logger().error(e) @@ -529,7 +532,6 @@ def __attach_fip(self, *, playbook_path: str, inventory_path: str, vm_name: str, Invoke ansible playbook to attach a floating IP to a provisioned VM :param playbook_path: playbook location :param inventory_path: inventory location - :param host: host :param vm_name: VM Name :param unit_id: Unit Id :return: floating ip assigned to the VM @@ -541,8 +543,8 @@ def __attach_fip(self, *, playbook_path: str, inventory_path: str, vm_name: str, extra_vars = {AmConstants.OPERATION: AmConstants.OP_ATTACH_FIP, AmConstants.VM_NAME: vmname} - ok = self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, - extra_vars=extra_vars) + ok = Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) if self.test_mode: floating_ip = ok[AmConstants.ANSIBLE_FACTS][AmConstants.FLOATING_IP] @@ -602,8 +604,8 @@ def __mount_storage(self, *, component: ComponentSliver, mgmt_ip: str, user: str self.get_logger().info(f"Executing playbook {playbook_path} to mount volume: " f"{component.label_allocations.device_name} " f"on: {mgmt_ip}") - self.__execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, - sources=f"{mgmt_ip},", private_key_file=admin_ssh_key) + Utils.execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, + sources=f"{mgmt_ip},", private_key_file=admin_ssh_key, logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Failed to mount the volume, we ignore the failure: {e}") @@ -623,7 +625,8 @@ def __attach_detach_storage(self, *, playbook_path: str, inventory_path: str, vm if attach: extra_vars[AmConstants.OPERATION] = AmConstants.OP_ATTACH - ok = self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars) + ok = Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) attachments = ok.get(AmConstants.ATTACHMENTS, None) if attachments is not None: for a in attachments: @@ -654,7 +657,8 @@ def __cleanup_vnic(self, *, inventory_path: str, vm_name: str, component: Compon AmConstants.VM_NAME: f'{device_name}-{vm_name}', AmConstants.PORT_NAME: f'{device_name}-{vm_name}-{vm_name}-{ifs_name}'} - return self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars) + return Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars, + logger=self.get_logger()) def __attach_detach_multiple_function_pci(self, *, playbook_path: str, inventory_path: str, host: str, instance_name: str, device_name: str, component: ComponentSliver, @@ -724,8 +728,8 @@ def __attach_detach_multiple_function_pci(self, *, playbook_path: str, inventory AmConstants.PCI_BUS: f"0x{matches[2]}", AmConstants.PCI_SLOT: f"0x{matches[3]}" } - ok = self.__execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, - extra_vars=extra_vars, host=host, host_vars=host_vars) + ok = Utils.execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, + extra_vars=extra_vars, host=host, host_vars=host_vars, logger=self.get_logger()) # In case of Attach, determine the PCI device id from inside the VM # Also, determine the ethernet interface name in case of Shared/Smart NIC @@ -889,8 +893,9 @@ def __attach_detach_pci(self, *, playbook_path: str, inventory_path: str, host: mac = ns.interface_info.interfaces[interface_names[idx]].label_allocations.mac.lower() host_vars[AmConstants.MAC] = mac - ok = self.__execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, - extra_vars=extra_vars, host=worker_node, host_vars=host_vars) + ok = Utils.execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, + extra_vars=extra_vars, host=worker_node, host_vars=host_vars, + logger=self.get_logger()) if attach and ok: idx += 1 @@ -950,8 +955,8 @@ def __cleanup_pci(self, *, playbook_path: str, inventory_path: str, host: str, c self.get_logger().info(f"Device List Size: {len(pci_device_list)} List: {pci_device_list}") for device in pci_device_list: extra_vars[AmConstants.DEVICE] = device - self.__execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, - extra_vars=extra_vars) + Utils.execute_ansible(inventory_path=inventory_path, playbook_path=full_playbook_path, + extra_vars=extra_vars, logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Error occurred cleaning device: {component}") if raise_exception: @@ -1029,7 +1034,7 @@ def __extract_device_addr_octets(*, device_address: str) -> List[str]: return result def __perform_virsh_server_action(self, *, playbook_path: str, inventory_path: str, worker_node_name: str, - instance_name: str, operation: str, vcpu_cpu_map:List[Dict[str, str]] = None, + instance_name: str, operation: str, vcpu_cpu_map: List[Dict[str, str]] = None, node_set: List[str] = None): """ Invoke ansible playbook to perform a server action via openstack commands @@ -1052,8 +1057,8 @@ def __perform_virsh_server_action(self, *, playbook_path: str, inventory_path: s if node_set is not None: extra_vars[AmConstants.NODE_SET] = node_set - return self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path_full, - extra_vars=extra_vars) + return Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path_full, + extra_vars=extra_vars, logger=self.get_logger()) def __perform_os_server_action(self, *, playbook_path: str, inventory_path: str, vm_name: str, unit_id: str, action: str): @@ -1070,7 +1075,8 @@ def __perform_os_server_action(self, *, playbook_path: str, inventory_path: str, extra_vars = {AmConstants.OPERATION: action, AmConstants.VM_NAME: vm_name} - return self.__execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars) + return Utils.execute_ansible(inventory_path=inventory_path, playbook_path=playbook_path, extra_vars=extra_vars, + logger=self.get_logger()) def __get_default_user(self, *, image: str) -> str: """ @@ -1168,8 +1174,9 @@ def configure_network_interface(self, *, mgmt_ip: str, user: str, resource_type: # Grab the SSH Key admin_ssh_key = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] - self.__execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, - sources=f"{mgmt_ip},", private_key_file=admin_ssh_key, user=user) + Utils.execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, + sources=f"{mgmt_ip},", private_key_file=admin_ssh_key, user=user, + logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Exception : {e}") self.get_logger().error(traceback.format_exc()) @@ -1208,85 +1215,13 @@ def __post_boot_config(self, *, mgmt_ip: str, user: str, pci_device_number: str # Grab the SSH Key admin_ssh_key = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] - return self.__execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, - sources=f"{mgmt_ip},", private_key_file=admin_ssh_key, user=user) + return Utils.execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, + sources=f"{mgmt_ip},", private_key_file=admin_ssh_key, user=user, + logger=self.get_logger()) except Exception as e: self.get_logger().error(f"Exception : {e}") self.get_logger().error(traceback.format_exc()) - def __execute_command(self, *, mgmt_ip: str, user: str, command: str, timeout: int = 60, retry: int = 3): - """ - Execute a command on the VM - :param mgmt_ip Management IP to access the VM - :param user Default Linux user to use for SSH/Ansible - :param command Command to execute - :param timeout Timeout in seconds - :param retry Number of retries - :return: - """ - for i in range(retry): - try: - # Construct the SSH client - ssh = paramiko.SSHClient() - ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - key_file = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] - pkey = paramiko.RSAKey.from_private_key_file(key_file) - ssh.connect(mgmt_ip, username=user, timeout=timeout, pkey=pkey) - - # Execute the command - stdin, stdout, stderr = ssh.exec_command(command) - output = stdout.readlines() - ssh.close() - return output - except Exception as e: - self.get_logger().error(f"Exception : {e}") - self.get_logger().error(traceback.format_exc()) - if i < retry - 1: - time.sleep(timeout) - self.get_logger().info(f"Retrying command {command} on VM {mgmt_ip}") - else: - self.get_logger().error(f"Failed to execute command {command} on VM {mgmt_ip}") - raise e - - def __verify_ssh(self, *, mgmt_ip: str, user: str, timeout: int = 60, retry: int = 10): - """ - Verify that the VM is accessible via SSH - :param mgmt_ip Management IP to access the VM - :param user Default Linux user to use for SSH/Ansible - :param retry Number of retries - :param retry_interval Timeout in seconds - - """ - command = f"echo test ssh from {mgmt_ip} > /tmp/fabric_execute_script.sh; " \ - f"chmod +x /tmp/fabric_execute_script.sh; /tmp/fabric_execute_script.sh" - - try: - output = self.__execute_command(mgmt_ip=mgmt_ip, user=user, command=command, - timeout=timeout, retry=retry) - self.get_logger().info(f"Output: {output}") - except Exception as e: - pass - - def __execute_ansible(self, *, inventory_path: str, playbook_path: str, extra_vars: dict, sources: str = None, - private_key_file: str = None, host_vars: dict = None, host: str = None, user: str = None): - ansible_python_interpreter = None - # Head node or Worker - if inventory_path is not None: - ansible_python_interpreter = self.get_ansible_python_interpreter() - ansible_helper = AnsibleHelper(inventory_path=inventory_path, logger=self.get_logger(), - ansible_python_interpreter=ansible_python_interpreter, - sources=sources) - - ansible_helper.set_extra_vars(extra_vars=extra_vars) - - if host is not None and host_vars is not None and len(host_vars) > 0: - for key, value in host_vars.items(): - ansible_helper.add_vars(host=host, var_name=key, value=value) - - self.get_logger().info(f"Executing playbook {playbook_path} extra_vars: {extra_vars} host_vars: {host_vars}") - ansible_helper.run_playbook(playbook_path=playbook_path, private_key_file=private_key_file, user=user) - return ansible_helper.get_result_callback().get_json_result_ok() - def __poa_cpuinfo(self, unit: ConfigToken, data: dict) -> dict: result = {Constants.PROPERTY_TARGET_NAME: Constants.TARGET_POA, Constants.PROPERTY_TARGET_RESULT_CODE: Constants.RESULT_CODE_OK, @@ -1626,8 +1561,9 @@ def __poa_sshkey(self, unit: ConfigToken, data: dict, operation: str) -> dict: # Grab the SSH Key admin_ssh_key = self.get_config()[AmConstants.PLAYBOOK_SECTION][AmConstants.ADMIN_SSH_KEY] - self.__execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, - sources=f"{sliver.management_ip},", private_key_file=admin_ssh_key, user=user) + Utils.execute_ansible(inventory_path=None, playbook_path=playbook_path, extra_vars=extra_vars, + sources=f"{sliver.management_ip},", private_key_file=admin_ssh_key, user=user, + logger=self.get_logger()) result[Constants.PROPERTY_POA_INFO] = { AmConstants.OPERATION: data.get(AmConstants.OPERATION), diff --git a/fabric_am/playbooks/roles/head_switch_provisioning/tasks/main.yml b/fabric_am/playbooks/roles/head_switch_provisioning/tasks/main.yml index 68835b8..a0c44f8 100644 --- a/fabric_am/playbooks/roles/head_switch_provisioning/tasks/main.yml +++ b/fabric_am/playbooks/roles/head_switch_provisioning/tasks/main.yml @@ -35,9 +35,7 @@ when: operation == 'create' or operation == 'delete' - name: Reboot the switch - reboot: - async: 0 - poll: 0 + shell: echo {{ ansible_ssh_pass }} | sudo -S reboot when: operation == 'create' or operation == 'delete' - name: Add public key to authorized_keys @@ -49,4 +47,4 @@ owner: "{{ ansible_ssh_user }}" group: "{{ ansible_ssh_pass }}" mode: "0600" - when: operation == 'create' \ No newline at end of file + when: operation == 'config' \ No newline at end of file diff --git a/fabric_am/util/am_constants.py b/fabric_am/util/am_constants.py index 21b16e6..c3bb284 100644 --- a/fabric_am/util/am_constants.py +++ b/fabric_am/util/am_constants.py @@ -70,6 +70,7 @@ class AmConstants: OP_MOUNT = "mount" OP_ADDKEY = "addkey" OP_REMOVEKEY = "removekey" + OP_CONFIG = "config" PORT_NAME = "portname" NETWORK_NAME = "networkname" diff --git a/fabric_am/util/utils.py b/fabric_am/util/utils.py index cf5a343..683638e 100644 --- a/fabric_am/util/utils.py +++ b/fabric_am/util/utils.py @@ -23,9 +23,17 @@ # # # Author: Komal Thareja (kthare10@renci.org) +import logging import re +import time +import traceback from typing import List, Dict, Any +import paramiko + +from fabric_am.util.am_constants import AmConstants +from fabric_am.util.ansible_helper import AnsibleHelper + class Utils: """ @@ -228,3 +236,100 @@ def parse_numactl(*, numactl_output: str) -> Dict[Any, dict]: else: result[key] = value return result + + @staticmethod + def execute_command(*, mgmt_ip: str, user: str, command: str, logger: logging.Logger, + timeout: int = 60, retry: int = 3, pwd: str = None, ssh_key_file: str = None): + """ + Execute a command on the VM + :param mgmt_ip Management IP to access the VM + :param user Default Linux user to use for SSH/Ansible + :param command Command to execute + :param logger logger + :param timeout Timeout in seconds + :param retry Number of retries + :param pwd password + :param ssh_key_file ssh_key file + :return: + """ + for i in range(retry): + try: + # Construct the SSH client + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + if pwd: + # Use password for authentication + ssh.connect(mgmt_ip, username=user, password=pwd, timeout=timeout) + else: + pkey = paramiko.RSAKey.from_private_key_file(ssh_key_file) + ssh.connect(mgmt_ip, username=user, timeout=timeout, pkey=pkey) + + # Execute the command + stdin, stdout, stderr = ssh.exec_command(command) + output = stdout.readlines() + ssh.close() + return output + except Exception as e: + logger.error(f"Exception : {e}") + logger.error(traceback.format_exc()) + if i < retry - 1: + time.sleep(timeout) + logger.info(f"Retrying command {command} on VM {mgmt_ip}") + else: + logger.error(f"Failed to execute command {command} on VM {mgmt_ip}") + raise e + + @staticmethod + def verify_ssh(*, mgmt_ip: str, user: str, logger: logging.Logger, timeout: int = 60, retry: int = 10, + pwd: str = None, ssh_key_file: str = None): + """ + Verify that the VM is accessible via SSH + :param mgmt_ip Management IP to access the VM + :param user Default Linux user to use for SSH/Ansible + :param logger Logger + :param timeout timeout + :param retry Number of retries + :param pwd password + :param ssh_key_file ssh_key_file + + """ + command = f"echo test ssh from {mgmt_ip} > /tmp/fabric_execute_script.sh; " \ + f"chmod +x /tmp/fabric_execute_script.sh; /tmp/fabric_execute_script.sh" + + try: + output = Utils.execute_command(mgmt_ip=mgmt_ip, user=user, command=command, logger=logger, + timeout=timeout, retry=retry, pwd=pwd, ssh_key_file=ssh_key_file) + logger.info(f"Output: {output}") + except Exception as e: + pass + + @staticmethod + def execute_ansible(*, inventory_path: str, playbook_path: str, extra_vars: dict, logger: logging.Logger, + sources: str = None, private_key_file: str = None, host_vars: dict = None, + host: str = None, user: str = None): + """ + Execute ansible + :param inventory_path: inventory location + :param playbook_path: playbook + :param extra_vars: extra vars + :param logger: logger + :param sources: sources + :param private_key_file: private key file + :param host_vars: host vars + :param host: host + :param user: user + + :return OK results + :raises Exception in case of failure + """ + ansible_helper = AnsibleHelper(inventory_path=inventory_path, logger=logger, sources=sources) + + ansible_helper.set_extra_vars(extra_vars=extra_vars) + + if host is not None and host_vars is not None and len(host_vars) > 0: + for key, value in host_vars.items(): + ansible_helper.add_vars(host=host, var_name=key, value=value) + + logger.info(f"Executing playbook {playbook_path} extra_vars: {extra_vars} host_vars: {host_vars}") + ansible_helper.run_playbook(playbook_path=playbook_path, private_key_file=private_key_file, user=user) + return ansible_helper.get_result_callback().get_json_result_ok()