From 305b33e7deb5e45e89a94a843251acdef49ab2d6 Mon Sep 17 00:00:00 2001 From: Kristian Feldsam Date: Mon, 4 Nov 2024 21:13:18 +0100 Subject: [PATCH] B #6772: Fix for NUMA and CPU Pinning Discrepancies During VM Save and Live Migration Signed-off-by: Kristian Feldsam --- include/History.h | 2 + include/VirtualMachine.h | 24 ++++++ include/VirtualMachineManager.h | 4 +- src/rm/RequestManagerVirtualMachine.cc | 8 -- src/vm/History.cc | 10 +++ src/vmm/LibVirtDriverKVM.cc | 2 + src/vmm/VirtualMachineManager.cc | 79 ++++++++++++++++++-- src/vmm_mad/exec/one_vmm_exec.rb | 76 +++++++++++++++---- src/vmm_mad/remotes/kvm/migrate | 6 +- src/vmm_mad/remotes/kvm/migrate_local | 17 ++++- src/vmm_mad/remotes/kvm/save | 23 ++++-- src/vmm_mad/remotes/lib/kvm/opennebula_vm.rb | 8 +- 12 files changed, 213 insertions(+), 46 deletions(-) diff --git a/include/History.h b/include/History.h index 1eb619d538d..837b6069035 100644 --- a/include/History.h +++ b/include/History.h @@ -114,11 +114,13 @@ class History:public ObjectSQL, public ObjectXML std::string deployment_file; std::string context_file; std::string token_file; + std::string migrate_file; // Remote paths std::string checkpoint_file; std::string rdeployment_file; std::string system_dir; + std::string rmigrate_file; /** * Writes the history record in the DB diff --git a/include/VirtualMachine.h b/include/VirtualMachine.h index eaa0a814a30..b767a08ac47 100644 --- a/include/VirtualMachine.h +++ b/include/VirtualMachine.h @@ -585,6 +585,19 @@ class VirtualMachine : public PoolObjectSQL return history->token_file; } + /** + * Returns the migrate filename. The migrate file is in the form: + * $ONE_LOCATION/var/vms/$VM_ID/migrate.$SEQ + * or, in case that OpenNebula is installed in root + * /var/lib/one/vms/$VM_ID/migrate.$SEQ + * The hasHistory() function MUST be called before this one. + * @return the migrate file path + */ + const std::string & get_migrate_file() const + { + return history->migrate_file; + }; + /** * Returns the remote deployment filename. The file is in the form: * $DS_LOCATION/$SYSTEM_DS/$VM_ID/deployment.$SEQ @@ -596,6 +609,17 @@ class VirtualMachine : public PoolObjectSQL return history->rdeployment_file; }; + /** + * Returns the remote migrate filename. The file is in the form: + * $DS_LOCATION/$SYSTEM_DS/$VM_ID/migrate.$SEQ + * The hasHistory() function MUST be called before this one. + * @return the migrate filename + */ + const std::string & get_rmigrate_file() const + { + return history->rmigrate_file; + }; + /** * Returns the checkpoint filename for the current host. The checkpoint file * is in the form: diff --git a/include/VirtualMachineManager.h b/include/VirtualMachineManager.h index 5eeef166592..214c53d56db 100644 --- a/include/VirtualMachineManager.h +++ b/include/VirtualMachineManager.h @@ -394,7 +394,9 @@ class VirtualMachineManager : const std::string& tmpl, int ds_id, int sgid = -1, - int nicid = -1); + int nicid = -1, + const std::string& lmfile = "", + const std::string& rmfile = ""); public: /** diff --git a/src/rm/RequestManagerVirtualMachine.cc b/src/rm/RequestManagerVirtualMachine.cc index c6326536d25..80a215b44f9 100644 --- a/src/rm/RequestManagerVirtualMachine.cc +++ b/src/rm/RequestManagerVirtualMachine.cc @@ -1181,14 +1181,6 @@ void VirtualMachineMigrate::request_execute(xmlrpc_c::paramList const& paramList if (live) { action = VMActions::LIVE_MIGRATE_ACTION; - - if ( vm->is_pinned() ) - { - att.resp_msg = "VM with a pinned NUMA topology cannot be live-migrated"; - failure_response(ACTION, att); - - return; - } } else { diff --git a/src/vm/History.cc b/src/vm/History.cc index 4832ca16db4..48b049409cc 100644 --- a/src/vm/History.cc +++ b/src/vm/History.cc @@ -128,6 +128,11 @@ void History::non_persistent_data() token_file = os.str(); + os.str(""); + os << vm_lhome << "/migrate." << seq; + + migrate_file = os.str(); + // ----------- Remote Locations ------------ os.str(""); os << ds_location << "/" << ds_id << "/" << oid; @@ -141,6 +146,11 @@ void History::non_persistent_data() os << system_dir << "/deployment." << seq; rdeployment_file = os.str(); + + os.str(""); + os << system_dir << "/migrate." << seq; + + rmigrate_file = os.str(); } /* -------------------------------------------------------------------------- */ diff --git a/src/vmm/LibVirtDriverKVM.cc b/src/vmm/LibVirtDriverKVM.cc index 0ffd70ba24b..166f07a46b5 100644 --- a/src/vmm/LibVirtDriverKVM.cc +++ b/src/vmm/LibVirtDriverKVM.cc @@ -2316,5 +2316,7 @@ int LibVirtDriver::deployment_description_kvm( file << "" << endl; + file.close(); + return 0; } diff --git a/src/vmm/VirtualMachineManager.cc b/src/vmm/VirtualMachineManager.cc index b6b4c050800..986efe908a9 100644 --- a/src/vmm/VirtualMachineManager.cc +++ b/src/vmm/VirtualMachineManager.cc @@ -221,7 +221,9 @@ string VirtualMachineManager::format_message( const string& tmpl, int ds_id, int sgid, - int nicid) + int nicid, + const string& lmfile, + const string& rmfile) { ostringstream oss; @@ -266,6 +268,17 @@ string VirtualMachineManager::format_message( oss << ""; } + if (!lmfile.empty()) + { + oss << "" << lmfile << ""; + oss << "" << rmfile << ""; + } + else + { + oss << ""; + oss << ""; + } + if (!cfile.empty()) { oss << "" << cfile << ""; @@ -495,8 +508,9 @@ void VirtualMachineManager::trigger_save(int vid) trigger([this, vid] { const VirtualMachineManagerDriver * vmd; + int rc; - string hostname, checkpoint_file; + string hostname, checkpoint_file, migrate_file, rmigrate_file; string vm_tmpl; string drv_msg; int ds_id; @@ -535,12 +549,31 @@ void VirtualMachineManager::trigger_save(int vid) hostname = vm->get_previous_hostname(); checkpoint_file = vm->get_previous_checkpoint_file(); ds_id = vm->get_previous_ds_id(); + + //Generate VM description file + os << "Generating migrate file: " << vm->get_migrate_file(); + + vm->log("VMM", Log::INFO, os); + + os.str(""); + + rc = vmd->deployment_description(vm.get(), vm->get_migrate_file()); + + if (rc != 0) + { + goto error_file; + } + + migrate_file = vm->get_migrate_file(); + rmigrate_file = vm->get_rmigrate_file(); } else { hostname = vm->get_hostname(); checkpoint_file = vm->get_checkpoint_file(); ds_id = vm->get_ds_id(); + migrate_file = ""; + rmigrate_file = ""; } // Invoke driver method @@ -556,7 +589,10 @@ void VirtualMachineManager::trigger_save(int vid) "", vm->to_xml(vm_tmpl), ds_id, - -1); + -1, + -1, + migrate_file, + rmigrate_file); vmd->save(vid, drv_msg); @@ -570,6 +606,11 @@ void VirtualMachineManager::trigger_save(int vid) os << "save_action, error getting driver " << vm->get_vmm_mad(); goto error_common; +error_file: + os << "save_action, error generating migrate file: " + << vm->get_migrate_file(); + goto error_common; + error_previous_history: os << "save_action, VM has no previous history"; @@ -1154,10 +1195,12 @@ void VirtualMachineManager::trigger_migrate(int vid) trigger([this, vid] { const VirtualMachineManagerDriver * vmd; + int rc; ostringstream os; string vm_tmpl; string drv_msg; + string tm_command = ""; // Get the VM from the pool auto vm = vmpool->get(vid); @@ -1187,6 +1230,24 @@ void VirtualMachineManager::trigger_migrate(int vid) Nebula::instance().get_tm()->migrate_transfer_command(vm.get(), os); + tm_command = os.str(); + + os.str(""); + + //Generate VM description file + os << "Generating migrate file: " << vm->get_migrate_file(); + + vm->log("VMM", Log::INFO, os); + + os.str(""); + + rc = vmd->deployment_description(vm.get(), vm->get_migrate_file()); + + if (rc != 0) + { + goto error_file; + } + // Invoke driver method drv_msg = format_message( vm->get_previous_hostname(), @@ -1195,12 +1256,15 @@ void VirtualMachineManager::trigger_migrate(int vid) "", "", "", - os.str(), + tm_command, "", vm->get_system_dir(), vm->to_xml(vm_tmpl), vm->get_previous_ds_id(), - -1); + -1, + -1, + vm->get_migrate_file(), + vm->get_rmigrate_file()); vmd->migrate(vid, drv_msg); @@ -1214,6 +1278,11 @@ void VirtualMachineManager::trigger_migrate(int vid) os << "migrate_action, error getting driver " << vm->get_vmm_mad(); goto error_common; +error_file: + os << "migrate_action, error generating migrate file: " + << vm->get_migrate_file(); + goto error_common; + error_previous_history: os << "migrate_action, error VM has no previous history"; diff --git a/src/vmm_mad/exec/one_vmm_exec.rb b/src/vmm_mad/exec/one_vmm_exec.rb index 8d5c14909a9..74eece239fb 100755 --- a/src/vmm_mad/exec/one_vmm_exec.rb +++ b/src/vmm_mad/exec/one_vmm_exec.rb @@ -95,6 +95,8 @@ def initialize(driver, id, action, xml_data) # For migration get_data(:dest_host, :MIGR_HOST) + get_data(:local_mfile, :LOCAL_MIGRATE_FILE) + get_data(:remote_mfile, :REMOTE_MIGRATE_FILE) # For disk hotplugging get_data(:disk_target_path) @@ -536,21 +538,39 @@ def cancel(id, drv_message) # def save(id, drv_message) action = VmmAction.new(self, id, :save, drv_message) + steps = [] - steps = [ - # Save the Virtual Machine state - { - :driver => :vmm, - :action => :save, - :parameters => [:deploy_id, :checkpoint_file, :host] - }, - # Execute networking clean up operations - { - :driver => :vnm, - :action => :clean, - :parameters => [:host] + local_mfile = action.data[:local_mfile] + is_action_local = action_is_local?(:save) + + if !is_action_local && local_mfile && File.size?(local_mfile) + mdata = File.read(local_mfile) + mfile = action.data[:remote_mfile] + + # Save migration data to remote location + steps << { + :driver => :vmm, + :action => "/bin/cat - >#{mfile}", + :is_local => false, + :stdin => mdata, + :no_extra_params => true } - ] + end + + steps.concat([ + # Save the Virtual Machine state + { + :driver => :vmm, + :action => :save, + :parameters => [:deploy_id, :checkpoint_file, :host] + }, + # Execute networking clean up operations + { + :driver => :vnm, + :action => :clean, + :parameters => [:host] + } + ]) action.run(steps) end @@ -620,7 +640,33 @@ def migrate(id, drv_message) post << action.data[:tm_command] failed << action.data[:tm_command] - steps = [ + steps = [] + + is_action_local = action_is_local?(:migrate) + + if !is_action_local + local_mfile = action.data[:local_mfile] + + if !local_mfile || File.empty?(local_mfile) + send_message(ACTION[:migrate], RESULT[:failure], id, + "Cannot open migrate file #{local_mfile}") + return + end + + mdata = File.read(local_mfile) + mfile = action.data[:remote_mfile] + + # Save migration data to remote location + steps << { + :driver => :vmm, + :action => "/bin/cat - >#{mfile}", + :is_local => false, + :stdin => mdata, + :no_extra_params => true + } + end + + steps.concat([ # Execute a pre-migrate TM setup { :driver => :tm, @@ -674,7 +720,7 @@ def migrate(id, drv_message) :stdin => action.data[:vm], :no_fail => true } - ] + ]) action.run(steps) end diff --git a/src/vmm_mad/remotes/kvm/migrate b/src/vmm_mad/remotes/kvm/migrate index 36151eedc4c..55d0c8b54ac 100755 --- a/src/vmm_mad/remotes/kvm/migrate +++ b/src/vmm_mad/remotes/kvm/migrate @@ -69,6 +69,7 @@ load_remote_env # - @dst_host # - @vm_dir # - @shared +# - @migrate_file # ------------------------------------------------------------------------------ # sync paths to the destination VM folder @@ -192,7 +193,7 @@ def local_migration(kvm_vm) rsync_paths(pre_sync) - rc, _out, err = kvm_vm.live_migrate_disks(@dst_host, devs) + rc, _out, err = kvm_vm.live_migrate_disks(@dst_host, devs, @migrate_file) cleanup_host(err) if rc != 0 @@ -212,6 +213,7 @@ begin @vm_dir = Pathname.new(action_xml['/VMM_DRIVER_ACTION_DATA/DISK_TARGET_PATH']).cleanpath @shared = action_xml['/VMM_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SHARED'].casecmp('YES') == 0 + @migrate_file = Pathname.new(action_xml['/VMM_DRIVER_ACTION_DATA/REMOTE_MIGRATE_FILE']).cleanpath kvm_vm = KvmDomain.new(@deploy_id) @@ -220,7 +222,7 @@ begin # Migrate VMs using shared/local storage if @shared - rc, _out, err = kvm_vm.live_migrate(@dst_host) + rc, _out, err = kvm_vm.live_migrate(@dst_host, @migrate_file) cleanup_host(kvm_vm, err) if rc != 0 else diff --git a/src/vmm_mad/remotes/kvm/migrate_local b/src/vmm_mad/remotes/kvm/migrate_local index 264fdf0a016..a818c19ac11 100755 --- a/src/vmm_mad/remotes/kvm/migrate_local +++ b/src/vmm_mad/remotes/kvm/migrate_local @@ -16,13 +16,24 @@ # limitations under the License. # #--------------------------------------------------------------------------- # -source $(dirname $0)/../../etc/vmm/kvm/kvmrc -source $(dirname $0)/../../scripts_common.sh +DRIVER_PATH=$(dirname $0) +source "$DRIVER_PATH/../../etc/vmm/kvm/kvmrc" +source "$DRIVER_PATH/../../scripts_common.sh" +XPATH="$DRIVER_PATH/../../datastore/xpath.rb" +STDIN=$(cat -) deploy_id=$1 dest_host=$2 src_host=$3 +unset i j XPATH_ELEMENTS +while IFS= read -r -d '' element; do + XPATH_ELEMENTS[i++]="$element" +done < <($XPATH -b $STDIN \ + /VMM_DRIVER_ACTION_DATA/LOCAL_MIGRATE_FILE) + +LOCAL_MIGRATE_FILE="${XPATH_ELEMENTS[j++]}" + # migration can't be done with domain snapshots, drop them first snaps=$(monitor_and_log \ "virsh --connect $QEMU_PROTOCOL://$src_host/system snapshot-list $deploy_id --name 2>/dev/null" \ @@ -42,7 +53,7 @@ fi # do live migration, but cleanup target host in case of error virsh --connect $QEMU_PROTOCOL://$src_host/system \ - migrate --live $deploy_id $MIGRATE_OPTIONS $QEMU_PROTOCOL://$dest_host/system + migrate --live $deploy_id $MIGRATE_OPTIONS --xml $LOCAL_MIGRATE_FILE $QEMU_PROTOCOL://$dest_host/system RC=$? diff --git a/src/vmm_mad/remotes/kvm/save b/src/vmm_mad/remotes/kvm/save index 4d17a35313b..fcd203b68c5 100755 --- a/src/vmm_mad/remotes/kvm/save +++ b/src/vmm_mad/remotes/kvm/save @@ -21,12 +21,11 @@ DRIVER_PATH=$(dirname $0) source $DRIVER_PATH/../../etc/vmm/kvm/kvmrc source $DRIVER_PATH/../../scripts_common.sh +DRV_MESSAGE=$(cat) + DEPLOY_ID=$1 FILE=$2 -VM_ID=$4 -DS_ID=$5 - if [ -f "$FILE" ]; then log "Moving old checkpoint file $FILE" epoch=`date +%s` @@ -38,9 +37,20 @@ fi touch "$FILE" chmod 666 "$FILE" -retry_if "active block job" 3 5 \ +# Extracting the migrate file +XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin" +IFS= read -r -d '' MIGRATE_FILE < <(echo "$DRV_MESSAGE" | $XPATH /VMM_DRIVER_ACTION_DATA/REMOTE_MIGRATE_FILE) + +# Check if MIGRATE_FILE is not empty and the file exists +if [ -n "$MIGRATE_FILE" ] && [ -f "$MIGRATE_FILE" ]; then + retry_if "active block job" 3 5 \ + exec_and_log "virsh --connect $LIBVIRT_URI save $DEPLOY_ID $FILE --xml $MIGRATE_FILE" \ + "Could not save $DEPLOY_ID to $FILE" +else + retry_if "active block job" 3 5 \ exec_and_log "virsh --connect $LIBVIRT_URI save $DEPLOY_ID $FILE" \ "Could not save $DEPLOY_ID to $FILE" +fi # Compact memory if [ "x$CLEANUP_MEMORY_ON_STOP" = "xyes" ]; then @@ -52,13 +62,10 @@ fi #------------------------------------------------------------------------------- # Exit if no stdin data is available -if [ -t 0 ]; then +if [ -z "$DRV_MESSAGE" ]; then exit 0 fi -# There is data in stdin, read it -DRV_MESSAGE=$(cat) - # The data is the driver message. Extracting the System DS TM_MAD XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin" IFS= read -r -d '' TM_MAD < <(echo "$DRV_MESSAGE" | $XPATH /VMM_DRIVER_ACTION_DATA/DATASTORE/TM_MAD) diff --git a/src/vmm_mad/remotes/lib/kvm/opennebula_vm.rb b/src/vmm_mad/remotes/lib/kvm/opennebula_vm.rb index 118626ca2c9..5e1fb77672b 100644 --- a/src/vmm_mad/remotes/lib/kvm/opennebula_vm.rb +++ b/src/vmm_mad/remotes/lib/kvm/opennebula_vm.rb @@ -290,8 +290,8 @@ def readonly?(disk_path) # Live migrate the domain to the target host (SHARED STORAGE variant) # @param host[String] name of the target host - def live_migrate(host) - cmd = "migrate --live #{ENV['MIGRATE_OPTIONS']} #{@domain}" + def live_migrate(host, mfile) + cmd = "migrate --live #{ENV['MIGRATE_OPTIONS']} --xml #{mfile} #{@domain}" cmd << " #{virsh_uri(host)}" virsh_retry(cmd, 'active block job', virsh_tries) @@ -300,8 +300,8 @@ def live_migrate(host) # Live migrate the domain to the target host (LOCAL STORAGE variant) # @param host[String] name of the target host # @param devs[Array] of the disks that will be copied - def live_migrate_disks(host, devs) - cmd = "migrate --live #{ENV['MIGRATE_OPTIONS']} --suspend" + def live_migrate_disks(host, devs, mfile) + cmd = "migrate --live #{ENV['MIGRATE_OPTIONS']} --xml #{mfile} --suspend" cmd << " #{@domain} #{virsh_uri(host)}" if !devs.empty?