Skip to content

Commit

Permalink
B #6772: Fix for NUMA and CPU Pinning Discrepancies During VM Save an…
Browse files Browse the repository at this point in the history
…d Live Migration

Signed-off-by: Kristian Feldsam <[email protected]>
  • Loading branch information
feldsam committed Nov 4, 2024
1 parent 0bd4511 commit 305b33e
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 46 deletions.
2 changes: 2 additions & 0 deletions include/History.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,13 @@ class History:public ObjectSQL, public ObjectXML
std::string deployment_file;
std::string context_file;
std::string token_file;
std::string migrate_file;

// Remote paths
std::string checkpoint_file;
std::string rdeployment_file;
std::string system_dir;
std::string rmigrate_file;

/**
* Writes the history record in the DB
Expand Down
24 changes: 24 additions & 0 deletions include/VirtualMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,19 @@ class VirtualMachine : public PoolObjectSQL
return history->token_file;
}

/**
 *  Returns the local migrate filename stored in the VM history record.
 *  The migrate file is in the form:
 *      $ONE_LOCATION/var/vms/$VM_ID/migrate.$SEQ
 *  or, when OpenNebula is installed in root:
 *      /var/lib/one/vms/$VM_ID/migrate.$SEQ
 *  The hasHistory() function MUST be called before this one: the history
 *  pointer is dereferenced here without any check.
 *    @return the migrate file path
 */
const std::string& get_migrate_file() const
{
    return history->migrate_file;
}

/**
* Returns the remote deployment filename. The file is in the form:
* $DS_LOCATION/$SYSTEM_DS/$VM_ID/deployment.$SEQ
Expand All @@ -596,6 +609,17 @@ class VirtualMachine : public PoolObjectSQL
return history->rdeployment_file;
};

/**
 *  Returns the remote migrate filename stored in the VM history record.
 *  The file is in the form:
 *      $DS_LOCATION/$SYSTEM_DS/$VM_ID/migrate.$SEQ
 *  The hasHistory() function MUST be called before this one: the history
 *  pointer is dereferenced here without any check.
 *    @return the remote migrate file path
 */
const std::string& get_rmigrate_file() const
{
    return history->rmigrate_file;
}

/**
* Returns the checkpoint filename for the current host. The checkpoint file
* is in the form:
Expand Down
4 changes: 3 additions & 1 deletion include/VirtualMachineManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,9 @@ class VirtualMachineManager :
const std::string& tmpl,
int ds_id,
int sgid = -1,
int nicid = -1);
int nicid = -1,
const std::string& lmfile = "",
const std::string& rmfile = "");

public:
/**
Expand Down
8 changes: 0 additions & 8 deletions src/rm/RequestManagerVirtualMachine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1181,14 +1181,6 @@ void VirtualMachineMigrate::request_execute(xmlrpc_c::paramList const& paramList
if (live)
{
action = VMActions::LIVE_MIGRATE_ACTION;

if ( vm->is_pinned() )
{
att.resp_msg = "VM with a pinned NUMA topology cannot be live-migrated";
failure_response(ACTION, att);

return;
}
}
else
{
Expand Down
10 changes: 10 additions & 0 deletions src/vm/History.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ void History::non_persistent_data()

token_file = os.str();

os.str("");
os << vm_lhome << "/migrate." << seq;

migrate_file = os.str();

// ----------- Remote Locations ------------
os.str("");
os << ds_location << "/" << ds_id << "/" << oid;
Expand All @@ -141,6 +146,11 @@ void History::non_persistent_data()
os << system_dir << "/deployment." << seq;

rdeployment_file = os.str();

os.str("");
os << system_dir << "/migrate." << seq;

rmigrate_file = os.str();
}

/* -------------------------------------------------------------------------- */
Expand Down
2 changes: 2 additions & 0 deletions src/vmm/LibVirtDriverKVM.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2316,5 +2316,7 @@ int LibVirtDriver::deployment_description_kvm(

file << "</domain>" << endl;

file.close();

return 0;
}
79 changes: 74 additions & 5 deletions src/vmm/VirtualMachineManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,9 @@ string VirtualMachineManager::format_message(
const string& tmpl,
int ds_id,
int sgid,
int nicid)
int nicid,
const string& lmfile,
const string& rmfile)
{
ostringstream oss;

Expand Down Expand Up @@ -266,6 +268,17 @@ string VirtualMachineManager::format_message(
oss << "<REMOTE_DEPLOYMENT_FILE/>";
}

if (!lmfile.empty())
{
oss << "<LOCAL_MIGRATE_FILE>" << lmfile << "</LOCAL_MIGRATE_FILE>";
oss << "<REMOTE_MIGRATE_FILE>" << rmfile << "</REMOTE_MIGRATE_FILE>";
}
else
{
oss << "<LOCAL_MIGRATE_FILE/>";
oss << "<REMOTE_MIGRATE_FILE/>";
}

if (!cfile.empty())
{
oss << "<CHECKPOINT_FILE>" << cfile << "</CHECKPOINT_FILE>";
Expand Down Expand Up @@ -495,8 +508,9 @@ void VirtualMachineManager::trigger_save(int vid)
trigger([this, vid]
{
const VirtualMachineManagerDriver * vmd;
int rc;

string hostname, checkpoint_file;
string hostname, checkpoint_file, migrate_file, rmigrate_file;
string vm_tmpl;
string drv_msg;
int ds_id;
Expand Down Expand Up @@ -535,12 +549,31 @@ void VirtualMachineManager::trigger_save(int vid)
hostname = vm->get_previous_hostname();
checkpoint_file = vm->get_previous_checkpoint_file();
ds_id = vm->get_previous_ds_id();

// Generate the VM description and write it to the migrate file
os << "Generating migrate file: " << vm->get_migrate_file();

vm->log("VMM", Log::INFO, os);

os.str("");

rc = vmd->deployment_description(vm.get(), vm->get_migrate_file());

if (rc != 0)
{
goto error_file;
}

migrate_file = vm->get_migrate_file();
rmigrate_file = vm->get_rmigrate_file();
}
else
{
hostname = vm->get_hostname();
checkpoint_file = vm->get_checkpoint_file();
ds_id = vm->get_ds_id();
migrate_file = "";
rmigrate_file = "";
}

// Invoke driver method
Expand All @@ -556,7 +589,10 @@ void VirtualMachineManager::trigger_save(int vid)
"",
vm->to_xml(vm_tmpl),
ds_id,
-1);
-1,
-1,
migrate_file,
rmigrate_file);

vmd->save(vid, drv_msg);

Expand All @@ -570,6 +606,11 @@ void VirtualMachineManager::trigger_save(int vid)
os << "save_action, error getting driver " << vm->get_vmm_mad();
goto error_common;

error_file:
os << "save_action, error generating migrate file: "
<< vm->get_migrate_file();
goto error_common;

error_previous_history:
os << "save_action, VM has no previous history";

Expand Down Expand Up @@ -1154,10 +1195,12 @@ void VirtualMachineManager::trigger_migrate(int vid)
trigger([this, vid]
{
const VirtualMachineManagerDriver * vmd;
int rc;

ostringstream os;
string vm_tmpl;
string drv_msg;
string tm_command = "";

// Get the VM from the pool
auto vm = vmpool->get(vid);
Expand Down Expand Up @@ -1187,6 +1230,24 @@ void VirtualMachineManager::trigger_migrate(int vid)

Nebula::instance().get_tm()->migrate_transfer_command(vm.get(), os);

tm_command = os.str();

os.str("");

// Generate the VM description and write it to the migrate file
os << "Generating migrate file: " << vm->get_migrate_file();

vm->log("VMM", Log::INFO, os);

os.str("");

rc = vmd->deployment_description(vm.get(), vm->get_migrate_file());

if (rc != 0)
{
goto error_file;
}

// Invoke driver method
drv_msg = format_message(
vm->get_previous_hostname(),
Expand All @@ -1195,12 +1256,15 @@ void VirtualMachineManager::trigger_migrate(int vid)
"",
"",
"",
os.str(),
tm_command,
"",
vm->get_system_dir(),
vm->to_xml(vm_tmpl),
vm->get_previous_ds_id(),
-1);
-1,
-1,
vm->get_migrate_file(),
vm->get_rmigrate_file());

vmd->migrate(vid, drv_msg);

Expand All @@ -1214,6 +1278,11 @@ void VirtualMachineManager::trigger_migrate(int vid)
os << "migrate_action, error getting driver " << vm->get_vmm_mad();
goto error_common;

error_file:
os << "migrate_action, error generating migrate file: "
<< vm->get_migrate_file();
goto error_common;

error_previous_history:
os << "migrate_action, error VM has no previous history";

Expand Down
76 changes: 61 additions & 15 deletions src/vmm_mad/exec/one_vmm_exec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def initialize(driver, id, action, xml_data)

# For migration
get_data(:dest_host, :MIGR_HOST)
get_data(:local_mfile, :LOCAL_MIGRATE_FILE)
get_data(:remote_mfile, :REMOTE_MIGRATE_FILE)

# For disk hotplugging
get_data(:disk_target_path)
Expand Down Expand Up @@ -536,21 +538,39 @@ def cancel(id, drv_message)
#
def save(id, drv_message)
action = VmmAction.new(self, id, :save, drv_message)
steps = []

steps = [
# Save the Virtual Machine state
{
:driver => :vmm,
:action => :save,
:parameters => [:deploy_id, :checkpoint_file, :host]
},
# Execute networking clean up operations
{
:driver => :vnm,
:action => :clean,
:parameters => [:host]
local_mfile = action.data[:local_mfile]
is_action_local = action_is_local?(:save)

if !is_action_local && local_mfile && File.size?(local_mfile)
mdata = File.read(local_mfile)
mfile = action.data[:remote_mfile]

# Save migration data to remote location
steps << {
:driver => :vmm,
:action => "/bin/cat - >#{mfile}",
:is_local => false,
:stdin => mdata,
:no_extra_params => true
}
]
end

steps.concat([
# Save the Virtual Machine state
{
:driver => :vmm,
:action => :save,
:parameters => [:deploy_id, :checkpoint_file, :host]
},
# Execute networking clean up operations
{
:driver => :vnm,
:action => :clean,
:parameters => [:host]
}
])

action.run(steps)
end
Expand Down Expand Up @@ -620,7 +640,33 @@ def migrate(id, drv_message)
post << action.data[:tm_command]
failed << action.data[:tm_command]

steps = [
steps = []

is_action_local = action_is_local?(:migrate)

if !is_action_local
local_mfile = action.data[:local_mfile]

if !local_mfile || File.empty?(local_mfile)
send_message(ACTION[:migrate], RESULT[:failure], id,
"Cannot open migrate file #{local_mfile}")
return
end

mdata = File.read(local_mfile)
mfile = action.data[:remote_mfile]

# Save migration data to remote location
steps << {
:driver => :vmm,
:action => "/bin/cat - >#{mfile}",
:is_local => false,
:stdin => mdata,
:no_extra_params => true
}
end

steps.concat([
# Execute a pre-migrate TM setup
{
:driver => :tm,
Expand Down Expand Up @@ -674,7 +720,7 @@ def migrate(id, drv_message)
:stdin => action.data[:vm],
:no_fail => true
}
]
])

action.run(steps)
end
Expand Down
Loading

0 comments on commit 305b33e

Please sign in to comment.