Skip to content

Commit

Permalink
B #6687: Fix monitoring initialization and restart
Browse files Browse the repository at this point in the history
* Cleanup oned in case of initialization error

* Fix monitoring after onemonitord restart. The code includes a "hook" point in case a driver is re-started so custom code can be executed. InformationManager sends the list of hosts and raft status in this case.

* B #5801: Update error msg, in case of duplicated drivers
  • Loading branch information
paczerny authored Sep 5, 2024
1 parent 822581a commit 7b07def
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 64 deletions.
15 changes: 15 additions & 0 deletions include/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ class Driver
streamer.register_action(t, a);
};

/**
* Set a callback to be called when the driver is restarted and reconnects
*/
void set_reconnect_callback(std::function<void()> callback)
{
reconnect_callback = callback;
}

protected:
Driver() = default;

Expand Down Expand Up @@ -161,6 +169,11 @@ class Driver
*/
std::atomic<bool> terminate = {false};

/**
* Reconnect callback, called when the driver restarts
*/
std::function<void()> reconnect_callback;

/**
* Starts the driver. This function creates a new process and sets up the
* communication pipes.
Expand Down Expand Up @@ -358,6 +371,8 @@ ::start_listener()
start_driver(error);

streamer.fd(from_drv);

if (reconnect_callback) reconnect_callback();
}
});
}
Expand Down
9 changes: 9 additions & 0 deletions include/DriverManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ class DriverManager

static Log::MessageType log_type(char type);

/**
* Callback called when the driver is reconnected. Override this function
* to perform any actions when the driver is reconnected
*/
virtual void reconnected() {};

private:
std::map<std::string, std::unique_ptr<D>> drivers;

Expand Down Expand Up @@ -207,7 +213,10 @@ int DriverManager<D>::start(std::string& error)
{
for (auto& driver : drivers)
{
driver.second->set_reconnect_callback(std::bind(&DriverManager<D>::reconnected, this));

auto rc = driver.second->start(error);

if (rc != 0)
{
NebulaLog::error("DrM", "Unable to start driver '" + driver.first
Expand Down
5 changes: 5 additions & 0 deletions include/InformationManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class InformationManager : public DriverManager<Driver<im_msg_t>>
*/
void raft_status(RaftManager::State raft);

/**
* Called when the driver is reconnected
*/
void reconnected() override;

protected:
/**
* Received undefined message -> print error
Expand Down
5 changes: 5 additions & 0 deletions include/NebulaLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ class NebulaLog
return _log_type;
};

static bool initialized()
{
return (logger != 0);
}

private:
NebulaLog() {};

Expand Down
11 changes: 7 additions & 4 deletions share/scripts/one
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
if [ -z "$ONE_LOCATION" ]; then
ONE_PID=/var/run/one/oned.pid
ONE_SCHEDPID=/var/run/one/sched.pid
ONE_HEMPID=/var/run/one/hem.pid
ONE_CONF=/etc/one/oned.conf
ONE_DB=/var/lib/one/one.db
ONE_LOG=/var/log/one/oned.log
Expand All @@ -36,7 +35,6 @@ if [ -z "$ONE_LOCATION" ]; then
else
ONE_PID=$ONE_LOCATION/var/oned.pid
ONE_SCHEDPID=$ONE_LOCATION/var/sched.pid
ONE_HEMPID=$ONE_LOCATION/var/hem.pid
ONE_CONF=$ONE_LOCATION/etc/oned.conf
ONE_DB=$ONE_LOCATION/var/one.db
ONE_LOG=$ONE_LOCATION/var/oned.log
Expand Down Expand Up @@ -246,14 +244,19 @@ start_hem()
[ -f "$ONE_HEM_LOG" ] && mv $ONE_HEM_LOG{,.$(date '+%Y%m%d%H%M%S')}
fi

onehem-server start > /dev/null 2>&1
HEM_ERROR=$(mktemp /tmp/hem-error.XXXXXX)

onehem-server start > $HEM_ERROR 2>&1

LASTRC=$?

if [ $LASTRC -ne 0 ]; then
echo "Error starting onehem-server"
echo "Error starting onehem-server: $(cat $HEM_ERROR)"
rm -f $HEM_ERROR
exit 1
fi

rm -f $HEM_ERROR
}

#------------------------------------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions src/im/InformationManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,16 @@ void InformationManager::raft_status(RaftManager::State state)
imd->write(msg);
}

/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */

void InformationManager::reconnected()
{
auto rftm = Nebula::instance().get_raftm();
raft_status(rftm->get_state());
}


/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */

Expand Down
3 changes: 1 addition & 2 deletions src/monitor/src/monitor/Monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ void Monitor::start()

if (config->load_configuration() != 0)
{
throw runtime_error("Error reading monitor configuration file" +
conf_filename);
throw runtime_error("Error reading monitor configuration file " + conf_filename);
}

string datastore_location;
Expand Down
117 changes: 61 additions & 56 deletions src/nebula/Nebula.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,67 @@ using namespace std;

Nebula::~Nebula()
{
// -----------------------------------------------------------
// Stop the managers & free resources
// -----------------------------------------------------------

if (rm) rm->finalize();

if (raftm) raftm->finalize();

if (!cache)
{
if (sam) sam->finalize();

if (vmm) vmm->finalize();
if (lcm) lcm->finalize();

if (tm) tm->finalize();
if (dm) dm->finalize();

if (im) im->finalize();
if (hm) hm->finalize();

if (imagem) imagem->finalize();
if (marketm) marketm->finalize();

if (ipamm) ipamm->finalize();

//sleep to wait drivers???
if (vmm) vmm->join_thread();
if (lcm) lcm->join_thread();
if (tm) tm->join_thread();
if (dm) dm->join_thread();

if (hm) hm->join_thread();
if (ipamm) ipamm->join_thread();
}

if (aclm) aclm->finalize();

if (authm)
{
authm->finalize();

authm->join_thread();
}

if (is_federation_slave() && aclm)
{
aclm->join_thread();
}


//XML Library
xmlCleanupParser();

ssl_util::SSLMutex::finalize();

if (NebulaLog::initialized())
{
NebulaLog::log("ONE", Log::INFO, "All modules finalized, exiting.\n");
}

delete vmpool;
delete vnpool;
delete hpool;
Expand Down Expand Up @@ -1201,7 +1262,6 @@ void Nebula::start(bool bootstrap_only)

#ifdef SYSTEMD
// ---- Notify service manager ----

sd_notify(0, "READY=1");
#endif

Expand All @@ -1215,61 +1275,6 @@ void Nebula::start(bool bootstrap_only)

sigwait(&mask, &signal);

// -----------------------------------------------------------
// Stop the managers & free resources
// -----------------------------------------------------------

rm->finalize();

raftm->finalize();

if (!cache)
{
sam->finalize();

vmm->finalize();
lcm->finalize();

tm->finalize();
dm->finalize();

im->finalize();
hm->finalize();

imagem->finalize();
marketm->finalize();

ipamm->finalize();

//sleep to wait drivers???
vmm->join_thread();
lcm->join_thread();
tm->join_thread();
dm->join_thread();

hm->join_thread();
ipamm->join_thread();
}

aclm->finalize();

authm->finalize();

authm->join_thread();

if (is_federation_slave())
{
aclm->join_thread();
}


//XML Library
xmlCleanupParser();

ssl_util::SSLMutex::finalize();

NebulaLog::log("ONE", Log::INFO, "All modules finalized, exiting.\n");

return;

error_mad:
Expand Down
2 changes: 1 addition & 1 deletion src/template/NebulaTemplate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ int NebulaTemplate::load_configuration()

if ( rc != 0 && error != 0)
{
cout << "\nError while parsing configuration file:\n" << error << endl;
cout << "\nError while parsing configuration file: " << error << endl;

free(error);

Expand Down
1 change: 1 addition & 0 deletions src/vmm/VirtualMachineManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2742,6 +2742,7 @@ int VirtualMachineManager::load_drivers(const vector<const VectorAttribute*>& _m

if ( rc != 0 )
{
NebulaLog::error("VMM", "\tDriver already exists, name: " + name);
return rc;
}

Expand Down
2 changes: 1 addition & 1 deletion src/vmm/VirtualMachineManagerDriver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ VirtualMachineManagerDriver::VirtualMachineManagerDriver(
}
else
{
NebulaLog::log("VMM", Log::INFO, "Using default imported VMs actions");
NebulaLog::log("VMM", Log::INFO, "\tUsing default imported VMs actions");

it = attrs.find("NAME");

Expand Down

0 comments on commit 7b07def

Please sign in to comment.