Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(manager): ignore NotADirectory error when uninstalling services #1855

Merged
merged 3 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sn_node_manager/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ sn_service_management = { path = "../sn_service_management", version = "0.3.5" }
sn-releases = "0.2.6"
sn_transfers = { path = "../sn_transfers", version = "0.18.6" }
sysinfo = "0.30.12"
thiserror = "1.0.23"
tokio = { version = "1.26", features = ["full"] }
tracing = { version = "~0.1.26" }
tonic = { version = "0.6.2" }
Expand Down
36 changes: 36 additions & 0 deletions sn_node_manager/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2024 MaidSafe.net limited.
//
// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3.
// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed
// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. Please review the Licences for the specific language governing
// permissions and limitations relating to use of the SAFE Network Software.

use thiserror::Error;

pub type Result<T, E = Error> = std::result::Result<T, E>;

#[derive(Debug, Error)]
#[allow(missing_docs)]
pub enum Error {
#[error(transparent)]
Json(#[from] serde_json::Error),
#[error(transparent)]
Io(#[from] std::io::Error),
#[error("The PID of the process was not found after starting it.")]
PidNotFoundAfterStarting,
#[error("The PID of the process was not set.")]
PidNotSet,
#[error(transparent)]
SemverError(#[from] semver::Error),
#[error("The service(s) is already running: {0:?}")]
ServiceAlreadyRunning(Vec<String>),
#[error("The service(s) is not running: {0:?}")]
ServiceNotRunning(Vec<String>),
#[error(transparent)]
ServiceManagementError(#[from] sn_service_management::Error),
#[error("The service status is not as expected. Expected: {expected:?}")]
ServiceStatusMismatch {
expected: sn_service_management::ServiceStatus,
},
}
89 changes: 53 additions & 36 deletions sn_node_manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ extern crate tracing;
pub mod add_services;
pub mod cmd;
pub mod config;
pub mod error;
pub mod helpers;
pub mod local;
pub mod rpc;
Expand All @@ -35,10 +36,7 @@ impl From<u8> for VerbosityLevel {
}
}

use color_eyre::{
eyre::{eyre, OptionExt},
Help, Result,
};
use crate::error::{Error, Result};
use colored::Colorize;
use semver::Version;
use sn_service_management::{
Expand Down Expand Up @@ -121,10 +119,7 @@ impl<T: ServiceStateActions + Send> ServiceManager<T> {
}
Err(sn_service_management::error::Error::ServiceProcessNotFound(_)) => {
error!("The '{}' service has failed to start because ServiceProcessNotFound when fetching PID", self.service.name());
return Err(eyre!(
"The '{}' service has failed to start",
self.service.name()
));
return Err(Error::PidNotFoundAfterStarting);
}
Err(err) => {
error!("Failed to start service, because PID could not be obtained: {err}");
Expand Down Expand Up @@ -187,7 +182,7 @@ impl<T: ServiceStateActions + Send> ServiceManager<T> {
Ok(())
}
ServiceStatus::Running => {
let pid = self.service.pid().ok_or_eyre("The PID was not set")?;
let pid = self.service.pid().ok_or(Error::PidNotSet)?;
let name = self.service.name();

if self.service_control.is_service_process_running(pid) {
Expand Down Expand Up @@ -229,30 +224,27 @@ impl<T: ServiceStateActions + Send> ServiceManager<T> {

pub async fn remove(&mut self, keep_directories: bool) -> Result<()> {
if let ServiceStatus::Running = self.service.status() {
if self.service_control.is_service_process_running(
self.service
.pid()
.ok_or_eyre("Could not obtain PID for running node")?,
) {
if self
.service_control
.is_service_process_running(self.service.pid().ok_or(Error::PidNotSet)?)
{
error!(
"Service {} is already running. Stop it before removing it",
self.service.name()
);
return Err(eyre!("A running service cannot be removed")
.suggestion("Stop the node first then try again"));
}
// If the node wasn't actually running, we should give the user an opportunity to
// check why it may have failed before removing everything.
self.service.on_stop().await?;
error!(
"The service: {} was marked as running but it had actually stopped",
return Err(Error::ServiceAlreadyRunning(vec![self.service.name()]));
} else {
// If the node wasn't actually running, we should give the user an opportunity to
// check why it may have failed before removing everything.
self.service.on_stop().await?;
error!(
"The service: {} was marked as running but it had actually stopped. You may want to check the logs for errors before removing it. To remove the service, run the command again.",
self.service.name()
);
return Err(
eyre!("This service was marked as running but it had actually stopped")
.suggestion("You may want to check the logs for errors before removing it")
.suggestion("To remove the service, run the command again."),
);
return Err(Error::ServiceStatusMismatch {
expected: ServiceStatus::Running,
});
}
}

match self
Expand All @@ -264,13 +256,16 @@ impl<T: ServiceStateActions + Send> ServiceManager<T> {
}
Err(err) => match err {
ServiceError::ServiceRemovedManually(name) => {
warn!("The user appears to have removed the {name} service manually",);
warn!("The user appears to have removed the {name} service manually. Skipping the error.",);
// The user has deleted the service definition file, which the service manager
// crate treats as an error. We then return our own error type, which allows us
// to handle it here and just proceed with removing the service from the
// registry.
println!("The user appears to have removed the {name} service manually");
}
ServiceError::ServiceDoesNotExists(name) => {
warn!("The service {name} has most probably been removed already, it does not exists. Skipping the error.");
}
_ => {
error!("Error uninstalling the service: {err}");
return Err(err.into());
Expand Down Expand Up @@ -485,13 +480,28 @@ pub async fn status_report(
}
}

if fail
&& node_registry
if fail {
let non_running_services = node_registry
.nodes
.iter()
.any(|n| n.status != ServiceStatus::Running)
{
return Err(eyre!("One or more nodes are not in a running state"));
.filter_map(|n| {
if n.status != ServiceStatus::Running {
Some(n.service_name.clone())
} else {
None
}
})
.collect::<Vec<String>>();
if non_running_services.is_empty() {
info!("Fail is set to true, but all services are running.");
} else {
error!(
"One or more nodes are not in a running state: {non_running_services:?}
"
);

return Err(Error::ServiceNotRunning(non_running_services));
}
}

Ok(())
Expand Down Expand Up @@ -617,6 +627,7 @@ mod tests {
use assert_fs::prelude::*;
use assert_matches::assert_matches;
use async_trait::async_trait;
use color_eyre::eyre::Result;
use libp2p_identity::PeerId;
use mockall::{mock, predicate::*};
use predicates::prelude::*;
Expand Down Expand Up @@ -1078,7 +1089,10 @@ mod tests {
let result = service_manager.start().await;
match result {
Ok(_) => panic!("This test should have resulted in an error"),
Err(e) => assert_eq!("The 'safenode1' service has failed to start", e.to_string()),
Err(e) => assert_eq!(
"The PID of the process was not found after starting it.",
e.to_string()
),
}

Ok(())
Expand Down Expand Up @@ -3542,7 +3556,10 @@ mod tests {
let result = service_manager.remove(false).await;
match result {
Ok(_) => panic!("This test should result in an error"),
Err(e) => assert_eq!("A running service cannot be removed", e.to_string()),
Err(e) => assert_eq!(
"The service(s) is already running: [\"safenode1\"]",
e.to_string()
),
}

Ok(())
Expand Down Expand Up @@ -3605,7 +3622,7 @@ mod tests {
match result {
Ok(_) => panic!("This test should result in an error"),
Err(e) => assert_eq!(
"This service was marked as running but it had actually stopped",
"The service status is not as expected. Expected: Running",
e.to_string()
),
}
Expand Down
21 changes: 14 additions & 7 deletions sn_service_management/src/control.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,19 +264,26 @@ impl ServiceControl for ServiceController {
}
match manager.uninstall(ServiceUninstallCtx { label }) {
Ok(()) => Ok(()),
Err(e) => match e.kind() {
std::io::ErrorKind::NotFound => {
Err(err) => {
if std::io::ErrorKind::NotFound == err.kind() {
error!("Error while uninstall service, service file might have been removed manually: {service_name}");
// In this case the user has removed the service definition file manually,
// which the service manager crate treats as an error. We can propagate the
// it to the caller and they can decide how to handle it.
Err(Error::ServiceRemovedManually(service_name.to_string()))
} else if err.raw_os_error() == Some(267) {
// This requires the unstable io_error_more feature, use raw code for now
// else if err.kind() == std::io::ErrorKind::NotADirectory {}

// This happens on windows when the service has been already cleared, but was not updated
// in the registry. Happens when the Service application (in windows) is open while calling
// 'remove' or 'reset'.
Err(Error::ServiceDoesNotExists(service_name.to_string()))
} else {
error!("Error while uninstalling service: {err:?}");
Err(err.into())
}
_ => {
error!("Error while uninstalling service: {e:?}");
Err(e.into())
}
},
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions sn_service_management/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ pub enum Error {
RpcRecordAddressError(String),
#[error("Could not find process at '{0}'")]
ServiceProcessNotFound(String),
#[error("The service '{0}' does not exists and cannot be removed.")]
ServiceDoesNotExists(String),
#[error("The user may have removed the '{0}' service outwith the node manager")]
ServiceRemovedManually(String),
#[error("Failed to create service user account")]
Expand Down
2 changes: 1 addition & 1 deletion sn_service_management/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ pub mod safenode_manager_proto {
tonic::include_proto!("safenode_manager_proto");
}

use crate::error::{Error, Result};
use async_trait::async_trait;
use auditor::AuditorServiceData;
use libp2p::Multiaddr;
Expand All @@ -34,6 +33,7 @@ use std::{
};

pub use daemon::{DaemonService, DaemonServiceData};
pub use error::{Error, Result};
pub use faucet::{FaucetService, FaucetServiceData};
pub use node::{NodeService, NodeServiceData};

Expand Down
Loading