diff --git a/Cargo.lock b/Cargo.lock index d2bce90204..fe30c88a0d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6948,6 +6948,7 @@ dependencies = [ "sn_service_management", "sn_transfers", "sysinfo", + "thiserror", "tokio", "tonic 0.6.2", "tracing", diff --git a/sn_node_manager/Cargo.toml b/sn_node_manager/Cargo.toml index af16e03b6a..346e1e7e0a 100644 --- a/sn_node_manager/Cargo.toml +++ b/sn_node_manager/Cargo.toml @@ -50,6 +50,7 @@ sn_service_management = { path = "../sn_service_management", version = "0.3.5" } sn-releases = "0.2.6" sn_transfers = { path = "../sn_transfers", version = "0.18.6" } sysinfo = "0.30.12" +thiserror = "1.0.23" tokio = { version = "1.26", features = ["full"] } tracing = { version = "~0.1.26" } tonic = { version = "0.6.2" } diff --git a/sn_node_manager/src/error.rs b/sn_node_manager/src/error.rs new file mode 100644 index 0000000000..dd892c77a2 --- /dev/null +++ b/sn_node_manager/src/error.rs @@ -0,0 +1,36 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use thiserror::Error; + +pub type Result = std::result::Result; + +#[derive(Debug, Error)] +#[allow(missing_docs)] +pub enum Error { + #[error(transparent)] + Json(#[from] serde_json::Error), + #[error(transparent)] + Io(#[from] std::io::Error), + #[error("The PID of the process was not found after starting it.")] + PidNotFoundAfterStarting, + #[error("The PID of the process was not set.")] + PidNotSet, + #[error(transparent)] + SemverError(#[from] semver::Error), + #[error("The service(s) is already running: {0:?}")] + ServiceAlreadyRunning(Vec), + #[error("The service(s) is not running: {0:?}")] + ServiceNotRunning(Vec), + #[error(transparent)] + ServiceManagementError(#[from] sn_service_management::Error), + #[error("The service status is not as expected. Expected: {expected:?}")] + ServiceStatusMismatch { + expected: sn_service_management::ServiceStatus, + }, +} diff --git a/sn_node_manager/src/lib.rs b/sn_node_manager/src/lib.rs index b0aa43afa5..f44c3d36e7 100644 --- a/sn_node_manager/src/lib.rs +++ b/sn_node_manager/src/lib.rs @@ -12,6 +12,7 @@ extern crate tracing; pub mod add_services; pub mod cmd; pub mod config; +pub mod error; pub mod helpers; pub mod local; pub mod rpc; @@ -35,10 +36,7 @@ impl From for VerbosityLevel { } } -use color_eyre::{ - eyre::{eyre, OptionExt}, - Help, Result, -}; +use crate::error::{Error, Result}; use colored::Colorize; use semver::Version; use sn_service_management::{ @@ -121,10 +119,7 @@ impl ServiceManager { } Err(sn_service_management::error::Error::ServiceProcessNotFound(_)) => { error!("The '{}' service has failed to start because ServiceProcessNotFound when fetching PID", self.service.name()); - return Err(eyre!( - "The '{}' service has failed to start", - self.service.name() - )); + return Err(Error::PidNotFoundAfterStarting); } Err(err) => { error!("Failed to start service, because PID could not be obtained: {err}"); @@ -187,7 +182,7 @@ impl ServiceManager { Ok(()) } ServiceStatus::Running => { - let pid = self.service.pid().ok_or_eyre("The PID was not set")?; + let pid = self.service.pid().ok_or(Error::PidNotSet)?; let name = self.service.name(); if self.service_control.is_service_process_running(pid) { @@ -229,30 +224,27 @@ impl ServiceManager { pub async fn remove(&mut self, keep_directories: bool) -> Result<()> { if let ServiceStatus::Running = self.service.status() { - if self.service_control.is_service_process_running( - self.service - .pid() - .ok_or_eyre("Could not obtain PID for running node")?, - ) { + if self + .service_control + .is_service_process_running(self.service.pid().ok_or(Error::PidNotSet)?) + { error!( "Service {} is already running. Stop it before removing it", self.service.name() ); - return Err(eyre!("A running service cannot be removed") - .suggestion("Stop the node first then try again")); - } - // If the node wasn't actually running, we should give the user an opportunity to - // check why it may have failed before removing everything. - self.service.on_stop().await?; - error!( - "The service: {} was marked as running but it had actually stopped", + return Err(Error::ServiceAlreadyRunning(vec![self.service.name()])); + } else { + // If the node wasn't actually running, we should give the user an opportunity to + // check why it may have failed before removing everything. + self.service.on_stop().await?; + error!( + "The service: {} was marked as running but it had actually stopped. You may want to check the logs for errors before removing it. To remove the service, run the command again.", self.service.name() ); - return Err( - eyre!("This service was marked as running but it had actually stopped") - .suggestion("You may want to check the logs for errors before removing it") - .suggestion("To remove the service, run the command again."), - ); + return Err(Error::ServiceStatusMismatch { + expected: ServiceStatus::Running, + }); + } } match self @@ -264,13 +256,16 @@ impl ServiceManager { } Err(err) => match err { ServiceError::ServiceRemovedManually(name) => { - warn!("The user appears to have removed the {name} service manually",); + warn!("The user appears to have removed the {name} service manually. Skipping the error.",); // The user has deleted the service definition file, which the service manager // crate treats as an error. We then return our own error type, which allows us // to handle it here and just proceed with removing the service from the // registry. println!("The user appears to have removed the {name} service manually"); } + ServiceError::ServiceDoesNotExists(name) => { + warn!("The service {name} has most probably been removed already, it does not exists. Skipping the error."); + } _ => { error!("Error uninstalling the service: {err}"); return Err(err.into()); @@ -485,13 +480,28 @@ pub async fn status_report( } } - if fail - && node_registry + if fail { + let non_running_services = node_registry .nodes .iter() - .any(|n| n.status != ServiceStatus::Running) - { - return Err(eyre!("One or more nodes are not in a running state")); + .filter_map(|n| { + if n.status != ServiceStatus::Running { + Some(n.service_name.clone()) + } else { + None + } + }) + .collect::>(); + if non_running_services.is_empty() { + info!("Fail is set to true, but all services are running."); + } else { + error!( + "One or more nodes are not in a running state: {non_running_services:?} + " + ); + + return Err(Error::ServiceNotRunning(non_running_services)); + } } Ok(()) @@ -617,6 +627,7 @@ mod tests { use assert_fs::prelude::*; use assert_matches::assert_matches; use async_trait::async_trait; + use color_eyre::eyre::Result; use libp2p_identity::PeerId; use mockall::{mock, predicate::*}; use predicates::prelude::*; @@ -1078,7 +1089,10 @@ mod tests { let result = service_manager.start().await; match result { Ok(_) => panic!("This test should have resulted in an error"), - Err(e) => assert_eq!("The 'safenode1' service has failed to start", e.to_string()), + Err(e) => assert_eq!( + "The PID of the process was not found after starting it.", + e.to_string() + ), } Ok(()) @@ -3542,7 +3556,10 @@ mod tests { let result = service_manager.remove(false).await; match result { Ok(_) => panic!("This test should result in an error"), - Err(e) => assert_eq!("A running service cannot be removed", e.to_string()), + Err(e) => assert_eq!( + "The service(s) is already running: [\"safenode1\"]", + e.to_string() + ), } Ok(()) @@ -3605,7 +3622,7 @@ mod tests { match result { Ok(_) => panic!("This test should result in an error"), Err(e) => assert_eq!( - "This service was marked as running but it had actually stopped", + "The service status is not as expected. Expected: Running", e.to_string() ), } diff --git a/sn_service_management/src/control.rs b/sn_service_management/src/control.rs index e3289fd1bf..29e1f07689 100644 --- a/sn_service_management/src/control.rs +++ b/sn_service_management/src/control.rs @@ -264,19 +264,26 @@ impl ServiceControl for ServiceController { } match manager.uninstall(ServiceUninstallCtx { label }) { Ok(()) => Ok(()), - Err(e) => match e.kind() { - std::io::ErrorKind::NotFound => { + Err(err) => { + if std::io::ErrorKind::NotFound == err.kind() { error!("Error while uninstall service, service file might have been removed manually: {service_name}"); // In this case the user has removed the service definition file manually, // which the service manager crate treats as an error. We can propagate the // it to the caller and they can decide how to handle it. Err(Error::ServiceRemovedManually(service_name.to_string())) + } else if err.raw_os_error() == Some(267) { + // This requires the unstable io_error_more feature, use raw code for now + // else if err.kind() == std::io::ErrorKind::NotADirectory {} + + // This happens on windows when the service has been already cleared, but was not updated + // in the registry. Happens when the Service application (in windows) is open while calling + // 'remove' or 'reset'. + Err(Error::ServiceDoesNotExists(service_name.to_string())) + } else { + error!("Error while uninstalling service: {err:?}"); + Err(err.into()) } - _ => { - error!("Error while uninstalling service: {e:?}"); - Err(e.into()) - } - }, + } } } diff --git a/sn_service_management/src/error.rs b/sn_service_management/src/error.rs index 726775ee69..c28a919b7a 100644 --- a/sn_service_management/src/error.rs +++ b/sn_service_management/src/error.rs @@ -45,6 +45,8 @@ pub enum Error { RpcRecordAddressError(String), #[error("Could not find process at '{0}'")] ServiceProcessNotFound(String), + #[error("The service '{0}' does not exists and cannot be removed.")] + ServiceDoesNotExists(String), #[error("The user may have removed the '{0}' service outwith the node manager")] ServiceRemovedManually(String), #[error("Failed to create service user account")] diff --git a/sn_service_management/src/lib.rs b/sn_service_management/src/lib.rs index c6761f43d1..5e33644a8d 100644 --- a/sn_service_management/src/lib.rs +++ b/sn_service_management/src/lib.rs @@ -21,7 +21,6 @@ pub mod safenode_manager_proto { tonic::include_proto!("safenode_manager_proto"); } -use crate::error::{Error, Result}; use async_trait::async_trait; use auditor::AuditorServiceData; use libp2p::Multiaddr; @@ -34,6 +33,7 @@ use std::{ }; pub use daemon::{DaemonService, DaemonServiceData}; +pub use error::{Error, Result}; pub use faucet::{FaucetService, FaucetServiceData}; pub use node::{NodeService, NodeServiceData};