diff --git a/neofs-testlib/neofs_testlib/env/env.py b/neofs-testlib/neofs_testlib/env/env.py
index e40d4adef..2006e8425 100644
--- a/neofs-testlib/neofs_testlib/env/env.py
+++ b/neofs-testlib/neofs_testlib/env/env.py
@@ -36,7 +36,6 @@
     DEFAULT_REST_OPERATION_TIMEOUT,
     get_assets_dir_path,
 )
-
 from neofs_testlib.cli import NeofsAdm, NeofsCli, NeofsLens, NeoGo
 from neofs_testlib.shell import LocalShell
 from neofs_testlib.utils import wallet as wallet_utils
@@ -340,16 +339,18 @@ def generate_alphabet_wallets(
 
     @allure.step("Kill current neofs env")
     def kill(self):
-        if self.rest_gw:
+        if self.rest_gw and self.rest_gw.process:
             self.rest_gw.process.kill()
-        if self.s3_gw:
+        if self.s3_gw and self.s3_gw.process:
             self.s3_gw.process.kill()
-        if self.main_chain:
+        if self.main_chain and self.main_chain.process:
             self.main_chain.process.kill()
         for sn in self.storage_nodes:
-            sn.process.kill()
+            if sn.process:
+                sn.process.kill()
         for ir in self.inner_ring_nodes:
-            ir.process.kill()
+            if ir.process:
+                ir.process.kill()
 
     def persist(self) -> str:
         persisted_path = self._generate_temp_file(os.path.dirname(self._env_dir), prefix="persisted_env")
@@ -979,13 +980,26 @@ def start(self, fresh=True):
     @allure.step("Stop storage node")
     def stop(self):
         logger.info(f"Stopping Storage Node:{self}")
-        terminate_process(self.process)
-        self.process = None
+        if self.process:
+            terminate_process(self.process)
+            self.process = None
+            # Do not return until the healthcheck stops reporting READY/ONLINE.
+            self._wait_until_not_ready()
+        else:
+            # Stopping a node that has no process is a caller error; fail loudly.
+            raise AssertionError("Storage node has been already stopped")
 
     @allure.step("Kill storage node")
     def kill(self):
         logger.info(f"Killing Storage Node:{self}")
-        self.process.kill()
+        if self.process:
+            self.process.kill()
+            self.process = None
+            # Do not return until the healthcheck stops reporting READY/ONLINE.
+            self._wait_until_not_ready()
+        else:
+            # Killing a node that has no process is a caller error; fail loudly.
+            raise AssertionError("Storage node has been already killed")
 
     @allure.step("Delete storage node data")
     def delete_data(self):
@@ -1070,6 +1084,24 @@ def _wait_until_ready(self):
         assert "Health status: READY" in result.stdout, "Health is not ready"
         assert "Network status: ONLINE" in result.stdout, "Network is not online"
 
+    @retry(wait=wait_fixed(15), stop=stop_after_attempt(10), reraise=True)
+    def _wait_until_not_ready(self):
+        """Wait until the control healthcheck stops reporting the node as up.
+
+        Returns as soon as the healthcheck call itself fails. Otherwise asserts
+        that the output contains neither "Health status: READY" nor
+        "Network status: ONLINE", so that the ``retry`` decorator can re-run
+        the check while the node still looks alive.
+        """
+        neofs_cli = self.neofs_env.neofs_cli(self.cli_config)
+        try:
+            result = neofs_cli.control.healthcheck(endpoint=self.control_grpc_endpoint)
+        except Exception:
+            # Any healthcheck error is treated as the node being down.
+            return
+        assert "Health status: READY" not in result.stdout, "Health is ready"
+        assert "Network status: ONLINE" not in result.stdout, "Network is online"
+
     def _get_version(self) -> str:
         raw_version_output = self.neofs_env._run_single_command(self.neofs_env.neofs_node_path, "--version")
         for line in raw_version_output.splitlines():
diff --git a/pytest_tests/tests/failovers/test_failover_storage.py b/pytest_tests/tests/failovers/test_failover_storage.py
index df9e9ca35..f2b1da40f 100644
--- a/pytest_tests/tests/failovers/test_failover_storage.py
+++ b/pytest_tests/tests/failovers/test_failover_storage.py
@@ -1,32 +1,40 @@
 import logging
+import random
 
 import allure
 import pytest
 from helpers.complex_object_actions import wait_object_replication
 from helpers.container import create_container
 from helpers.file_helper import generate_file, get_file_hash
-from helpers.neofs_verbs import get_object, put_object_to_random_node
-from helpers.node_management import wait_all_storage_nodes_returned
+from helpers.neofs_verbs import get_object, put_object, put_object_to_random_node
+from helpers.node_management import storage_node_healthcheck, wait_all_storage_nodes_returned
 from helpers.wellknown_acl import PUBLIC_ACL
 from neofs_env.neofs_env_test_base import NeofsEnvTestBase
 from neofs_testlib.env.env import NeoFSEnv, StorageNode
 
 logger = logging.getLogger("NeoLogger")
-stopped_nodes: list[StorageNode] = []
 
 
 @pytest.fixture
-@allure.step("Return all stopped hosts")
 def after_run_return_all_stopped_storage_nodes(neofs_env: NeoFSEnv):
+    """After the test, start again every storage node whose healthcheck fails."""
     yield
-    return_stopped_storage_nodes(neofs_env)
+    unavailable_nodes = []
+    for node in neofs_env.storage_nodes:
+        try:
+            storage_node_healthcheck(node)
+        except Exception:
+            # A failed healthcheck marks the node as one that must be started again.
+            unavailable_nodes.append(node)
+    return_stopped_storage_nodes(neofs_env, unavailable_nodes)
 
 
-def return_stopped_storage_nodes(neofs_env: NeoFSEnv) -> None:
-    for node in list(stopped_nodes):
+@allure.step("Return all stopped hosts")
+def return_stopped_storage_nodes(neofs_env: NeoFSEnv, stopped_nodes: list[StorageNode]) -> None:
+    """Start each node in *stopped_nodes*, then wait for all storage nodes to return."""
+    for node in stopped_nodes:
         with allure.step(f"Start {node}"):
             node.start(fresh=False)
-            stopped_nodes.remove(node)
 
     wait_all_storage_nodes_returned(neofs_env)
 
@@ -37,6 +45,7 @@ class TestFailoverStorage(NeofsEnvTestBase):
     def test_storage_node_failover(
         self, default_wallet, simple_object_size, after_run_return_all_stopped_storage_nodes, hard_restart
     ):
+        stopped_nodes = []
         wallet = default_wallet
         placement_rule = "REP 2 IN X CBF 2 SELECT 2 FROM * AS X"
         source_file_path = generate_file(simple_object_size)
@@ -76,7 +85,7 @@ def test_storage_node_failover(
             assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
 
         with allure.step("Return stopped storage nodes"):
-            return_stopped_storage_nodes(self.neofs_env)
+            return_stopped_storage_nodes(self.neofs_env, stopped_nodes)
 
         with allure.step("Check object data is not corrupted"):
             new_nodes = wait_object_replication(
@@ -84,3 +93,83 @@ def test_storage_node_failover(
             )
             got_file_path = get_object(wallet.path, cid, oid, shell=self.shell, endpoint=new_nodes[0].endpoint)
             assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
+
+    def test_put_get_without_storage_node(
+        self, default_wallet, simple_object_size, after_run_return_all_stopped_storage_nodes
+    ):
+        """Put/get must work with one storage node killed, and from that node once it is back."""
+        with allure.step("Kill one storage node"):
+            dead_node = self.neofs_env.storage_nodes[0]
+            alive_nodes = self.neofs_env.storage_nodes[1:]
+
+            dead_node.kill()
+
+        with allure.step("Create container"):
+            wallet = default_wallet
+            placement_rule = "REP 3"
+            cid = create_container(
+                wallet.path,
+                shell=self.shell,
+                endpoint=alive_nodes[0].endpoint,
+                rule=placement_rule,
+                basic_acl=PUBLIC_ACL,
+            )
+
+        with allure.step("Put objects"):
+            for _ in range(10):
+                source_file_path = generate_file(simple_object_size)
+                oid = put_object(
+                    wallet.path,
+                    source_file_path,
+                    cid,
+                    shell=self.shell,
+                    endpoint=random.choice(alive_nodes).endpoint,
+                )
+                wait_object_replication(cid, oid, 3, shell=self.shell, nodes=alive_nodes, neofs_env=self.neofs_env)
+
+        with allure.step("Get last object"):
+            got_file_path = get_object(wallet.path, cid, oid, shell=self.shell, endpoint=alive_nodes[0].endpoint)
+            assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
+
+        with allure.step("Return stopped storage node"):
+            return_stopped_storage_nodes(self.neofs_env, [dead_node])
+
+        with allure.step("Get last object from previously dead node"):
+            got_file_path = get_object(wallet.path, cid, oid, shell=self.shell, endpoint=dead_node.endpoint)
+            assert get_file_hash(source_file_path) == get_file_hash(got_file_path)
+
+    def test_put_get_without_storage_nodes(
+        self, default_wallet, simple_object_size, after_run_return_all_stopped_storage_nodes
+    ):
+        """With two storage nodes killed, a PUT into a REP 3 container must fail."""
+        with allure.step("Kill two storage nodes"):
+            dead_nodes = self.neofs_env.storage_nodes[:2]
+            alive_nodes = self.neofs_env.storage_nodes[2:]
+
+            for dead_node in dead_nodes:
+                dead_node.kill()
+
+        with allure.step("Create container"):
+            wallet = default_wallet
+            placement_rule = "REP 3"
+            cid = create_container(
+                wallet.path,
+                shell=self.shell,
+                endpoint=alive_nodes[0].endpoint,
+                rule=placement_rule,
+                basic_acl=PUBLIC_ACL,
+            )
+
+        with allure.step("Try to put object and expect error"):
+            source_file_path = generate_file(simple_object_size)
+            with pytest.raises(Exception, match=r".*incomplete object PUT by placement.*"):
+                put_object(
+                    wallet.path,
+                    source_file_path,
+                    cid,
+                    shell=self.shell,
+                    endpoint=alive_nodes[0].endpoint,
+                )
+
+        with allure.step("Return stopped storage node"):
+            return_stopped_storage_nodes(self.neofs_env, dead_nodes)