From 93b65a17e59f28ffa12efa51c391090278bc550d Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Fri, 19 Jan 2024 09:58:47 -0800 Subject: [PATCH 1/6] Add monitoring instrumentation around `dhcp-disable` state. PROBLEM ======= We're tracking reported issues in our environment where Kea servers are acting like they are being left disabled by unkonwn causes. Discovering the root cause of that problem has been hampered by poor visibility; * It is normal for servers to go without receiving packets. When the `pkt?-received` counter stops incrementing, we cannot determine reliably if this is due to e.g. DHCP relay agent misconfiguration or a Kea service issue. It frustrates debugging. * Kea does not increment any counter to indicate it is disabled, providing us with no clear monitoring predicate to detect the situation early. * It is consequently hours or days before this is noticed, by which time debug logs have rotated. This results in the situation of there being no outward indication of disabled state. SOLUTION ======== In this change, we resolve our visibility problems; * A new `pkt?-raw-received` counter is incremented at the earliest point of execution we know a packet has been received (in `runOne()`). * A new `pkt?-dhcp-disabled` counter counts packets dropped due to the service being in a disabled state at the time of reception. * A WARN level log is emitted for packets dropped while disabled. To alleviate spam concerns, a 1-in-100 sample is used. * To facilitate this change, the `StatsMgr` now provides a `getInteger()` method. * `NetworkState` internal boolean constructs are exposed as monitoring metrics. This allows us to observe the normal process of disabling service during e.g. HA reconnects and recovery, and separately detect extended unexpected disabled states prior to sufficient packet level impacts (consider e.g. DHCP anycast servers which may not receive packets to count as disabled for extended periods - we want to know it had been disabled for several days before the anycast route changes, not after). This "belt and suspenders" approach should close our monitoring gaps and provide attention to faulty services as they present. DISCUSSION ========== The `pkt?-raw-received` counters are a glaring admission of guilt, and we should use `pkt?-received` in these places. The exsisting `pkt?-received` counters should be renamed to `pkt?-processed`, as they indicate execution of the respective `processPacket()` function - still a useful monitoring indicator that e.g. the thread handoff succeeded. Probably there should also be a counter for the case where the callback addition failed instead of the present debug log. **HOWEVER**, existing Kea users may rely on the present behavior and names of metrics. A monitoring predicate using the current `pkt?-received` metrics may trigger automatic remediation processes for Kea servers unintentionally left disabled for too long; these predicate configurations would have to change or else the remediation would be defeated. This may not be an ideal user experience. Consequently, I've offered the unfortunate solution you see here; `pkt?-raw-received`, but I'm extremely unhappy with it; I'd be happy to modify the situation to whatever ISC prefers to offer their users in this case. --- ChangeLog | 9 ++ src/bin/dhcp4/dhcp4_srv.cc | 16 +++ src/bin/dhcp4/dhcp4_srv.h | 2 + .../dhcp4/tests/ctrl_dhcp4_srv_unittest.cc | 12 ++ src/bin/dhcp4/tests/dhcp4_test_utils.cc | 3 + src/bin/dhcp4/tests/dora_unittest.cc | 11 ++ src/bin/dhcp4/tests/inform_unittest.cc | 4 + src/bin/dhcp6/dhcp6_srv.cc | 16 +++ src/bin/dhcp6/dhcp6_srv.h | 2 + .../dhcp6/tests/ctrl_dhcp6_srv_unittest.cc | 12 ++ src/lib/dhcpsrv/network_state.cc | 24 ++++ src/lib/dhcpsrv/network_state.h | 3 + .../dhcpsrv/tests/network_state_unittest.cc | 114 ++++++++++++++++++ src/lib/stats/stats_mgr.cc | 19 +++ src/lib/stats/stats_mgr.h | 14 +++ src/lib/stats/tests/stats_mgr_unittest.cc | 1 + 16 files changed, 262 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8da4e5a228..020af9f455 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2201. [func] dhankins + New metrics pkt4-dhcp-disabled, pkt4-raw-received, disabled-by-db, + disabled-by-ha-local, disabled-by-ha-remote, disabled-by-user, and + disabled-globally provide monitoring level visibility into the network_state + manager, and closes a monitoring gap where packets dropped while disabled + are not counted by any metric. + While disabled, a WARN log is emitted every DISABLE_WARN_PACKET{4|6}(100) + dropped packets, in addition to the existing DEBUG log. + 2200. [func] piotrek Kea now supports new DHCPv4 option code 121, Classless Static Route option defined in RFC 3442. diff --git a/src/bin/dhcp4/dhcp4_srv.cc b/src/bin/dhcp4/dhcp4_srv.cc index 612e8aa58b..8e49c82dae 100644 --- a/src/bin/dhcp4/dhcp4_srv.cc +++ b/src/bin/dhcp4/dhcp4_srv.cc @@ -122,6 +122,8 @@ struct Dhcp4Hooks { /// List of statistics which is initialized to 0 during the DHCPv4 /// server startup. std::set dhcp4_statistics = { + "pkt4-dhcp-disabled", + "pkt4-raw-received", "pkt4-received", "pkt4-discover-received", "pkt4-offer-received", @@ -1096,10 +1098,24 @@ Dhcpv4Srv::runOne() { return; } + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + // Count raw packets received as soon as we know a packet has been read. + stats_mgr.addValue("pkt4-raw-received", static_cast(1)); + // If the DHCP service has been globally disabled, drop the packet. if (!network_state_->isServiceEnabled()) { LOG_DEBUG(bad_packet4_logger, DBGLVL_PKT_HANDLING, DHCP4_PACKET_DROP_0008) .arg(query->getLabel()); + + // Log a warning every DISABLE_WARN_PACKET4. + if (stats_mgr.getInteger("pkt4-dhcp-disabled") % DISABLE_WARN_PACKET4 == 0) { + LOG_WARN(dhcp4_logger, DHCP4_PACKET_DROP_0008) + .arg(query->getLabel()); + } + + // Count packets dropped due to (hopefully temporary) service disabled. + stats_mgr.addValue("pkt4-dhcp-disabled", static_cast(1)); return; } else { if (MultiThreadingMgr::instance().getMode()) { diff --git a/src/bin/dhcp4/dhcp4_srv.h b/src/bin/dhcp4/dhcp4_srv.h index 0ed55adfe5..f0770b1fff 100644 --- a/src/bin/dhcp4/dhcp4_srv.h +++ b/src/bin/dhcp4/dhcp4_srv.h @@ -40,6 +40,8 @@ namespace isc { namespace dhcp { +static const int64_t DISABLE_WARN_PACKET4 = 100; + /// @brief DHCPv4 message exchange. /// /// This class represents the DHCPv4 message exchange. The message exchange diff --git a/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc b/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc index 14ffe8d4ff..158e7b953e 100644 --- a/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc +++ b/src/bin/dhcp4/tests/ctrl_dhcp4_srv_unittest.cc @@ -634,11 +634,23 @@ TEST_F(CtrlChannelDhcpv4SrvTest, controlChannelStats) { " \"name\":\"bogus\" }}", response); EXPECT_EQ("{ \"arguments\": { }, \"result\": 0 }", response); + // Set max sample count to 1 to match test expectation (disabled state + // gauges normally have 2 samples). + sendUnixCommand("{ \"command\" : \"statistic-sample-count-set-all\", " + " \"arguments\": { \"max-samples\": 1 }}", response); + EXPECT_EQ("{ \"result\": 0, \"text\": \"All statistics count limit are set.\" }", response); // Check statistic-get-all sendUnixCommand("{ \"command\" : \"statistic-get-all\", " " \"arguments\": {}}", response); std::set initial_stats = { + "disabled-by-db", + "disabled-by-ha-local", + "disabled-by-ha-remote", + "disabled-by-user", + "disabled-globally", + "pkt4-dhcp-disabled", + "pkt4-raw-received", "pkt4-received", "pkt4-discover-received", "pkt4-offer-received", diff --git a/src/bin/dhcp4/tests/dhcp4_test_utils.cc b/src/bin/dhcp4/tests/dhcp4_test_utils.cc index c3aa7c44c8..f9238ea67a 100644 --- a/src/bin/dhcp4/tests/dhcp4_test_utils.cc +++ b/src/bin/dhcp4/tests/dhcp4_test_utils.cc @@ -994,14 +994,17 @@ Dhcpv4SrvTest::pretendReceivingPkt(NakedDhcpv4Srv& srv, const std::string& confi using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_rcvd = mgr.getObservation("pkt4-received"); ObservationPtr tested_stat = mgr.getObservation(stat_name); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_rcvd); ASSERT_TRUE(tested_stat); // They also must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_rcvd->getInteger().first); EXPECT_EQ(1, tested_stat->getInteger().first); } diff --git a/src/bin/dhcp4/tests/dora_unittest.cc b/src/bin/dhcp4/tests/dora_unittest.cc index 9520f14d2b..ec9e647664 100644 --- a/src/bin/dhcp4/tests/dora_unittest.cc +++ b/src/bin/dhcp4/tests/dora_unittest.cc @@ -2504,6 +2504,8 @@ DORATest::statisticsDORA() { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_dhcp_disabled = mgr.getObservatoin("pkt4-dhcp-disabled"); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_discover_received = mgr.getObservation("pkt4-discover-received"); ObservationPtr pkt4_offer_sent = mgr.getObservation("pkt4-offer-sent"); @@ -2512,6 +2514,8 @@ DORATest::statisticsDORA() { ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_dhcp_disabled); + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_discover_received); ASSERT_TRUE(pkt4_offer_sent); @@ -2520,6 +2524,8 @@ DORATest::statisticsDORA() { ASSERT_TRUE(pkt4_sent); // They also must have expected values. + EXPECT_EQ(0, pkt4_dhcp_disabled->getInteger().first); + EXPECT_EQ(2, pkt4_raw_received->getInteger().first); EXPECT_EQ(2, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_discover_received->getInteger().first); EXPECT_EQ(1, pkt4_offer_sent->getInteger().first); @@ -2535,6 +2541,8 @@ DORATest::statisticsDORA() { ASSERT_NO_THROW(client.doRequest()); // Let's see if the stats are properly updated. + EXPECT_EQ(0, pkt4_dhcp_disabled->getInteger().first); + EXPECT_EQ(5, pkt4_raw_received->getInteger().first); EXPECT_EQ(5, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_discover_received->getInteger().first); EXPECT_EQ(1, pkt4_offer_sent->getInteger().first); @@ -2576,6 +2584,7 @@ DORATest::statisticsNAK() { using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_request_received = mgr.getObservation("pkt4-request-received"); ObservationPtr pkt4_ack_sent = mgr.getObservation("pkt4-ack-sent"); @@ -2583,6 +2592,7 @@ DORATest::statisticsNAK() { ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_request_received); ASSERT_FALSE(pkt4_ack_sent); // No acks were sent, no such statistic expected. @@ -2590,6 +2600,7 @@ DORATest::statisticsNAK() { ASSERT_TRUE(pkt4_sent); // They also must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_request_received->getInteger().first); EXPECT_EQ(1, pkt4_nak_sent->getInteger().first); diff --git a/src/bin/dhcp4/tests/inform_unittest.cc b/src/bin/dhcp4/tests/inform_unittest.cc index 50697d5717..411cbf61fa 100644 --- a/src/bin/dhcp4/tests/inform_unittest.cc +++ b/src/bin/dhcp4/tests/inform_unittest.cc @@ -623,18 +623,21 @@ TEST_F(InformTest, statisticsInform) { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); + ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_inform_received = mgr.getObservation("pkt4-inform-received"); ObservationPtr pkt4_ack_sent = mgr.getObservation("pkt4-ack-sent"); ObservationPtr pkt4_sent = mgr.getObservation("pkt4-sent"); // All expected statistics must be present. + ASSERT_TRUE(pkt4_raw_received); ASSERT_TRUE(pkt4_received); ASSERT_TRUE(pkt4_inform_received); ASSERT_TRUE(pkt4_ack_sent); ASSERT_TRUE(pkt4_sent); // And they must have expected values. + EXPECT_EQ(1, pkt4_raw_received->getInteger().first); EXPECT_EQ(1, pkt4_received->getInteger().first); EXPECT_EQ(1, pkt4_inform_received->getInteger().first); EXPECT_EQ(1, pkt4_ack_sent->getInteger().first); @@ -648,6 +651,7 @@ TEST_F(InformTest, statisticsInform) { ASSERT_NO_THROW(client.doInform()); // Let's see if the stats are properly updated. + EXPECT_EQ(5, pkt4_raw_received->getInteger().first); EXPECT_EQ(5, pkt4_received->getInteger().first); EXPECT_EQ(5, pkt4_inform_received->getInteger().first); EXPECT_EQ(5, pkt4_ack_sent->getInteger().first); diff --git a/src/bin/dhcp6/dhcp6_srv.cc b/src/bin/dhcp6/dhcp6_srv.cc index 60324d0c3f..b6435e92d7 100644 --- a/src/bin/dhcp6/dhcp6_srv.cc +++ b/src/bin/dhcp6/dhcp6_srv.cc @@ -178,6 +178,8 @@ createStatusCode(const Pkt6& pkt, const Option6IA& ia, const uint16_t status_cod /// List of statistics which is initialized to 0 during the DHCPv6 /// server startup. std::set dhcp6_statistics = { + "pkt6-dhcp-disabled", + "pkt6-raw-received", "pkt6-received", "pkt6-solicit-received", "pkt6-advertise-received", @@ -684,10 +686,24 @@ Dhcpv6Srv::runOne() { return; } + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + // Count raw packets received as soon as we know a packet has been read. + stats_mgr.addValue("pkt6-raw-received", static_cast(1)); + // If the DHCP service has been globally disabled, drop the packet. if (!network_state_->isServiceEnabled()) { LOG_DEBUG(bad_packet6_logger, DBGLVL_PKT_HANDLING, DHCP6_PACKET_DROP_DHCP_DISABLED) .arg(query->getLabel()); + + // Log a warning every DISABLE_WARN_PACKET6. + if (stats_mgr.getInteger("pkt6-dhcp-disabled") % DISABLE_WARN_PACKET6 == 0) { + LOG_WARN(dhcp6_logger, DHCP6_PACKET_DROP_DHCP_DISABLED) + .arg(query->getLabel()); + } + + // Count packets dropped due to (hopefully temporary) service disabled. + stats_mgr.addValue("pkt6-dhcp-disabled", static_cast(1)); return; } else { if (MultiThreadingMgr::instance().getMode()) { diff --git a/src/bin/dhcp6/dhcp6_srv.h b/src/bin/dhcp6/dhcp6_srv.h index e2c255d809..bb8a080b35 100644 --- a/src/bin/dhcp6/dhcp6_srv.h +++ b/src/bin/dhcp6/dhcp6_srv.h @@ -43,6 +43,8 @@ namespace isc { namespace dhcp { +static const int64_t DISABLE_WARN_PACKET6 = 100; + /// @brief This exception is thrown when DHCP server hits the error which should /// result in discarding the message being processed. class DHCPv6DiscardMessageError : public Exception { diff --git a/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc b/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc index 421cf13ccf..5569e5e32f 100644 --- a/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc +++ b/src/bin/dhcp6/tests/ctrl_dhcp6_srv_unittest.cc @@ -1392,11 +1392,23 @@ TEST_F(CtrlChannelDhcpv6SrvTest, controlChannelStats) { " \"name\":\"bogus\" }}", response); EXPECT_EQ("{ \"arguments\": { }, \"result\": 0 }", response); + // Set max sample count to 1 to match test expectation (disabled state + // gauges normally have 2 samples). + sendUnixCommand("{ \"command\" : \"statistic-sample-count-set-all\", " + " \"arguments\": { \"max-samples\": 1 }}", response); + EXPECT_EQ("{ \"result\": 0, \"text\": \"All statistics count limit are set.\" }", response); // Check statistic-get-all sendUnixCommand("{ \"command\" : \"statistic-get-all\", " " \"arguments\": {}}", response); std::set initial_stats = { + "disabled-by-db", + "disabled-by-ha-local", + "disabled-by-ha-remote", + "disabled-by-user", + "disabled-globally", + "pkt6-dhcp-disabled", + "pkt6-raw-received", "pkt6-received", "pkt6-solicit-received", "pkt6-advertise-received", diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 521d9ed82b..17942429f1 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this disabled_subnets_(), disabled_networks_(), timer_mgr_(TimerMgr::instance()), disabled_by_origin_(), disabled_by_db_connection_(0) { + updateStats(); } /// @brief Destructor. @@ -76,6 +78,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this globally_disabled_ = false; } } + updateStats(); } /// @brief Reset internal counters for a database connection origin. @@ -86,6 +89,7 @@ class NetworkStateImpl : public boost::enable_shared_from_this if (disabled_by_origin_.empty()) { globally_disabled_ = false; } + updateStats(); } /// @brief Enables DHCP service for an origin. @@ -168,6 +172,26 @@ class NetworkStateImpl : public boost::enable_shared_from_this /// @brief Flag which indicates the state has been disabled by a DB /// connection loss. uint32_t disabled_by_db_connection_; + +private: + /// @private + + /// @brief Update monitoring metrics to expose disable states. + /// + /// A 0 value is used for false, 1 for true. + void updateStats() { + isc::stats::StatsMgr::instance().setValue("disabled-globally", + static_cast(globally_disabled_)); + isc::stats::StatsMgr::instance().setValue("disabled-by-user", + static_cast(disabled_by_origin_.contains(USER_COMMAND))); + isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", + static_cast(disabled_by_origin_.contains(HA_LOCAL_COMMAND))); + isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", + static_cast(disabled_by_origin_.contains(HA_REMOTE_COMMAND))); + // Expose the disabled-by-db-connection counter by direct value. + isc::stats::StatsMgr::instance().setValue("disabled-by-db", + static_cast(disabled_by_db_connection_)); + } }; NetworkState::NetworkState(const NetworkState::ServerType& server_type) diff --git a/src/lib/dhcpsrv/network_state.h b/src/lib/dhcpsrv/network_state.h index 861d1a78cb..81d0d320d8 100644 --- a/src/lib/dhcpsrv/network_state.h +++ b/src/lib/dhcpsrv/network_state.h @@ -200,6 +200,9 @@ class NetworkState { private: + /// @brief Update monitoring metrics to expose disable states. + void updateStats(); + /// @brief Pointer to the @c NetworkState implementation. boost::shared_ptr impl_; diff --git a/src/lib/dhcpsrv/tests/network_state_unittest.cc b/src/lib/dhcpsrv/tests/network_state_unittest.cc index 84f26c6ee3..6a3a0b8397 100644 --- a/src/lib/dhcpsrv/tests/network_state_unittest.cc +++ b/src/lib/dhcpsrv/tests/network_state_unittest.cc @@ -142,6 +142,9 @@ class NetworkStateTest : public ::testing::Test { /// from different origins and that it results in each timer being scheduled. void multipleDifferentOriginsDelayedEnableServiceTest(); + /// @brief This test verifies that monitoring metrics are updated. + void monitoringMetricsUpdatedTest(); + /// @brief Runs IO service with a timeout. /// /// @param timeout_ms Timeout for running IO service in milliseconds. @@ -603,6 +606,113 @@ NetworkStateTest::multipleDifferentOriginsDelayedEnableServiceTest() { EXPECT_FALSE(state.isDelayedEnableService()); } +void +NetworkStateTest::monitoringMetricsUpdatedTest() { + NetworkState state(NetworkState::DHCPv4); + isc::stats::StatsMgr& mgr = isc::stats::StatsMgr::instance(); + isc::stats::ObservationPtr disabled_globally = mgr.getObservation("disabled-globally"); + isc::stats::ObservationPtr disabled_by_user = mgr.getObservation("disabled-by-user"); + isc::stats::ObservationPtr disabled_by_ha_local = mgr.getObservation("disabled-by-ha-local"); + isc::stats::ObservationPtr disabled_by_ha_remote = mgr.getObservation("disabled-by-ha-remote"); + isc::stats::ObservationPtr disabled_by_db = mgr.getObservation("disabled-by-db"); + + ASSERT_TRUE(disabled_globally); + ASSERT_TRUE(disabled_by_user); + ASSERT_TRUE(disabled_by_ha_local); + ASSERT_TRUE(disabled_by_ha_remote); + ASSERT_TRUE(disabled_by_db); + + // Initial state. + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by user. + state.disableService(NetworkState::Origin::USER_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by ha local. + state.disableService(NetworkState::Origin::HA_LOCAL_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Disable by ha remote. + state.disableService(NetworkState::Origin::HA_REMOTE_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(1, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Enable by user. + state.enableService(NetworkState::Origin::USER_COMMAND); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(1, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // Enable by ha. + state.enableService(NetworkState::Origin::HA_LOCAL_COMMAND); + state.enableService(NetworkState::Origin::HA_REMOTE_COMMAND); + + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); + + // DB connection disables are a counter. + state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(3, disabled_by_db->getInteger().first); + + state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_FALSE(state.isServiceEnabled()); + EXPECT_EQ(1, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(1, disabled_by_db->getInteger().first); + + state.enableService(NetworkState::Origin::DB_CONNECTION); + + EXPECT_TRUE(state.isServiceEnabled()); + EXPECT_EQ(0, disabled_globally->getInteger().first); + EXPECT_EQ(0, disabled_by_user->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_local->getInteger().first); + EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); + EXPECT_EQ(0, disabled_by_db->getInteger().first); +} + // Test invocations. TEST_F(NetworkStateTest, defaultTest) { @@ -749,4 +859,8 @@ TEST_F(NetworkStateTest, multipleDifferentOriginsDelayedEnableServiceTestMultiTh multipleDifferentOriginsDelayedEnableServiceTest(); } +TEST_F(NetworkStateTest, monitoringMetricsUpdatedTest) { + monitoringMetricsUpdatedTest(); +} + } // end of anonymous namespace diff --git a/src/lib/stats/stats_mgr.cc b/src/lib/stats/stats_mgr.cc index b420cabc82..366f03a750 100644 --- a/src/lib/stats/stats_mgr.cc +++ b/src/lib/stats/stats_mgr.cc @@ -273,6 +273,25 @@ StatsMgr::removeAllInternal() { global_->clear(); } +int64_t +StatsMgr::getInteger(const std::string& name) const { + if (MultiThreadingMgr::instance().getMode()) { + lock_guard lock(*mutex_); + return (getIntegerInternal(name)); + } else { + return (getIntegerInternal(name)); + } +} + +int64_t +StatsMgr::getIntegerInternal(const std::string& name) const { + ObservationPtr existing = getObservationInternal(name); + if (existing) { + return existing->getInteger().first; + } + return static_cast(0); +} + ConstElementPtr StatsMgr::get(const string& name) const { MultiThreadingLock lock(*mutex_); diff --git a/src/lib/stats/stats_mgr.h b/src/lib/stats/stats_mgr.h index f49ab8ab7c..e9514f7b67 100644 --- a/src/lib/stats/stats_mgr.h +++ b/src/lib/stats/stats_mgr.h @@ -249,6 +249,11 @@ class StatsMgr : public boost::noncopyable { /// @return number of recorded statistics. size_t count() const; + /// @brief Returns the most recent integer value for a monitoring variable. + /// + /// @return integer value as a 64 bit integer + int64_t getInteger(const std::string& name) const; + /// @brief Returns a single statistic as a JSON structure. /// /// @return JSON structures representing a single statistic @@ -715,6 +720,15 @@ class StatsMgr : public boost::noncopyable { /// @private + /// @brief Returns the most recent integer value for a monitoring variable. + /// + /// Should be called in a thread safe context. + /// + /// @return integer value as a 64 bit integer + int64_t getIntegerInternal(const std::string& name) const; + + /// @private + /// @brief Returns a single statistic as a JSON structure. /// /// Should be called in a thread safe context. diff --git a/src/lib/stats/tests/stats_mgr_unittest.cc b/src/lib/stats/tests/stats_mgr_unittest.cc index 734d134c8d..22f595e3ac 100644 --- a/src/lib/stats/tests/stats_mgr_unittest.cc +++ b/src/lib/stats/tests/stats_mgr_unittest.cc @@ -76,6 +76,7 @@ TEST_F(StatsMgrTest, integerStat) { isc::util::clockToText(alpha->getInteger().second) + "\" ] ] }"; EXPECT_EQ(exp, StatsMgr::instance().get("alpha")->str()); + EXPECT_EQ(alpha->getInteger().first, StatsMgr::instance().getInteger("alpha")); } // Test checks whether it's possible to record and later report From e0392c46b69cdddb56a3be195c07e5cb02bf8a9e Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Mon, 22 Jan 2024 08:14:30 -0800 Subject: [PATCH 2/6] Include namespace of origin constants. --- src/lib/dhcpsrv/network_state.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 17942429f1..84b9d0c8aa 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -183,11 +183,11 @@ class NetworkStateImpl : public boost::enable_shared_from_this isc::stats::StatsMgr::instance().setValue("disabled-globally", static_cast(globally_disabled_)); isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.contains(USER_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::USER_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.contains(HA_LOCAL_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::HA_LOCAL_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.contains(HA_REMOTE_COMMAND))); + static_cast(disabled_by_origin_.contains(NetworkState::HA_REMOTE_COMMAND))); // Expose the disabled-by-db-connection counter by direct value. isc::stats::StatsMgr::instance().setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); From 323435db2b1f4852555a27995302a6616ab786ec Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Thu, 25 Jan 2024 15:54:50 -0800 Subject: [PATCH 3/6] Use `std::unordered_set<>.count()` instead of `.contains()`. The contains() method was added in C++20, while count() will always return 0 or 1 and goes back to C++11. --- src/lib/dhcpsrv/network_state.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index 84b9d0c8aa..bb63ccdc5f 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -183,11 +183,11 @@ class NetworkStateImpl : public boost::enable_shared_from_this isc::stats::StatsMgr::instance().setValue("disabled-globally", static_cast(globally_disabled_)); isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.contains(NetworkState::USER_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::USER_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.contains(NetworkState::HA_LOCAL_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::HA_LOCAL_COMMAND))); isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.contains(NetworkState::HA_REMOTE_COMMAND))); + static_cast(disabled_by_origin_.count(NetworkState::HA_REMOTE_COMMAND))); // Expose the disabled-by-db-connection counter by direct value. isc::stats::StatsMgr::instance().setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); From 34e3453c22c3470b846c1435d69638559079e1b5 Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Thu, 25 Jan 2024 15:56:38 -0800 Subject: [PATCH 4/6] Fix compilation errors. --- src/bin/dhcp4/tests/dora_unittest.cc | 2 +- .../dhcpsrv/tests/network_state_unittest.cc | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/bin/dhcp4/tests/dora_unittest.cc b/src/bin/dhcp4/tests/dora_unittest.cc index ec9e647664..ae1b7848ba 100644 --- a/src/bin/dhcp4/tests/dora_unittest.cc +++ b/src/bin/dhcp4/tests/dora_unittest.cc @@ -2504,7 +2504,7 @@ DORATest::statisticsDORA() { // Ok, let's check the statistics. using namespace isc::stats; StatsMgr& mgr = StatsMgr::instance(); - ObservationPtr pkt4_dhcp_disabled = mgr.getObservatoin("pkt4-dhcp-disabled"); + ObservationPtr pkt4_dhcp_disabled = mgr.getObservation("pkt4-dhcp-disabled"); ObservationPtr pkt4_raw_received = mgr.getObservation("pkt4-raw-received"); ObservationPtr pkt4_received = mgr.getObservation("pkt4-received"); ObservationPtr pkt4_discover_received = mgr.getObservation("pkt4-discover-received"); diff --git a/src/lib/dhcpsrv/tests/network_state_unittest.cc b/src/lib/dhcpsrv/tests/network_state_unittest.cc index 6a3a0b8397..f94ebf4a94 100644 --- a/src/lib/dhcpsrv/tests/network_state_unittest.cc +++ b/src/lib/dhcpsrv/tests/network_state_unittest.cc @@ -12,6 +12,7 @@ #include #include #include +#include using namespace isc; using namespace isc::asiolink; @@ -631,7 +632,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by user. - state.disableService(NetworkState::Origin::USER_COMMAND); + state.disableService(NetworkState::USER_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -641,7 +642,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by ha local. - state.disableService(NetworkState::Origin::HA_LOCAL_COMMAND); + state.disableService(NetworkState::HA_LOCAL_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -651,7 +652,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Disable by ha remote. - state.disableService(NetworkState::Origin::HA_REMOTE_COMMAND); + state.disableService(NetworkState::HA_REMOTE_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -661,7 +662,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Enable by user. - state.enableService(NetworkState::Origin::USER_COMMAND); + state.enableService(NetworkState::USER_COMMAND); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -671,8 +672,8 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // Enable by ha. - state.enableService(NetworkState::Origin::HA_LOCAL_COMMAND); - state.enableService(NetworkState::Origin::HA_REMOTE_COMMAND); + state.enableService(NetworkState::HA_LOCAL_COMMAND); + state.enableService(NetworkState::HA_REMOTE_COMMAND); EXPECT_TRUE(state.isServiceEnabled()); EXPECT_EQ(0, disabled_globally->getInteger().first); @@ -682,9 +683,9 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_db->getInteger().first); // DB connection disables are a counter. - state.disableService(NetworkState::Origin::DB_CONNECTION); - state.disableService(NetworkState::Origin::DB_CONNECTION); - state.disableService(NetworkState::Origin::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); + state.disableService(NetworkState::DB_CONNECTION); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -693,8 +694,8 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); EXPECT_EQ(3, disabled_by_db->getInteger().first); - state.enableService(NetworkState::Origin::DB_CONNECTION); - state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); EXPECT_FALSE(state.isServiceEnabled()); EXPECT_EQ(1, disabled_globally->getInteger().first); @@ -703,7 +704,7 @@ NetworkStateTest::monitoringMetricsUpdatedTest() { EXPECT_EQ(0, disabled_by_ha_remote->getInteger().first); EXPECT_EQ(1, disabled_by_db->getInteger().first); - state.enableService(NetworkState::Origin::DB_CONNECTION); + state.enableService(NetworkState::DB_CONNECTION); EXPECT_TRUE(state.isServiceEnabled()); EXPECT_EQ(0, disabled_globally->getInteger().first); From e7269217e71f8cc8766283d7a0c4ad76cae83a9d Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Tue, 30 Jan 2024 13:34:36 -0800 Subject: [PATCH 5/6] Keep the StatsMgr instance on the stack. --- src/lib/dhcpsrv/network_state.cc | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/lib/dhcpsrv/network_state.cc b/src/lib/dhcpsrv/network_state.cc index bb63ccdc5f..cbf1304f72 100644 --- a/src/lib/dhcpsrv/network_state.cc +++ b/src/lib/dhcpsrv/network_state.cc @@ -180,16 +180,36 @@ class NetworkStateImpl : public boost::enable_shared_from_this /// /// A 0 value is used for false, 1 for true. void updateStats() { - isc::stats::StatsMgr::instance().setValue("disabled-globally", + isc::stats::StatsMgr& stats_mgr = isc::stats::StatsMgr::instance(); + + stats_mgr.setValue("disabled-globally", static_cast(globally_disabled_)); - isc::stats::StatsMgr::instance().setValue("disabled-by-user", - static_cast(disabled_by_origin_.count(NetworkState::USER_COMMAND))); - isc::stats::StatsMgr::instance().setValue("disabled-by-ha-local", - static_cast(disabled_by_origin_.count(NetworkState::HA_LOCAL_COMMAND))); - isc::stats::StatsMgr::instance().setValue("disabled-by-ha-remote", - static_cast(disabled_by_origin_.count(NetworkState::HA_REMOTE_COMMAND))); + + bool user = false, ha_local = false, ha_remote = false; + for (auto origin : disabled_by_origin_) { + switch (origin) { + case NetworkState::USER_COMMAND: + user = true; + break; + default: + if ((NetworkState::HA_LOCAL_COMMAND <= origin) && + (origin < NetworkState::HA_REMOTE_COMMAND)) { + ha_local = true; + } else if ((NetworkState::HA_LOCAL_COMMAND <= origin) && + (origin < NetworkState::DB_CONNECTION)) { + ha_remote = true; + } + break; + } + } + stats_mgr.setValue("disabled-by-user", static_cast(user)); + stats_mgr.setValue("disabled-by-ha-local", + static_cast(ha_local)); + stats_mgr.setValue("disabled-by-ha-remote", + static_cast(ha_remote)); + // Expose the disabled-by-db-connection counter by direct value. - isc::stats::StatsMgr::instance().setValue("disabled-by-db", + stats_mgr.setValue("disabled-by-db", static_cast(disabled_by_db_connection_)); } }; From 6e0ce9da7d73f95b53063e45f95aa1122363e03f Mon Sep 17 00:00:00 2001 From: "David W. Hankins" Date: Tue, 30 Jan 2024 13:34:51 -0800 Subject: [PATCH 6/6] Reference gitlab in Changelog. --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 020af9f455..744d8c3792 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ are not counted by any metric. While disabled, a WARN log is emitted every DISABLE_WARN_PACKET{4|6}(100) dropped packets, in addition to the existing DEBUG log. + (Gitlab #3228) 2200. [func] piotrek Kea now supports new DHCPv4 option code 121, Classless Static