From 05ac6fe102dff45b0290b304423ff7fd79e84cdd Mon Sep 17 00:00:00 2001 From: AlCalzone Date: Wed, 4 Oct 2023 13:35:54 +0200 Subject: [PATCH] fix: limit number of retries when sending a command results in the transmit status `Fail` (#6361) --- docs/api/driver.md | 6 + packages/zwave-js/src/lib/driver/Driver.ts | 63 ++++++++- .../zwave-js/src/lib/driver/ZWaveOptions.ts | 6 + .../lib/test/driver/controllerJammed.test.ts | 125 +++++++++++++++++- 4 files changed, 192 insertions(+), 8 deletions(-) diff --git a/docs/api/driver.md b/docs/api/driver.md index 40149d8fc604..cbab3b66160f 100644 --- a/docs/api/driver.md +++ b/docs/api/driver.md @@ -698,6 +698,9 @@ interface ZWaveOptions extends ZWaveHostOptions { /** How long generated nonces are valid */ nonce: number; // [3000...20000], default: 5000 ms + /** How long to wait before retrying a command when the controller is jammed */ + retryJammed: number; // [10...5000], default: 1000 ms + /** * How long to wait without pending commands before sending a node back to sleep. * Should be as short as possible to save battery, but long enough to give applications time to react. @@ -736,6 +739,9 @@ interface ZWaveOptions extends ZWaveHostOptions { /** How often the driver should try sending SendData commands before giving up */ sendData: number; // [1...5], default: 3 + /** How often the driver should retry SendData commands while the controller is jammed */ + sendDataJammed: number; // [1...10], default: 5 + /** * How many attempts should be made for each node interview before giving up */ diff --git a/packages/zwave-js/src/lib/driver/Driver.ts b/packages/zwave-js/src/lib/driver/Driver.ts index af8c4cca3219..75b843aa94fa 100644 --- a/packages/zwave-js/src/lib/driver/Driver.ts +++ b/packages/zwave-js/src/lib/driver/Driver.ts @@ -254,6 +254,7 @@ const defaultOptions: ZWaveOptions = { nonce: 5000, sendDataCallback: 65000, // as defined in INS13954 sendToSleep: 250, // The default should be enough time for applications to react to devices waking up + retryJammed: 1000, refreshValue: 5000, // Default should handle most slow devices until we have a better solution refreshValueAfterTransition: 1000, // To account for delays in the device serialAPIStarted: 5000, @@ -262,6 +263,7 @@ const defaultOptions: ZWaveOptions = { openSerialPort: 10, controller: 3, sendData: 3, + sendDataJammed: 5, nodeInterview: 5, }, disableOptimisticValueUpdate: false, @@ -313,6 +315,14 @@ function checkOptions(options: ZWaveOptions): void { ZWaveErrorCodes.Driver_InvalidOptions, ); } + if ( + options.timeouts.retryJammed < 10 || options.timeouts.retryJammed > 5000 + ) { + throw new ZWaveError( + `The timeout for retrying while jammed must be between 10 and 5000 milliseconds!`, + ZWaveErrorCodes.Driver_InvalidOptions, + ); + } if ( options.timeouts.sendToSleep < 10 || options.timeouts.sendToSleep > 5000 ) { @@ -369,6 +379,15 @@ function checkOptions(options: ZWaveOptions): void { ZWaveErrorCodes.Driver_InvalidOptions, ); } + if ( + options.attempts.sendDataJammed < 1 + || options.attempts.sendDataJammed > 10 + ) { + throw new ZWaveError( + `The SendData attempts while jammed must be between 1 and 10!`, + ZWaveErrorCodes.Driver_InvalidOptions, + ); + } if ( options.attempts.nodeInterview < 1 || options.attempts.nodeInterview > 10 @@ -4629,7 +4648,8 @@ ${handlers.length} left`, // Step through the transaction as long as it gives us a next message while ((msg = await transaction.generateNextMessage(prevResult))) { - // TODO: refactor this nested loop or make it part of executeSerialAPICommand + // Keep track of how often the controller failed to send a command, to prevent ending up in an infinite loop + let jammedAttempts = 0; attemptMessage: for (let attemptNumber = 1;; attemptNumber++) { try { prevResult = await this.queueSerialAPICommand( @@ -4647,11 +4667,34 @@ ${handlers.length} left`, // Ensure the controller didn't actually transmit && prevResult.txReport?.txTicks === 0 ) { - // The controller is jammed. Wait a second, then try again. - this.controller.setStatus(ControllerStatus.Jammed); - await wait(1000, true); - - continue attemptMessage; + jammedAttempts++; + if ( + jammedAttempts + < this.options.attempts.sendDataJammed + ) { + // The controller is jammed. Wait a bit, then try again. + this.controller.setStatus( + ControllerStatus.Jammed, + ); + await wait( + this.options.timeouts.retryJammed, + true, + ); + + continue attemptMessage; + } else { + // Maybe this isn't actually the controller being jammed. Give up on this command. + this.controller.setStatus( + ControllerStatus.Ready, + ); + + throw new ZWaveError( + `Failed to send the command after ${jammedAttempts} attempts`, + ZWaveErrorCodes.Controller_MessageDropped, + prevResult, + transaction.stack, + ); + } } if ( @@ -4699,12 +4742,18 @@ ${handlers.length} left`, } else if (isMissingControllerACK(e)) { // The controller is unresponsive. Reject the transaction, so we can attempt to recover throw e; + } else if ( + e.code === ZWaveErrorCodes.Controller_MessageDropped + ) { + // We gave up on this command, so don't retry it + throw e; } if ( this.mayRetrySerialAPICommand( msg, - attemptNumber, + // Ignore the number of attempts while jammed + attemptNumber - jammedAttempts, e.code, ) ) { diff --git a/packages/zwave-js/src/lib/driver/ZWaveOptions.ts b/packages/zwave-js/src/lib/driver/ZWaveOptions.ts index 646e7227c618..901a133b20e2 100644 --- a/packages/zwave-js/src/lib/driver/ZWaveOptions.ts +++ b/packages/zwave-js/src/lib/driver/ZWaveOptions.ts @@ -29,6 +29,9 @@ export interface ZWaveOptions extends ZWaveHostOptions { /** How long generated nonces are valid */ nonce: number; // [3000...20000], default: 5000 ms + /** How long to wait before retrying a command when the controller is jammed */ + retryJammed: number; // [10...5000], default: 1000 ms + /** * How long to wait without pending commands before sending a node back to sleep. * Should be as short as possible to save battery, but long enough to give applications time to react. @@ -73,6 +76,9 @@ export interface ZWaveOptions extends ZWaveHostOptions { /** How often the driver should try sending SendData commands before giving up */ sendData: number; // [1...5], default: 3 + /** How often the driver should retry SendData commands while the controller is jammed */ + sendDataJammed: number; // [1...10], default: 5 + /** * How many attempts should be made for each node interview before giving up */ diff --git a/packages/zwave-js/src/lib/test/driver/controllerJammed.test.ts b/packages/zwave-js/src/lib/test/driver/controllerJammed.test.ts index 17c4707ccd5e..8f9b08aaf7be 100644 --- a/packages/zwave-js/src/lib/test/driver/controllerJammed.test.ts +++ b/packages/zwave-js/src/lib/test/driver/controllerJammed.test.ts @@ -1,4 +1,10 @@ -import { ControllerStatus, NodeStatus, TransmitStatus } from "@zwave-js/core"; +import { + ControllerStatus, + NodeStatus, + TransmitStatus, + ZWaveErrorCodes, + assertZWaveError, +} from "@zwave-js/core"; import { type MockControllerBehavior } from "@zwave-js/testing"; import { wait } from "alcalzone-shared/async"; import sinon from "sinon"; @@ -13,6 +19,7 @@ import { SendDataResponse, } from "../../serialapi/transport/SendDataMessages"; import { integrationTest } from "../integrationTestSuite"; +import { integrationTest as integrationTestMulti } from "../integrationTestSuiteMulti"; let shouldFail = false; @@ -130,3 +137,119 @@ integrationTest("update the controller status and wait if TX status is Fail", { ]); }, }); + +integrationTestMulti( + "Prevent an infinite loop when the controller is unable to transmit a command to a specific node", + { + // debug: true, + // provisioningDirectory: path.join( + // __dirname, + // "__fixtures/supervision_binary_switch", + // ), + + additionalDriverOptions: { + testingHooks: { + skipNodeInterview: true, + }, + }, + + nodeCapabilities: [ + { + id: 2, + capabilities: { + isListening: true, + }, + }, + { + id: 3, + capabilities: { + isListening: true, + }, + }, + ], + + customSetup: async (driver, controller, mockNodes) => { + // Return a TX status of Fail when desired + const handleSendData: MockControllerBehavior = { + async onHostMessage(host, controller, msg) { + if (msg instanceof SendDataRequest) { + // Commands to node 3 work normally + if (msg.getNodeId() === 3) { + // Defer to the default behavior + return false; + } + + // Check if this command is legal right now + const state = controller.state.get( + MockControllerStateKeys.CommunicationState, + ) as MockControllerCommunicationState | undefined; + if ( + state != undefined + && state !== MockControllerCommunicationState.Idle + ) { + throw new Error( + "Received SendDataRequest while not idle", + ); + } + + // Put the controller into sending state + controller.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Sending, + ); + + // Notify the host that the message was sent + const res = new SendDataResponse(host, { + wasSent: true, + }); + await controller.sendToHost(res.serialize()); + + await wait(100); + + controller.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Idle, + ); + + const cb = new SendDataRequestTransmitReport(host, { + callbackId: msg.callbackId, + transmitStatus: TransmitStatus.Fail, + txReport: { + txTicks: 0, + routeSpeed: 0 as any, + routingAttempts: 0, + ackRSSI: 0, + }, + }); + await controller.sendToHost(cb.serialize()); + + return true; + } else if (msg instanceof SendDataAbort) { + // Put the controller into idle state + controller.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Idle, + ); + } + }, + }; + controller.defineBehavior(handleSendData); + }, + testBody: async (t, driver, nodes, mockController, mockNodes) => { + const [node2, node3] = nodes; + node2.markAsAlive(); + node3.markAsAlive(); + + driver.options.timeouts.retryJammed = 100; + + t.true(await node3.ping()); + await assertZWaveError( + t, + () => node2.commandClasses.Basic.set(99), + { + errorCode: ZWaveErrorCodes.Controller_MessageDropped, + }, + ); + }, + }, +);