diff --git a/packages/core/src/error/ZWaveError.ts b/packages/core/src/error/ZWaveError.ts index 81261ad61d91..11bd3c87dec7 100644 --- a/packages/core/src/error/ZWaveError.ts +++ b/packages/core/src/error/ZWaveError.ts @@ -14,7 +14,7 @@ export enum ZWaveErrorCodes { /** The driver failed to start */ Driver_Failed = 100, - Driver_Reset, + Driver_Reset, // FIXME: This is not used Driver_Destroyed, Driver_NotReady, Driver_InvalidDataReceived, @@ -35,6 +35,9 @@ export enum ZWaveErrorCodes { Controller_ResponseNOK, Controller_CallbackNOK, Controller_Jammed, + /** The controller was reset in the middle of a Serial API command */ + Controller_Reset, + Controller_InclusionFailed, Controller_ExclusionFailed, @@ -294,6 +297,15 @@ export function isMissingControllerACK( && e.context === "ACK"; } +export function wasControllerReset( + e: unknown, +): e is ZWaveError & { + code: ZWaveErrorCodes.Controller_Reset; +} { + return isZWaveError(e) + && e.code === ZWaveErrorCodes.Controller_Reset; +} + export function isMissingControllerResponse( e: unknown, ): e is ZWaveError & { diff --git a/packages/serial/src/message/Constants.ts b/packages/serial/src/message/Constants.ts index 21f690b396df..5d67fe47d312 100644 --- a/packages/serial/src/message/Constants.ts +++ b/packages/serial/src/message/Constants.ts @@ -157,9 +157,9 @@ export enum FunctionType { UNKNOWN_FUNC_UNKNOWN_0xB4 = 0xb4, // ?? - UNKNOWN_FUNC_WATCH_DOG_ENABLE = 0xb6, - UNKNOWN_FUNC_WATCH_DOG_DISABLE = 0xb7, - UNKNOWN_FUNC_WATCH_DOG_KICK = 0xb8, + EnableWatchdog500 = 0xb6, // Enable Watchdog (500 series and older) + DisableWatchdog500 = 0xb7, // Disable Watchdog (500 series and older) + KickWatchdog500 = 0xb8, // Kick Watchdog (500 series and older) UNKNOWN_FUNC_UNKNOWN_0xB9 = 0xb9, // ?? UNKNOWN_FUNC_RF_POWERLEVEL_GET = 0xba, // Get RF Power level @@ -170,8 +170,9 @@ export enum FunctionType { FUNC_ID_ZW_SET_PROMISCUOUS_MODE = 0xd0, // Set controller into promiscuous mode to listen to all messages FUNC_ID_PROMISCUOUS_APPLICATION_COMMAND_HANDLER = 0xd1, - UNKNOWN_FUNC_UNKNOWN_0xD2 = 0xd2, // ?? - UNKNOWN_FUNC_UNKNOWN_0xD3 = 0xd3, // ?? + StartWatchdog = 0xd2, // Start Hardware Watchdog (700 series and newer) + StopWatchdog = 0xd3, // Stop Hardware Watchdog (700 series and newer) + UNKNOWN_FUNC_UNKNOWN_0xD4 = 0xd4, // ?? Shutdown = 0xd9, // Instruct the Z-Wave API to shut down in order to safely remove the power diff --git a/packages/serial/src/message/Message.ts b/packages/serial/src/message/Message.ts index 3cffa084ab82..1937cfbb78ce 100644 --- a/packages/serial/src/message/Message.ts +++ b/packages/serial/src/message/Message.ts @@ -330,6 +330,12 @@ export class Message { } } + /** Tests whether this message expects an ACK from the controller */ + public expectsAck(): boolean { + // By default, all commands expect an ACK + return true; + } + /** Tests whether this message expects a response from the controller */ public expectsResponse(): boolean { return !!this.expectedResponse; diff --git a/packages/zwave-js/src/lib/controller/Controller.ts b/packages/zwave-js/src/lib/controller/Controller.ts index addd9eea8545..342fa80652b2 100644 --- a/packages/zwave-js/src/lib/controller/Controller.ts +++ b/packages/zwave-js/src/lib/controller/Controller.ts @@ -130,10 +130,7 @@ import { ApplicationUpdateRequestSmartStartHomeIDReceived, ApplicationUpdateRequestSmartStartLongRangeHomeIDReceived, } from "../serialapi/application/ApplicationUpdateRequest"; -import { - type SerialAPIStartedRequest, - SerialAPIWakeUpReason, -} from "../serialapi/application/SerialAPIStartedRequest"; + import { ShutdownRequest, type ShutdownResponse, @@ -216,6 +213,10 @@ import { SetSerialApiTimeoutsRequest, type SetSerialApiTimeoutsResponse, } from "../serialapi/misc/SetSerialApiTimeoutsMessages"; +import { + StartWatchdogRequest, + StopWatchdogRequest, +} from "../serialapi/misc/WatchdogMessages"; import { AddNodeDSKToNetworkRequest, AddNodeStatus, @@ -449,10 +450,6 @@ export class ZWaveController FunctionType.ReplaceFailedNode, this.handleReplaceNodeStatusReport.bind(this), ); - driver.registerRequestHandler( - FunctionType.SerialAPIStarted, - this.handleSerialAPIStartedUnexpectedly.bind(this), - ); } private _type: MaybeNotKnown; @@ -739,6 +736,10 @@ export class ZWaveController public get nodeIdType(): NodeIDType { return this._nodeIdType; } + /** @internal */ + public set nodeIdType(value: NodeIDType) { + this._nodeIdType = value; + } /** Returns the node with the given DSK */ public getNodeByDSK(dsk: Buffer | string): ZWaveNode | undefined { @@ -1888,6 +1889,63 @@ export class ZWaveController } } + /** + * Starts the hardware watchdog on supporting 700+ series controllers. + * Returns whether the operation was successful. + */ + public async startWatchdog(): Promise { + if ( + this.sdkVersionGte("7.0") + && this.isFunctionSupported(FunctionType.StartWatchdog) + ) { + try { + this.driver.controllerLog.print( + "Starting hardware watchdog...", + ); + await this.driver.sendMessage( + new StartWatchdogRequest(this.driver), + ); + + return true; + } catch (e) { + this.driver.controllerLog.print( + `Starting the hardware watchdog failed: ${ + getErrorMessage(e) + }`, + "error", + ); + } + } + return false; + } + + /** + * Stops the hardware watchdog on supporting controllers. + * Returns whether the operation was successful. + */ + public async stopWatchdog(): Promise { + if (this.isFunctionSupported(FunctionType.StopWatchdog)) { + try { + this.driver.controllerLog.print( + "Stopping hardware watchdog...", + ); + await this.driver.sendMessage( + new StopWatchdogRequest(this.driver), + ); + + return true; + } catch (e) { + this.driver.controllerLog.print( + `Stopping the hardware watchdog failed: ${ + getErrorMessage(e) + }`, + "error", + ); + } + } + return false; + } + private _inclusionState: InclusionState = InclusionState.Idle; public get inclusionState(): InclusionState { return this._inclusionState; @@ -4331,43 +4389,6 @@ supported CCs: ${ return false; } - /** - * Is called when the Serial API restart unexpectedly. - */ - private async handleSerialAPIStartedUnexpectedly( - msg: SerialAPIStartedRequest, - ): Promise { - // Normally, the soft reset command includes waiting for this message. - // If we end up here, it is unexpected. - - switch (msg.wakeUpReason) { - // All wakeup reasons that indicate a reset of the Serial API - // need to be handled here, so we interpret node IDs correctly. - case SerialAPIWakeUpReason.Reset: - case SerialAPIWakeUpReason.WatchdogReset: - case SerialAPIWakeUpReason.SoftwareReset: - case SerialAPIWakeUpReason.EmergencyWatchdogReset: - case SerialAPIWakeUpReason.BrownoutCircuit: { - // The Serial API restarted unexpectedly - if (this._nodeIdType === NodeIDType.Long) { - this.driver.controllerLog.print( - `Serial API restarted unexpectedly.`, - "warn", - ); - - // We previously used 16 bit node IDs, but the controller was reset. - // Remember this and try to go back to 16 bit. - this._nodeIdType = NodeIDType.Short; - await this.trySetNodeIDType(NodeIDType.Long); - } - - return true; // Don't invoke any more handlers - } - } - - return false; // Not handled - } - private _rebuildRoutesProgress = new Map(); /** * If routes are currently being rebuilt for the entire network, this returns the current progress. @@ -6945,6 +6966,9 @@ ${associatedNodes.join(", ")}`, ); } + // Disable watchdog to prevent resets during NVM access + await this.stopWatchdog(); + let ret: Buffer; try { if (this.sdkVersionGte("7.0")) { @@ -6952,6 +6976,7 @@ ${associatedNodes.join(", ")}`, // All 7.xx versions so far seem to have a bug where the NVM is not properly closed after reading // resulting in extremely strange controller behavior after a backup. To work around this, restart the stick if possible await this.driver.trySoftReset(); + // Soft-resetting will enable the watchdog again } else { ret = await this.backupNVMRaw500(onProgress); } @@ -7071,6 +7096,9 @@ ${associatedNodes.join(", ")}`, ); } + // Disable watchdog to prevent resets during NVM access + await this.stopWatchdog(); + // Restoring a potentially incompatible NVM happens in three steps: // 1. the current NVM is read // 2. the given NVM data is converted to match the current format diff --git a/packages/zwave-js/src/lib/driver/Driver.ts b/packages/zwave-js/src/lib/driver/Driver.ts index 6f95b2b639f7..8bb35ed26f47 100644 --- a/packages/zwave-js/src/lib/driver/Driver.ts +++ b/packages/zwave-js/src/lib/driver/Driver.ts @@ -100,6 +100,7 @@ import { serializeCacheValue, stripUndefined, timespan, + wasControllerReset, } from "@zwave-js/core"; import type { NodeSchedulePollOptions, @@ -171,7 +172,10 @@ import { import { ApplicationCommandRequest } from "../serialapi/application/ApplicationCommandRequest"; import { ApplicationUpdateRequest } from "../serialapi/application/ApplicationUpdateRequest"; import { BridgeApplicationCommandRequest } from "../serialapi/application/BridgeApplicationCommandRequest"; -import type { SerialAPIStartedRequest } from "../serialapi/application/SerialAPIStartedRequest"; +import { + SerialAPIStartedRequest, + SerialAPIWakeUpReason, +} from "../serialapi/application/SerialAPIStartedRequest"; import { GetControllerVersionRequest } from "../serialapi/capability/GetControllerVersionMessages"; import { SoftResetRequest } from "../serialapi/misc/SoftResetRequest"; import { @@ -290,6 +294,8 @@ const defaultOptions: ZWaveOptions = { // By default enable the unresponsive controller recovery unless the env variable is set unresponsiveControllerRecovery: !process.env .ZWAVEJS_DISABLE_UNRESPONSIVE_CONTROLLER_RECOVERY, + // By default enable the watchdog, unless the env variable is set + watchdog: !process.env.ZWAVEJS_DISABLE_WATCHDOG, }, interview: { queryAllUserCodes: false, @@ -2663,6 +2669,11 @@ export class Driver extends TypedEventEmitter // This is a bit hacky, but what the heck... if (!this._enteringBootloader) { + // Start the watchdog again, unless disabled + if (this.options.features.watchdog) { + await this._controller?.startWatchdog(); + } + // If desired, re-configure the controller to use 16 bit node IDs void this._controller?.trySetNodeIDType(NodeIDType.Long); @@ -4195,6 +4206,69 @@ export class Driver extends TypedEventEmitter } } + /** + * Is called when the Serial API restart unexpectedly. + */ + private async handleSerialAPIStartedUnexpectedly( + msg: SerialAPIStartedRequest, + ): Promise { + // Normally, the soft reset command includes waiting for this message. + // If we end up here, it is unexpected. + + switch (msg.wakeUpReason) { + // All wakeup reasons that indicate a reset of the Serial API + // need to be handled here, so we interpret node IDs correctly. + case SerialAPIWakeUpReason.Reset: + case SerialAPIWakeUpReason.WatchdogReset: + case SerialAPIWakeUpReason.SoftwareReset: + case SerialAPIWakeUpReason.EmergencyWatchdogReset: + case SerialAPIWakeUpReason.BrownoutCircuit: { + // The Serial API restarted unexpectedly + this.controllerLog.print( + `Serial API restarted unexpectedly.`, + "warn", + ); + + // In this situation, we may be executing a Serial API command, which will never complete. + // Bail out of it, without rejecting the actual transaction + if ( + this.serialAPIInterpreter?.status + === InterpreterStatus.Running + ) { + this.serialAPIInterpreter.stop(); + } + if (this._currentSerialAPICommandPromise) { + this.controllerLog.print( + `Currently active command will be retried...`, + "warn", + ); + this._currentSerialAPICommandPromise.reject( + new ZWaveError( + "The Serial API restarted unexpectedly", + ZWaveErrorCodes.Controller_Reset, + ), + ); + } + + // Restart the watchdog unless disabled + if (this.options.features.watchdog) { + await this._controller?.startWatchdog(); + } + + if (this._controller?.nodeIdType === NodeIDType.Long) { + // We previously used 16 bit node IDs, but the controller was reset. + // Remember this and try to go back to 16 bit. + this._controller.nodeIdType = NodeIDType.Short; + await this._controller.trySetNodeIDType(NodeIDType.Long); + } + + return true; // Don't invoke any more handlers + } + } + + return false; // Not handled + } + /** * Registers a handler for messages that are not handled by the driver as part of a message exchange. * The handler function needs to return a boolean indicating if the message has been handled. @@ -4646,6 +4720,10 @@ ${handlers.length} left`, // Make sure we're ready to handle this command this.ensureReady(true); return this.controller.handleApplicationUpdateRequest(msg); + } else if (msg instanceof SerialAPIStartedRequest) { + if (await this.handleSerialAPIStartedUnexpectedly(msg)) { + return; + } } else { // TODO: This deserves a nicer formatting this.driverLog.print( @@ -5137,6 +5215,9 @@ ${handlers.length} left`, } else if (isMissingControllerACK(e)) { // The controller is unresponsive. Reject the transaction, so we can attempt to recover throw e; + } else if (wasControllerReset(e)) { + // The controller was reset unexpectedly. Reject the transaction, so we can attempt to recover + throw e; } else if ( e.code === ZWaveErrorCodes.Controller_MessageDropped ) { @@ -5169,6 +5250,13 @@ ${handlers.length} left`, transaction.setProgress({ state: TransactionState.Completed }); } + /** + * Provides access to the result Promise for the currently executed serial API command + */ + private _currentSerialAPICommandPromise: + | DeferredPromise + | undefined; + /** Handles sequencing of queued Serial API commands */ private async drainSerialAPIQueue(): Promise { for await (const item of this.serialAPIQueue) { @@ -5181,6 +5269,8 @@ ${handlers.length} left`, result.resolve(ret); } catch (e) { result.reject(e); + } finally { + this._currentSerialAPICommandPromise = undefined; } } } @@ -5355,6 +5445,7 @@ ${handlers.length} left`, this.serialAPIInterpreter.start(); + this._currentSerialAPICommandPromise = result; return result; } @@ -6000,6 +6091,15 @@ ${handlers.length} left`, if ( this.handleMissingSendDataResponseOrCallback(transaction, error) ) return; + } else if (wasControllerReset(error)) { + // The controller was reset in the middle of a transaction. + // Re-queue the transaction, so it can get handled again + // Its message generator may have finished, so reset that too. + transaction.reset(); + this.getQueueForTransaction(transaction).add( + transaction.clone(), + ); + return; } this.rejectTransaction(transaction, error); diff --git a/packages/zwave-js/src/lib/driver/SerialAPICommandMachine.ts b/packages/zwave-js/src/lib/driver/SerialAPICommandMachine.ts index b64054181b0c..fca75ef65221 100644 --- a/packages/zwave-js/src/lib/driver/SerialAPICommandMachine.ts +++ b/packages/zwave-js/src/lib/driver/SerialAPICommandMachine.ts @@ -207,6 +207,7 @@ export function getSerialAPICommandMachineConfig( }, }, waitForACK: { + always: [{ target: "success", cond: "expectsNoAck" }], on: { CAN: { target: "retry", @@ -390,6 +391,7 @@ export function getSerialAPICommandMachineOptions( guards: { mayRetry: (ctx) => ctx.attempts < ctx.maxAttempts, isSendData: (ctx) => isSendData(ctx.msg), + expectsNoAck: (ctx) => !ctx.msg.expectsAck(), expectsNoResponse: (ctx) => !ctx.msg.expectsResponse(), expectsNoCallback: (ctx) => !ctx.msg.expectsCallback(), isExpectedMessage: (ctx, evt, meta) => diff --git a/packages/zwave-js/src/lib/driver/ZWaveOptions.ts b/packages/zwave-js/src/lib/driver/ZWaveOptions.ts index 21809827f2ac..5e9d7b247494 100644 --- a/packages/zwave-js/src/lib/driver/ZWaveOptions.ts +++ b/packages/zwave-js/src/lib/driver/ZWaveOptions.ts @@ -217,6 +217,14 @@ export interface ZWaveOptions extends ZWaveHostOptions { * Default: `true`, except when the ZWAVEJS_DISABLE_UNRESPONSIVE_CONTROLLER_RECOVERY env variable is set. */ unresponsiveControllerRecovery?: boolean; + + /** + * Controllers of the 700 series and newer have a hardware watchdog that can be enabled to automatically + * reset the chip in case it becomes unresponsive. This option controls whether the watchdog should be enabled. + * + * Default: `true`, except when the ZWAVEJS_DISABLE_WATCHDOG env variable is set. + */ + watchdog?: boolean; }; /** diff --git a/packages/zwave-js/src/lib/serialapi/misc/WatchdogMessages.ts b/packages/zwave-js/src/lib/serialapi/misc/WatchdogMessages.ts new file mode 100644 index 000000000000..8ebba434669b --- /dev/null +++ b/packages/zwave-js/src/lib/serialapi/misc/WatchdogMessages.ts @@ -0,0 +1,26 @@ +import { MessagePriority } from "@zwave-js/core"; +import { + FunctionType, + Message, + MessageType, + messageTypes, + priority, +} from "@zwave-js/serial"; + +// These commands expect no ACK, no response, no callback + +@messageTypes(MessageType.Request, FunctionType.StartWatchdog) +@priority(MessagePriority.Controller) +export class StartWatchdogRequest extends Message { + public override expectsAck(): boolean { + return false; + } +} + +@messageTypes(MessageType.Request, FunctionType.StopWatchdog) +@priority(MessagePriority.Controller) +export class StopWatchdogRequest extends Message { + public override expectsAck(): boolean { + return false; + } +} diff --git a/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts b/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts index a4b42cab2cae..66fe537c9511 100644 --- a/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts +++ b/packages/zwave-js/src/lib/test/driver/sendDataMissingCallbackAbort.test.ts @@ -15,6 +15,11 @@ import { } from "@zwave-js/core"; import path from "node:path"; import Sinon from "sinon"; +import { determineNIF } from "../../controller/NodeInformationFrame"; +import { + SerialAPIStartedRequest, + SerialAPIWakeUpReason, +} from "../../serialapi/application/SerialAPIStartedRequest"; import { SoftResetRequest } from "../../serialapi/misc/SoftResetRequest"; import { RequestNodeInfoRequest, @@ -825,3 +830,109 @@ integrationTestMulti( }, }, ); + +integrationTest( + "Retry transmissions if the controller is reset by the watchdog while waiting for the callback", + { + // debug: true, + + // provisioningDirectory: path.join( + // __dirname, + // "__fixtures/supervision_binary_switch", + // ), + + additionalDriverOptions: { + testingHooks: { + skipNodeInterview: true, + }, + }, + + customSetup: async (driver, mockController, mockNode) => { + // This is almost a 1:1 copy of the default behavior, except that the callback never gets sent + const handleBrokenSendData: MockControllerBehavior = { + async onHostMessage(host, controller, msg) { + // If the controller is operating normally, defer to the default behavior + if (!shouldTimeOut) return false; + + if (msg instanceof SendDataRequest) { + // Check if this command is legal right now + const state = controller.state.get( + MockControllerStateKeys.CommunicationState, + ) as MockControllerCommunicationState | undefined; + if ( + state != undefined + && state !== MockControllerCommunicationState.Idle + ) { + throw new Error( + "Received SendDataRequest while not idle", + ); + } + + // Put the controller into sending state + controller.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Sending, + ); + + // Notify the host that the message was sent + const res = new SendDataResponse(host, { + wasSent: true, + }); + await controller.sendToHost(res.serialize()); + + return true; + } else if (msg instanceof SendDataAbort) { + // Put the controller into idle state + controller.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Idle, + ); + + return true; + } + }, + }; + mockController.defineBehavior(handleBrokenSendData); + }, + testBody: async (t, driver, node, mockController, mockNode) => { + // Circumvent the options validation so the test doesn't take forever + driver.options.timeouts.sendDataAbort = 1500; + driver.options.timeouts.sendDataCallback = 2000; + + shouldTimeOut = true; + + const pingPromise = node.ping(); + + await wait(1000); + + // After 1 second, the watchdog restarts the controller + shouldTimeOut = false; + + mockController.state.set( + MockControllerStateKeys.CommunicationState, + MockControllerCommunicationState.Idle, + ); + + const ret = new SerialAPIStartedRequest(mockController.host, { + wakeUpReason: SerialAPIWakeUpReason.WatchdogReset, + watchdogEnabled: true, + isListening: true, + ...determineNIF(), + supportsLongRange: true, + }); + setImmediate(async () => { + await mockController.sendToHost(ret.serialize()); + }); + + // And the ping should eventually succeed + t.true(await pingPromise); + + // But the transmission should not have been aborted + t.throws(() => + mockController.assertReceivedHostMessage( + (msg) => msg.functionType === FunctionType.SendDataAbort, + ) + ); + }, + }, +); diff --git a/packages/zwave-js/src/lib/test/messages.ts b/packages/zwave-js/src/lib/test/messages.ts index d2688b736d19..48ed82c9b7e7 100644 --- a/packages/zwave-js/src/lib/test/messages.ts +++ b/packages/zwave-js/src/lib/test/messages.ts @@ -11,6 +11,7 @@ const defaultImplementations = { getCallbackTimeout: () => undefined, markAsSent: () => void 0, markAsCompleted: () => void 0, + expectsAck: () => true, }; export const dummyResponseOK = {