Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: limit number of retries when sending a command results in the transmit status Fail #6361

Merged
merged 2 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/api/driver.md
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,9 @@ interface ZWaveOptions extends ZWaveHostOptions {
/** How long generated nonces are valid */
nonce: number; // [3000...20000], default: 5000 ms

/** How long to wait before retrying a command when the controller is jammed */
retryJammed: number; // [10...5000], default: 1000 ms

/**
* How long to wait without pending commands before sending a node back to sleep.
* Should be as short as possible to save battery, but long enough to give applications time to react.
Expand Down Expand Up @@ -736,6 +739,9 @@ interface ZWaveOptions extends ZWaveHostOptions {
/** How often the driver should try sending SendData commands before giving up */
sendData: number; // [1...5], default: 3

/** How often the driver should retry SendData commands while the controller is jammed */
sendDataJammed: number; // [1...10], default: 5

/**
* How many attempts should be made for each node interview before giving up
*/
Expand Down
63 changes: 56 additions & 7 deletions packages/zwave-js/src/lib/driver/Driver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ const defaultOptions: ZWaveOptions = {
nonce: 5000,
sendDataCallback: 65000, // as defined in INS13954
sendToSleep: 250, // The default should be enough time for applications to react to devices waking up
retryJammed: 1000,
refreshValue: 5000, // Default should handle most slow devices until we have a better solution
refreshValueAfterTransition: 1000, // To account for delays in the device
serialAPIStarted: 5000,
Expand All @@ -262,6 +263,7 @@ const defaultOptions: ZWaveOptions = {
openSerialPort: 10,
controller: 3,
sendData: 3,
sendDataJammed: 5,
nodeInterview: 5,
},
disableOptimisticValueUpdate: false,
Expand Down Expand Up @@ -313,6 +315,14 @@ function checkOptions(options: ZWaveOptions): void {
ZWaveErrorCodes.Driver_InvalidOptions,
);
}
if (
options.timeouts.retryJammed < 10 || options.timeouts.retryJammed > 5000
) {
throw new ZWaveError(
`The timeout for retrying while jammed must be between 10 and 5000 milliseconds!`,
ZWaveErrorCodes.Driver_InvalidOptions,
);
}
if (
options.timeouts.sendToSleep < 10 || options.timeouts.sendToSleep > 5000
) {
Expand Down Expand Up @@ -369,6 +379,15 @@ function checkOptions(options: ZWaveOptions): void {
ZWaveErrorCodes.Driver_InvalidOptions,
);
}
if (
options.attempts.sendDataJammed < 1
|| options.attempts.sendDataJammed > 10
) {
throw new ZWaveError(
`The SendData attempts while jammed must be between 1 and 10!`,
ZWaveErrorCodes.Driver_InvalidOptions,
);
}
if (
options.attempts.nodeInterview < 1
|| options.attempts.nodeInterview > 10
Expand Down Expand Up @@ -4629,7 +4648,8 @@ ${handlers.length} left`,

// Step through the transaction as long as it gives us a next message
while ((msg = await transaction.generateNextMessage(prevResult))) {
// TODO: refactor this nested loop or make it part of executeSerialAPICommand
// Keep track of how often the controller failed to send a command, to prevent ending up in an infinite loop
let jammedAttempts = 0;
attemptMessage: for (let attemptNumber = 1;; attemptNumber++) {
try {
prevResult = await this.queueSerialAPICommand(
Expand All @@ -4647,11 +4667,34 @@ ${handlers.length} left`,
// Ensure the controller didn't actually transmit
&& prevResult.txReport?.txTicks === 0
) {
// The controller is jammed. Wait a second, then try again.
this.controller.setStatus(ControllerStatus.Jammed);
await wait(1000, true);

continue attemptMessage;
jammedAttempts++;
if (
jammedAttempts
< this.options.attempts.sendDataJammed
) {
// The controller is jammed. Wait a bit, then try again.
this.controller.setStatus(
ControllerStatus.Jammed,
);
await wait(
this.options.timeouts.retryJammed,
true,
);

continue attemptMessage;
} else {
// Maybe this isn't actually the controller being jammed. Give up on this command.
this.controller.setStatus(
ControllerStatus.Ready,
);

throw new ZWaveError(
`Failed to send the command after ${jammedAttempts} attempts`,
ZWaveErrorCodes.Controller_MessageDropped,
prevResult,
transaction.stack,
);
}
}

if (
Expand Down Expand Up @@ -4699,12 +4742,18 @@ ${handlers.length} left`,
} else if (isMissingControllerACK(e)) {
// The controller is unresponsive. Reject the transaction, so we can attempt to recover
throw e;
} else if (
e.code === ZWaveErrorCodes.Controller_MessageDropped
) {
// We gave up on this command, so don't retry it
throw e;
}

if (
this.mayRetrySerialAPICommand(
msg,
attemptNumber,
// Ignore the number of attempts while jammed
attemptNumber - jammedAttempts,
e.code,
)
) {
Expand Down
6 changes: 6 additions & 0 deletions packages/zwave-js/src/lib/driver/ZWaveOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ export interface ZWaveOptions extends ZWaveHostOptions {
/** How long generated nonces are valid */
nonce: number; // [3000...20000], default: 5000 ms

/** How long to wait before retrying a command when the controller is jammed */
retryJammed: number; // [10...5000], default: 1000 ms

/**
* How long to wait without pending commands before sending a node back to sleep.
* Should be as short as possible to save battery, but long enough to give applications time to react.
Expand Down Expand Up @@ -73,6 +76,9 @@ export interface ZWaveOptions extends ZWaveHostOptions {
/** How often the driver should try sending SendData commands before giving up */
sendData: number; // [1...5], default: 3

/** How often the driver should retry SendData commands while the controller is jammed */
sendDataJammed: number; // [1...10], default: 5

/**
* How many attempts should be made for each node interview before giving up
*/
Expand Down
125 changes: 124 additions & 1 deletion packages/zwave-js/src/lib/test/driver/controllerJammed.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { ControllerStatus, NodeStatus, TransmitStatus } from "@zwave-js/core";
import {
ControllerStatus,
NodeStatus,
TransmitStatus,
ZWaveErrorCodes,
assertZWaveError,
} from "@zwave-js/core";
import { type MockControllerBehavior } from "@zwave-js/testing";
import { wait } from "alcalzone-shared/async";
import sinon from "sinon";
Expand All @@ -13,6 +19,7 @@ import {
SendDataResponse,
} from "../../serialapi/transport/SendDataMessages";
import { integrationTest } from "../integrationTestSuite";
import { integrationTest as integrationTestMulti } from "../integrationTestSuiteMulti";

let shouldFail = false;

Expand Down Expand Up @@ -130,3 +137,119 @@ integrationTest("update the controller status and wait if TX status is Fail", {
]);
},
});

integrationTestMulti.only(
"Prevent an infinite loop when the controller is unable to transmit a command to a specific node",
{
debug: true,
// provisioningDirectory: path.join(
// __dirname,
// "__fixtures/supervision_binary_switch",
// ),

additionalDriverOptions: {
testingHooks: {
skipNodeInterview: true,
},
},

nodeCapabilities: [
{
id: 2,
capabilities: {
isListening: true,
},
},
{
id: 3,
capabilities: {
isListening: true,
},
},
],

customSetup: async (driver, controller, mockNodes) => {
// Return a TX status of Fail when desired
const handleSendData: MockControllerBehavior = {
async onHostMessage(host, controller, msg) {
if (msg instanceof SendDataRequest) {
// Commands to node 3 work normally
if (msg.getNodeId() === 3) {
// Defer to the default behavior
return false;
}

// Check if this command is legal right now
const state = controller.state.get(
MockControllerStateKeys.CommunicationState,
) as MockControllerCommunicationState | undefined;
if (
state != undefined
&& state !== MockControllerCommunicationState.Idle
) {
throw new Error(
"Received SendDataRequest while not idle",
);
}

// Put the controller into sending state
controller.state.set(
MockControllerStateKeys.CommunicationState,
MockControllerCommunicationState.Sending,
);

// Notify the host that the message was sent
const res = new SendDataResponse(host, {
wasSent: true,
});
await controller.sendToHost(res.serialize());

await wait(100);

controller.state.set(
MockControllerStateKeys.CommunicationState,
MockControllerCommunicationState.Idle,
);

const cb = new SendDataRequestTransmitReport(host, {
callbackId: msg.callbackId,
transmitStatus: TransmitStatus.Fail,
txReport: {
txTicks: 0,
routeSpeed: 0 as any,
routingAttempts: 0,
ackRSSI: 0,
},
});
await controller.sendToHost(cb.serialize());

return true;
} else if (msg instanceof SendDataAbort) {
// Put the controller into idle state
controller.state.set(
MockControllerStateKeys.CommunicationState,
MockControllerCommunicationState.Idle,
);
}
},
};
controller.defineBehavior(handleSendData);
},
testBody: async (t, driver, nodes, mockController, mockNodes) => {
const [node2, node3] = nodes;
node2.markAsAlive();
node3.markAsAlive();

driver.options.timeouts.retryJammed = 100;

t.true(await node3.ping());
await assertZWaveError(
t,
() => node2.commandClasses.Basic.set(99),
{
errorCode: ZWaveErrorCodes.Controller_MessageDropped,
},
);
},
},
);
Loading