Skip to content

Commit

Permalink
Add status command and Docker HEALTHCHECK
Browse files Browse the repository at this point in the history
  • Loading branch information
dpup committed Sep 18, 2024
1 parent e876926 commit a317581
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.env
**/.envrc
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ ARG UID=10001
RUN adduser \
--disabled-password \
--gecos "" \
--home "/nonexistent" \
--home "/appuser" \
--shell "/sbin/nologin" \
--no-create-home \
--uid "${UID}" \
appuser

Expand All @@ -23,4 +22,6 @@ RUN --mount=type=cache,target=/root/.cache/pip \
USER appuser
COPY . .

# Run the status check as a module (-m), the same way ENTRYPOINT starts the
# app: the script is check-mate.py, so a bare "check-mate" path does not exist
# and the probe would always fail.
HEALTHCHECK --interval=60s --timeout=10s --start-period=30s --retries=3 CMD python3 -m check-mate --status

ENTRYPOINT python3 -m check-mate
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Then in a private channel on a different node to the one connected to `check-mat
| --host | HOST | The IP or hostname of the meshtastic node, e.g. `192.168.5.10` |
| --location | LOCATION | Text description of where your node is, e.g. `SF Mission District` |
| --healthcheck | HEALTHCHECKURL | URL to send healthcheck pings to when receiving messages |
| --status | N/A | Print JSON of latest status |

## Example radio check

Expand Down
92 changes: 72 additions & 20 deletions check-mate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
import meshtastic
import meshtastic.tcp_interface

from status import readStatus, writeStatus
import json

"""Max frequency with which to report healthchecks."""
HEALTH_CHECK_THROTTLE = 60

"""Max amount of time since radio traffic was received before we consider process unhealthy."""
UNHEALTHY_TIMEOUT = 5 * 60


class CheckMate:
"""Manages connection with meshtastic node, monitoring private channels and responds to radio checks"""
Expand All @@ -26,6 +35,10 @@ def __init__(self, host, location=None, healthCheckURL=None):
self.lastHealthCheck = None
self.healthCheckURL = healthCheckURL
self.logger = logging.getLogger(__name__)
self.status = {
"status": "starting",
"start_time": time.time(),
}

pub.subscribe(self.onReceive, "meshtastic.receive")
pub.subscribe(self.onConnect, "meshtastic.connection.established")
Expand All @@ -34,45 +47,66 @@ def __init__(self, host, location=None, healthCheckURL=None):
def start(self):
    """Connect to the meshtastic node and keep the connection alive.

    Loops forever: opens a TCP interface to ``self.host``, then polls every
    5 seconds while ``self.connected`` is true. If no device ping has been
    recorded for more than ``UNHEALTHY_TIMEOUT`` seconds the status is set
    to "unknown". Any exception is logged, the status is set to
    "restarting" and the connection is retried after 5 seconds.

    Returns:
        0 once a KeyboardInterrupt has been received and the status has
        been set to "shutdown".
    """
    try:
        while True:
            try:
                self.logger.info("Connecting...", extra={"host": self.host})
                self.connected = True
                self.iface = meshtastic.tcp_interface.TCPInterface(
                    hostname=self.host
                )
                while self.connected:
                    time.sleep(5)
                    # "last_device_ping" only exists after a
                    # setStatus(..., ping=True) call. Fall back to the
                    # process start time so a missing key cannot raise
                    # KeyError and trigger a spurious reconnect.
                    last_ping = self.status.get(
                        "last_device_ping", self.status["start_time"]
                    )
                    if time.time() - last_ping > UNHEALTHY_TIMEOUT:
                        self.setStatus("unknown")

            except Exception as ex:
                # Top-level boundary: log, flag the restart and retry.
                self.logger.error(
                    "Error with connection: %s",
                    ex,
                    extra={"host": self.host, "error": ex},
                )
                self.logger.info("Retrying in 5 seconds...")
                self.setStatus("restarting")
                time.sleep(5)

    except KeyboardInterrupt:
        # Ctrl-C (or equivalent): record the shutdown and exit cleanly.
        self.logger.info("Shutting down...", extra={"host": self.host})
        self.setStatus("shutdown")
        return 0

def setStatus(self, status, ping=False):
    """Record a new status and persist the status dict via writeStatus.

    Args:
        status: Status label to store under the "status" key.
        ping: When true, also stamp "last_device_ping" with the current
            time.
    """
    current = self.status
    current["status"] = status
    current["update_time"] = time.time()
    current["user_count"] = len(self.users)
    if ping:
        current["last_device_ping"] = time.time()
    writeStatus(current)

def onConnect(self, interface, topic=pub.AUTO_TOPIC):
    """Handle a (re)connection to the radio.

    Passes the user record of every node the interface already knows about
    to ``updateUser``, then sets the status to "connected" and records a
    device ping.

    Args:
        interface: The interface that connected; its ``nodes`` mapping is
            read if it is non-empty.
        topic: Pubsub topic object supplied by the publisher.
    """
    for node in (interface.nodes or {}).values():
        # Skip nodes that have no user record instead of raising KeyError,
        # which would abort the handler before the status is updated.
        if "user" in node:
            self.updateUser(node["user"])
    self.logger.info("Connected...")
    self.setStatus("connected", ping=True)

def onDisconnect(self, interface, topic=pub.AUTO_TOPIC):
    """Handle loss of the radio connection.

    Logs the event, clears ``self.connected`` and sets the status to
    "disconnected".
    """
    self.logger.info("Disconnected... waiting for reconnect...")
    # Clearing the flag before persisting the new status keeps the same
    # order of side effects as before.
    self.connected = False
    self.setStatus(status="disconnected")

def onReceive(self, packet, interface):
"""called when a packet arrives"""

self.reportHealth()
self.setStatus("active", ping=True)

try:
if self.isNodeInfo(packet):
Expand Down Expand Up @@ -113,7 +147,10 @@ def onReceive(self, packet, interface):

def reportHealth(self):
if self.healthCheckURL is not None:
if self.lastHealthCheck is None or time.time() - self.lastHealthCheck > 60:
if (
self.lastHealthCheck is None
or time.time() - self.lastHealthCheck > HEALTH_CHECK_THROTTLE
):
self.lastHealthCheck = time.time()
response = requests.head(self.healthCheckURL)
if response.status_code == 200:
Expand Down Expand Up @@ -258,6 +295,13 @@ def getLogFormat():
description="Monitors private channels and responds to radio checks",
epilog="Example: python3 check-mate.py --host meshtastic.local --location 'Base Camp' --healthcheck https://uptime.betterstack.com/api/v1/heartbeat/deadbeef",
)
parser.add_argument(
"--status",
action="store_true",
dest="status",
required=False,
help="Get status of the current check-mate process",
)
parser.add_argument(
"--host",
dest="host",
Expand All @@ -282,6 +326,14 @@ def getLogFormat():
)
args = parser.parse_args()

if args.status:
status = readStatus()
print(json.dumps(status))
if status["status"] == "active":
sys.exit(0)
else:
sys.exit(1)

if not args.host:
parser.error(
"Please provide a host via --host or the $HOST environment variable"
Expand Down
32 changes: 32 additions & 0 deletions status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import json
import platform
import tempfile
from pathlib import Path
from typing import Any, Dict


def getStatusFilePath() -> Path:
    """Return the status file path for this OS, creating its directory.

    On macOS the file lives under
    ``~/Library/Application Support/check-mate``; on every other platform
    under ``~/.local/share/check-mate``. The directory (and any missing
    parents) is created if it does not exist yet.
    """
    if platform.system() == "Darwin":  # macOS
        sub_dirs = ("Library", "Application Support", "check-mate")
    else:  # Linux and others
        sub_dirs = (".local", "share", "check-mate")

    status_dir = Path.home().joinpath(*sub_dirs)
    status_dir.mkdir(parents=True, exist_ok=True)
    return status_dir / "status.json"


# Resolved once at import time, so importing this module creates the
# directory as a side effect.
STATUS_FILE = getStatusFilePath()


def writeStatus(status: Dict[str, Any]):
    """Atomically write *status* as JSON to the status file.

    The data is written to a temporary file in the same directory and then
    renamed over ``STATUS_FILE``. A concurrent reader (for example the
    ``--status`` health check) therefore sees either the previous or the new
    content, never an empty or half-written file.

    Args:
        status: JSON-serializable status mapping.

    Raises:
        TypeError: If *status* contains values json cannot serialize; the
            previous status file is left untouched in that case.
        OSError: If the temporary file cannot be written or renamed.
    """
    tmp_path = None
    try:
        # NOTE: NamedTemporaryFile creates the file readable by its owner
        # only, so the resulting status file is owner-readable as well.
        with tempfile.NamedTemporaryFile(
            "w",
            dir=STATUS_FILE.parent,
            prefix=STATUS_FILE.name + ".",
            suffix=".tmp",
            delete=False,
        ) as tmp:
            tmp_path = Path(tmp.name)
            json.dump(status, tmp)
        # Same-directory rename: replaces the target in a single step.
        tmp_path.replace(STATUS_FILE)
    except BaseException:
        # Do not leave stray temp files behind after a failed write.
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                pass
        raise


def readStatus() -> Dict[str, Any]:
    """Read the current status from the status file.

    Returns:
        The decoded status mapping, or ``{"status": "unknown"}`` when the
        file does not exist, is empty or partially written, does not decode
        as JSON, or does not contain a JSON object.
    """
    try:
        # Open directly instead of checking exists() first: the file may
        # disappear between the check and the open.
        with open(STATUS_FILE, "r") as f:
            status = json.load(f)
    except (FileNotFoundError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {"status": "unknown"}
    if not isinstance(status, dict):
        return {"status": "unknown"}
    return status

0 comments on commit a317581

Please sign in to comment.