Skip to content

Commit

Permalink
Monitor backups with PagerDuty hook integration (#245).
Browse files Browse the repository at this point in the history
  • Loading branch information
witten committed Jan 27, 2020
1 parent e76d5ad commit bc02c12
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 9 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
1.5.0
* #245: Monitor backups with PagerDuty hook integration. See the documentation for more
information: https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
* #255: Add per-action hooks: "before_prune", "after_prune", "before_check", and "after_check".
* #274: Add ~/.config/borgmatic.d as another configuration directory default.
* #277: Customize Healthchecks log level via borgmatic "--monitoring-verbosity" flag.
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ borgmatic is powered by [Borg Backup](https://www.borgbackup.org/).
<a href="https://healthchecks.io/"><img src="docs/static/healthchecks.png" alt="Healthchecks" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://cronitor.io/"><img src="docs/static/cronitor.png" alt="Cronitor" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://cronhub.io/"><img src="docs/static/cronhub.png" alt="Cronhub" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://www.pagerduty.com/"><img src="docs/static/pagerduty.png" alt="PagerDuty" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://www.rsync.net/cgi-bin/borg.cgi?campaign=borg&adgroup=borgmatic"><img src="docs/static/rsyncnet.png" alt="rsync.net" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href="https://www.borgbase.com/?utm_source=borgmatic"><img src="docs/static/borgbase.png" alt="BorgBase" height="60px" style="margin-bottom:20px;"></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
Expand Down
9 changes: 9 additions & 0 deletions borgmatic/config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,15 @@ map:
for details.
example:
https://cronitor.link/d3x0c1
pagerduty:
type: str
desc: |
PagerDuty integration key used to notify PagerDuty when a backup errors. Create
an account at https://www.pagerduty.com/ if you'd like to use this service. See
https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
for details.
example:
a177cad45bd374409f78906a810a3074
cronhub:
type: str
desc: |
Expand Down
3 changes: 2 additions & 1 deletion borgmatic/hooks/dispatch.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import logging

from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, postgresql
from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql

logger = logging.getLogger(__name__)

# Lookup table from each hook name to the borgmatic.hooks module that implements that hook.
HOOK_NAME_TO_MODULE = {
'healthchecks': healthchecks,
'cronitor': cronitor,
'cronhub': cronhub,
'pagerduty': pagerduty,
'postgresql_databases': postgresql,
'mysql_databases': mysql,
}
Expand Down
2 changes: 1 addition & 1 deletion borgmatic/hooks/monitor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from enum import Enum

MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub')
MONITOR_HOOK_NAMES = ('healthchecks', 'cronitor', 'cronhub', 'pagerduty')


class State(Enum):
Expand Down
62 changes: 62 additions & 0 deletions borgmatic/hooks/pagerduty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import datetime
import json
import logging
import platform

import requests

from borgmatic.hooks import monitor

logger = logging.getLogger(__name__)

# PagerDuty endpoint that ping_monitor() POSTs the JSON event payload to.
EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'


def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run):
    '''
    If this is an error state, create a PagerDuty event with the given integration key. Use the
    given configuration filename in any log entries. If this is a dry run, then don't actually
    create an event.

    Any state other than monitor.State.FAIL is logged at debug level and otherwise ignored. The
    monitoring_log_level argument is accepted but not used by this hook.
    '''
    if state != monitor.State.FAIL:
        logger.debug(
            '{}: Ignoring unsupported monitoring {} in PagerDuty hook'.format(
                config_filename, state.name.lower()
            )
        )
        return

    # The label carries its own leading space, so the format string must not add a second one
    # (which previously produced a double space on dry runs and a trailing space otherwise).
    dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
    logger.info('{}: Sending failure event to PagerDuty{}'.format(config_filename, dry_run_label))

    if dry_run:
        return

    hostname = platform.node()
    # Start from a timezone-aware UTC "now" and convert it to the local timezone, so the ISO 8601
    # string includes the local UTC offset.
    local_timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone().isoformat()
    payload = json.dumps(
        {
            'routing_key': integration_key,
            'event_action': 'trigger',
            'payload': {
                'summary': 'backup failed on {}'.format(hostname),
                'severity': 'error',
                'source': hostname,
                'timestamp': local_timestamp,
                'component': 'borgmatic',
                'group': 'backups',
                'class': 'backup failure',
                'custom_details': {
                    'hostname': hostname,
                    'configuration filename': config_filename,
                    'server time': local_timestamp,
                },
            },
        }
    )
    logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload))

    # Raise the urllib3 logger's threshold to ERROR before issuing the request.
    logging.getLogger('urllib3').setLevel(logging.ERROR)
    # NOTE(review): no timeout is passed and the response status is not inspected; confirm
    # whether a hung or rejected request should be surfaced to the caller.
    requests.post(EVENTS_API_URL, data=payload.encode('utf-8'))
41 changes: 34 additions & 7 deletions docs/how-to/monitor-your-backups.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,15 @@ hooks](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#error-hoo
below for how to configure this.
4. **borgmatic monitoring hooks**: This feature integrates with monitoring
services like [Healthchecks](https://healthchecks.io/),
[Cronitor](https://cronitor.io), and [Cronhub](https://cronhub.io), and pings
these services whenever borgmatic runs. That way, you'll receive an alert when
something goes wrong or the service doesn't hear from borgmatic for a
configured interval. See
[Healthchecks
[Cronitor](https://cronitor.io), [Cronhub](https://cronhub.io), and
[PagerDuty](https://www.pagerduty.com/), and pings these services whenever
borgmatic runs. That way, you'll receive an alert when something goes wrong or
(for certain hooks) the service doesn't hear from borgmatic for a configured
interval. See [Healthchecks
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#healthchecks-hook), [Cronitor
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), and [Cronhub
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook)
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronitor-hook), [Cronhub
hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#cronhub-hook), and
[PagerDuty hook](https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook)
below for how to configure this.
3. **Third-party monitoring software**: You can use traditional monitoring
software to consume borgmatic JSON output and track when the last
Expand Down Expand Up @@ -200,6 +201,32 @@ mechanisms](https://docs.cronhub.io/integrations.html) when backups fail
or it doesn't hear from borgmatic for a certain period of time.


## PagerDuty hook

[PagerDuty](https://www.pagerduty.com/) provides incident monitoring and alerting,
and borgmatic has built-in integration with it. Once you create a PagerDuty
account and <a
href="https://support.pagerduty.com/docs/services-and-integrations">service</a>
on their site, all you need to do is configure borgmatic with the unique
"Integration Key" for your service. Here's an example:


```yaml
hooks:
pagerduty: a177cad45bd374409f78906a810a3074
```

With this hook in place, borgmatic creates a PagerDuty event for your service
whenever backups fail. Specifically, if an error occurs during a `create`,
`prune`, or `check` action, borgmatic sends an event to PagerDuty after the
`on_error` hooks run. Note that borgmatic does not contact PagerDuty when a
backup starts or ends without error.

You can configure PagerDuty to notify you by a [variety of
mechanisms](https://support.pagerduty.com/docs/notifications) when backups
fail.


## Scripting borgmatic

To consume the output of borgmatic in other software, you can include an
Expand Down
Binary file added docs/static/pagerduty.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
35 changes: 35 additions & 0 deletions tests/unit/hooks/test_pagerduty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from flexmock import flexmock

from borgmatic.hooks import pagerduty as module


def test_ping_monitor_ignores_start_state():
    # A start state must not result in any HTTP request.
    flexmock(module.requests).should_receive('post').never()
    start_state = module.monitor.State.START

    module.ping_monitor(
        'abc123',
        'config.yaml',
        start_state,
        monitoring_log_level=1,
        dry_run=False,
    )


def test_ping_monitor_ignores_finish_state():
    # A finish state must not result in any HTTP request.
    flexmock(module.requests).should_receive('post').never()
    finish_state = module.monitor.State.FINISH

    module.ping_monitor(
        'abc123',
        'config.yaml',
        finish_state,
        monitoring_log_level=1,
        dry_run=False,
    )


def test_ping_monitor_calls_api_for_fail_state():
    # An expectation with no call-count modifier is satisfied even when the method is never
    # called, so require exactly one call to make this test assert what its name claims.
    flexmock(module.requests).should_receive('post').once()

    module.ping_monitor(
        'abc123', 'config.yaml', module.monitor.State.FAIL, monitoring_log_level=1, dry_run=False
    )


def test_ping_monitor_dry_run_does_not_call_api():
    # Even for a failure state, a dry run must not result in any HTTP request.
    flexmock(module.requests).should_receive('post').never()
    fail_state = module.monitor.State.FAIL

    module.ping_monitor(
        'abc123',
        'config.yaml',
        fail_state,
        monitoring_log_level=1,
        dry_run=True,
    )

0 comments on commit bc02c12

Please sign in to comment.