Skip to content

Commit

Permalink
Add --project flag for filtering + adds node label to all machine m…
Browse files Browse the repository at this point in the history
…etrics (#23)

* Adds a new flag --project, and uses its value to filter machines/sites on a per-project basis so we don't pollute mlab-oti's GMX instance with staging and sandbox machines, and vice versa.

* Adds a 'node' label to all machine metrics.

* Don't enter maintenance more than once for a particular issue. Uses maintenance constants instead of fixed integers.
  • Loading branch information
nkinkade authored Sep 18, 2019
1 parent 4299352 commit b17fb66
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 15 deletions.
56 changes: 44 additions & 12 deletions gmx.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"os"
"regexp"
"strconv"
"strings"
"sync"

"github.com/google/go-github/github"
Expand All @@ -44,13 +45,23 @@ var (
fListenAddress string // Interface and port to listen on.
fStateFilePath string // Filesystem path to write the maintenance state file.
fGitHubSecretPath string // Filesystem path to file which contains the shared Github secret.
fProject string // GCP project where this instance is running.

githubSecret []byte // The symmetric secret used to validate that the webhook actually came from Github.

mux sync.Mutex

machineRegExp = regexp.MustCompile(`\/machine (mlab[1-4]{1}\.[a-z]{3}[0-9c]{2})\s?(del)?`)
siteRegExp = regexp.MustCompile(`\/site ([a-z]{3}[0-9c]{2})\s?(del)?`)
machineRegExps = map[string]*regexp.Regexp{
"mlab-sandbox": regexp.MustCompile(`\/machine\s+(mlab[1-4]\.[a-z]{3}[0-9]t)\s+(del)?`),
"mlab-staging": regexp.MustCompile(`\/machine\s+(mlab[4]\.[a-z]{3}[0-9c]{2})\s+(del)?`),
"mlab-oti": regexp.MustCompile(`\/machine\s+(mlab[1-3]\.[a-z]{3}[0-9c]{2})\s+(del)?`),
}

siteRegExps = map[string]*regexp.Regexp{
"mlab-sandbox": regexp.MustCompile(`\/site\s+([a-z]{3}[0-9]t)\s+(del)?`),
"mlab-staging": regexp.MustCompile(`\/site\s+([a-z]{3}[0-9c]{2})\s+(del)?`),
"mlab-oti": regexp.MustCompile(`\/site\s+([a-z]{3}[0-9c]{2})\s+(del)?`),
}

// Prometheus metric for exposing any errors that the exporter encounters.
metricError = prometheus.NewCounterVec(
Expand All @@ -71,6 +82,7 @@ var (
},
[]string{
"machine",
"node",
},
)
// Prometheus metric for exposing site maintenance status.
Expand Down Expand Up @@ -135,7 +147,7 @@ func restoreState(r io.Reader, s *maintenanceState) error {

// Restore machine maintenance state.
for machine := range s.Machines {
metricMachine.WithLabelValues(machine).Set(cEnterMaintenance)
metricMachine.WithLabelValues(machine, machine).Set(cEnterMaintenance)
}

// Restore site maintenance state.
Expand Down Expand Up @@ -174,7 +186,13 @@ func removeIssue(stateMap map[string][]string, mapKey string, metricState *prome
mapElement = mapElement[:len(mapElement)-1]
if len(mapElement) == 0 {
delete(stateMap, mapKey)
metricState.WithLabelValues(mapKey).Set(0)
// If this is a machine state, then we need to pass mapKey twice, once for the
// "machine" label and once for the "node" label.
if strings.HasPrefix(mapKey, "mlab") {
metricState.WithLabelValues(mapKey, mapKey).Set(0)
} else {
metricState.WithLabelValues(mapKey).Set(0)
}
} else {
stateMap[mapKey] = mapElement
}
Expand Down Expand Up @@ -220,9 +238,21 @@ func updateState(stateMap map[string][]string, mapKey string, metricState *prome
case cLeaveMaintenance:
removeIssue(stateMap, mapKey, metricState, issueNumber)
case cEnterMaintenance:
// Don't enter maintenance more than once for a given issue.
issueIndex := stringInSlice(issueNumber, stateMap[mapKey])
if issueIndex >= 0 {
log.Printf("INFO: %s is already in maintenance for issue #%s", mapKey, issueNumber)
return
}
mux.Lock()
stateMap[mapKey] = append(stateMap[mapKey], issueNumber)
metricState.WithLabelValues(mapKey).Set(action)
// If this is a machine state, then we need to pass mapKey twice, once for the
// "machine" label and once for the "node" label.
if strings.HasPrefix(mapKey, "mlab") {
metricState.WithLabelValues(mapKey, mapKey).Set(action)
} else {
metricState.WithLabelValues(mapKey).Set(action)
}
log.Printf("INFO: %s was added to maintenance for issue #%s", mapKey, issueNumber)
mux.Unlock()
default:
Expand All @@ -235,9 +265,9 @@ func updateState(stateMap map[string][]string, mapKey string, metricState *prome
// added to or removed from maintenance mode. If any matches are found, it
// updates the state for the item. The return value is the number of
// modifications that were made to the machine and site maintenance state.
func parseMessage(msg string, issueNumber string, s *maintenanceState) int {
func parseMessage(msg string, issueNumber string, s *maintenanceState, project string) int {
var mods = 0
machineMatches := machineRegExp.FindAllStringSubmatch(msg, -1)
machineMatches := machineRegExps[project].FindAllStringSubmatch(msg, -1)
if len(machineMatches) > 0 {
for _, machine := range machineMatches {
log.Printf("INFO: Flag found for machine: %s", machine[1])
Expand All @@ -252,15 +282,15 @@ func parseMessage(msg string, issueNumber string, s *maintenanceState) int {
}
}

siteMatches := siteRegExp.FindAllStringSubmatch(msg, -1)
siteMatches := siteRegExps[project].FindAllStringSubmatch(msg, -1)
if len(siteMatches) > 0 {
for _, site := range siteMatches {
log.Printf("INFO: Flag found for site: %s", site[1])
if site[2] == "del" {
updateState(s.Sites, site[1], metricSite, issueNumber, 0)
updateState(s.Sites, site[1], metricSite, issueNumber, cLeaveMaintenance)
mods++
} else {
updateState(s.Sites, site[1], metricSite, issueNumber, 1)
updateState(s.Sites, site[1], metricSite, issueNumber, cEnterMaintenance)
mods++
}
}
Expand Down Expand Up @@ -314,7 +344,7 @@ func receiveHook(resp http.ResponseWriter, req *http.Request) {
log.Printf("INFO: Issue #%s was %s.", issueNumber, eventAction)
mods = closeIssue(issueNumber, &state)
case "opened", "edited":
mods = parseMessage(event.Issue.GetBody(), issueNumber, &state)
mods = parseMessage(event.Issue.GetBody(), issueNumber, &state, fProject)
default:
log.Printf("INFO: Unsupported IssueEvent action: %s.", eventAction)
status = http.StatusNotImplemented
Expand All @@ -324,7 +354,7 @@ func receiveHook(resp http.ResponseWriter, req *http.Request) {
issueNumber = strconv.Itoa(event.Issue.GetNumber())
issueState := event.Issue.GetState()
if issueState == "open" {
mods = parseMessage(event.Comment.GetBody(), issueNumber, &state)
mods = parseMessage(event.Comment.GetBody(), issueNumber, &state, fProject)
} else {
log.Printf("INFO: Ignoring IssueComment event on closed issue #%s.", issueNumber)
status = http.StatusExpectationFailed
Expand Down Expand Up @@ -382,6 +412,8 @@ func init() {
"Filesystem path for the state file.")
flag.StringVar(&fGitHubSecretPath, "storage.github-secret", "",
"Filesystem path of file containing the shared Github webhook secret.")
flag.StringVar(&fProject, "project", "mlab-oti",
"GCP project where this instance is running.")
prometheus.MustRegister(metricError)
prometheus.MustRegister(metricMachine)
prometheus.MustRegister(metricSite)
Expand Down
41 changes: 38 additions & 3 deletions gmx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ var savedState = `
"def02": ["8"],
"uvw03": ["4", "11"],
"xyz03": ["5"]
}
}
`
Expand Down Expand Up @@ -274,43 +273,79 @@ func TestCloseIssue(t *testing.T) {

func TestParseMessage(t *testing.T) {
r := strings.NewReader(savedState)
var s maintenanceState
var s = state
restoreState(r, &s)

tests := []struct {
name string
msg string
project string
expectedMods int
}{
{
name: "add-1-machine-to-maintenance",
msg: `/machine mlab1.abc01 is in maintenance mode.`,
project: `mlab-oti`,
expectedMods: 1,
},
{
name: "add-2-sites-to-maintenance",
msg: `Putting /site abc01 and /site xyz02 into maintenance mode.`,
project: `mlab-oti`,
expectedMods: 2,
},
{
name: "add-1-sites-and-1-machine-to-maintenance",
msg: `Putting /site abc01 and /machine mlab1.xyz02 into maintenance mode.`,
project: `mlab-oti`,
expectedMods: 2,
},
{
name: "remove-1-machine-and-1-site-from-maintenance",
msg: `Removing /machine mlab2.xyz01 del and /site uvw02 del from maintenance.`,
project: `mlab-oti`,
expectedMods: 2,
},
{
name: "3-malformed-flags",
msg: `Add /machine and /site vw02 to maintenance. Removing /site lol del.`,
project: `mlab-oti`,
expectedMods: 0,
},
{
name: "1-production-machine-1-staging-machine-flag",
msg: `Add /machine mlab2.ghi01 and /machine mlab4.ghi01 to maintenance.`,
project: `mlab-oti`,
expectedMods: 1,
},
{
name: "1-sandbox-machine-1-staging-machine-flag",
msg: `Add /machine mlab3.hij0t and /machine mlab4.qrs01 to maintenance.`,
project: `mlab-oti`,
expectedMods: 0,
},
{
name: "1-sandbox-machine-flag",
msg: `Add /machine mlab1.abc0t to maintenance.`,
project: `mlab-sandbox`,
expectedMods: 1,
},
{
name: "2-staging-machine-flags",
msg: `Add /machine mlab4.abc03 and /machine mlab4.wxy01 to maintenance.`,
project: `mlab-staging`,
expectedMods: 2,
},
{
name: "1-sandbox-site-flag",
msg: `Add /site nop0t to maintenance.`,
project: `mlab-sandbox`,
expectedMods: 1,
},
}

for _, test := range tests {
mods := parseMessage(test.msg, "99", &s)
mods := parseMessage(test.msg, "99", &s, test.project)
if mods != test.expectedMods {
t.Errorf("parseMessage(): %s: expected %d state modifications; got %d",
test.name, test.expectedMods, mods)
Expand Down

0 comments on commit b17fb66

Please sign in to comment.