Skip to content

Commit

Permalink
rumble: refactor to use vuln scan results from prod-enforce (#1823)
Browse files Browse the repository at this point in the history
## Type of change
Late breaking 24i issue. 

### What should this PR do?
Vuln scanning has moved from `prod-images` to `prod-enforce`. The CVE
comparison pages on edu.chainguard.dev have been broken for a while
since the scanners have been turned down from `prod-images`. This PR
updates the location of the datasets used and refactors the code to
account for some of the changes to the data.

### Why are we making this change?
Fix CVE comparisons between external and Chainguard equivalent images.

### What are the acceptance criteria? 
* CVE data is pulled from the correct BigQuery source (`prod-enforce`),
and we have data for both the external and the Chainguard image in each
requested comparison.

### How should this PR be tested?
This change can be run locally without uploading the results to test
that the data sources are being queried and correlated properly:

```
go run main.go vulns --project=prod-enforce-fabc --db=cloudevents_grype_scan_results

go run main.go image-csv --project=prod-enforce-fabc --db=cloudevents_grype_scan_results --rumble-json-path=../../data/rumble.json

go run main.go legacy-csv --project=prod-enforce-fabc --db=cloudevents_grype_scan_results
```

Signed-off-by: Colin Douglas <[email protected]>
  • Loading branch information
cmdpdx authored Sep 25, 2024
1 parent e3f3159 commit b1b25f4
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 179 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/rumble-vulnerability-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # actions/setup-go@v4
with:
go-version: '1.22'
go-version-file: go.mod
check-latest: true

- name: Authenticate to Google Cloud
Expand All @@ -49,17 +49,17 @@ jobs:
- name: Generate vulnerability JSON files
run: |
go run main.go vulns \
--project prod-images-c6e5 \
--db insights_ds \
--project prod-enforce-fabc \
--db cloudevents_grype_scan_results \
--gcs-project chainguard-academy \
--bucket chainguard-academy \
--upload
- name: Generate image comparison CSVs
run: |
go run main.go image-csv \
--project prod-images-c6e5 \
--db insights_ds \
--project prod-enforce-fabc \
--db cloudevents_grype_scan_results \
--gcs-project chainguard-academy \
--bucket chainguard-academy \
--rumble-json-path ../../data/rumble.json \
Expand All @@ -68,8 +68,8 @@ jobs:
- name: Generate legacy comparison CSV
run: |
go run main.go legacy-csv \
--project prod-images-c6e5 \
--db insights_ds \
--project prod-enforce-fabc \
--db cloudevents_grype_scan_results \
--gcs-project chainguard-academy \
--bucket chainguard-academy \
--upload
Expand Down
2 changes: 1 addition & 1 deletion data/rumble.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{"image":"busybox","left":"busybox:latest","right":"cgr.dev/chainguard/busybox:latest"},
{"image":"cassandra","left":"cassandra:latest","right":"cgr.dev/chainguard/cassandra:latest"},
{"image":"curl","left":"curlimages/curl:latest","right":"cgr.dev/chainguard/curl:latest"},
{"image":"deno","left":"deno:latest","right":"cgr.dev/chainguard/deno:latest"},
{"image":"deno","left":"denoland/deno:latest","right":"cgr.dev/chainguard/deno:latest"},
{"image":"dotnet-runtime","left":"mcr.microsoft.com/dotnet/runtime:latest","right":"cgr.dev/chainguard/dotnet-runtime:latest"},
{"image":"dotnet-sdk","left":"mcr.microsoft.com/dotnet/sdk:latest","right":"cgr.dev/chainguard/dotnet-sdk:latest"},
{"image":"dex","left":"dexidp/dex:latest","right":"cgr.dev/chainguard/dex:latest"},
Expand Down
86 changes: 45 additions & 41 deletions tools/rumble/cmd/image_csvs.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ SPDX-License-Identifier: Apache-2.0
package cmd

import (
"context"
"encoding/csv"
"encoding/json"
"fmt"
Expand All @@ -19,7 +18,6 @@ import (

"cloud.google.com/go/bigquery"
cgbigquery "github.com/chainguard-dev/edu/tools/rumble/pkg/bigquery"
cloudstorage "github.com/chainguard-dev/edu/tools/rumble/pkg/cloudstorage"
"github.com/spf13/cobra"
)

Expand All @@ -30,10 +28,8 @@ type rumbleJson []struct {
}

type imageCsv struct {
ctx context.Context
bqClient cgbigquery.BqClient
storageClient cloudstorage.GcsClient
opts *options
rumbleBase

rumbleJsonPath string
comparisons *rumbleJson
}
Expand All @@ -45,20 +41,10 @@ func cmdImageCsvs(o *options) *cobra.Command {
Use: "image-csv",
Short: "CSV for a single external/third party image and a Chainguard image",
RunE: func(cmd *cobra.Command, args []string) error {
db, _ := cmd.Flags().GetString("db")
project, _ := cmd.Flags().GetString("project")
gcsProject, _ := cmd.Flags().GetString("gcs-project")
bucket, _ := cmd.Flags().GetString("bucket")
up, _ := cmd.Flags().GetBool("upload")

i := imageCsv{
ctx: cmd.Context(),
opts: &options{
dbProject: project,
storageProject: gcsProject,
db: db,
storageBucket: bucket,
upload: up,
rumbleBase: rumbleBase{
ctx: cmd.Context(),
opts: o,
},
rumbleJsonPath: rumbleJsonPath,
}
Expand All @@ -71,21 +57,6 @@ func cmdImageCsvs(o *options) *cobra.Command {
return cmd
}

func (i *imageCsv) setupClients() error {
var err error

i.bqClient, err = cgbigquery.NewBqClient(i.opts.dbProject, i.opts.db)
if err != nil {
log.Fatalf("error initializing bq client: %v", err)
}

i.storageClient, err = cloudstorage.NewGcsClient(i.ctx, i.opts.storageBucket)
if err != nil {
log.Fatalf("error initializing gcs client: %v", err)
}
return nil
}

func (i *imageCsv) parseRumbleJson() error {
f, err := os.ReadFile(i.rumbleJsonPath)
if err != nil {
Expand All @@ -99,6 +70,16 @@ func (i *imageCsv) parseRumbleJson() error {
return nil
}

// splitRepoTag splits an image reference like foo:bar into its repo and
// tag components. If the ref doesn't have a tag, latest is assumed.
//
// Only a colon that appears after the final slash is treated as the tag
// separator, so a registry port (localhost:5000/foo) stays part of the
// repo instead of being mistaken for a tag. Digest references
// (repo@sha256:...) are not handled specially.
func splitRepoTag(ref string) (string, string) {
	sep := strings.LastIndexByte(ref, ':')
	// No colon at all, or the last colon belongs to the registry host:port
	// portion (a path segment follows it): the ref carries no tag.
	if sep < 0 || strings.Contains(ref[sep+1:], "/") {
		return ref, "latest"
	}
	return ref[:sep], ref[sep+1:]
}

func (i *imageCsv) generateCsvs() error {
if i.rumbleJsonPath == "" {
return fmt.Errorf("missing --theirs or --ours argument")
Expand All @@ -108,14 +89,19 @@ func (i *imageCsv) generateCsvs() error {
return err
}

if err := i.setupClients(); err != nil {
closer, err := i.setupClients()
if err != nil {
return err
}
defer i.bqClient.Client.Close()
defer i.storageClient.Client.Close()
defer closer()

for _, img := range *i.comparisons {
log.Printf("querying %v", img)

q := i.bqClient.Client.Query(cgbigquery.ImageComparisonCsvQuery)
theirRepo, theirTag := splitRepoTag(img.Theirs)
ourRepo, ourTag := splitRepoTag(img.Ours)

q.DefaultProjectID = i.bqClient.Client.Project()
q.Parameters = []bigquery.QueryParameter{
{
Expand All @@ -124,7 +110,16 @@ func (i *imageCsv) generateCsvs() error {
Type: bigquery.StandardSQLDataType{
TypeKind: "STRING",
},
Value: img.Theirs,
Value: theirRepo,
},
},
{
Name: "their_tag",
Value: &bigquery.QueryParameterValue{
Type: bigquery.StandardSQLDataType{
TypeKind: "STRING",
},
Value: theirTag,
},
},
{
Expand All @@ -133,7 +128,16 @@ func (i *imageCsv) generateCsvs() error {
Type: bigquery.StandardSQLDataType{
TypeKind: "STRING",
},
Value: img.Ours,
Value: ourRepo,
},
},
{
Name: "our_tag",
Value: &bigquery.QueryParameterValue{
Type: bigquery.StandardSQLDataType{
TypeKind: "STRING",
},
Value: ourTag,
},
},
}
Expand All @@ -153,15 +157,15 @@ func (i *imageCsv) generateCsvs() error {
}
defer gcsCsvWriter.Close()
w = csv.NewWriter(gcsCsvWriter)
fmt.Printf("Writing %s to GCS bucket\n", fName)
log.Printf("Writing %s to GCS bucket", fName)
default:
f, err := os.Create(fName)
if err != nil {
return err
}
defer f.Close()
w = csv.NewWriter(f)
fmt.Printf("Writing to %s\n", f.Name())
log.Printf("Writing to %s", f.Name())
}

err = w.Write(strings.Split(cgbigquery.ImageScanCsvHeader, ","))
Expand Down
56 changes: 10 additions & 46 deletions tools/rumble/cmd/legacy_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ SPDX-License-Identifier: Apache-2.0
package cmd

import (
"context"
"encoding/csv"
"fmt"
"log"
Expand All @@ -17,78 +16,43 @@ import (
"strings"

cgbigquery "github.com/chainguard-dev/edu/tools/rumble/pkg/bigquery"
cloudstorage "github.com/chainguard-dev/edu/tools/rumble/pkg/cloudstorage"
"github.com/spf13/cobra"
)

type legacyCsv struct {
ctx context.Context
bqClient cgbigquery.BqClient
storageClient cloudstorage.GcsClient
opts *options
}

func cmdLegacyCsv(o *options) *cobra.Command {
cmd := &cobra.Command{
Use: "legacy-csv",
Short: "Single CSV with all scanned images and vulns",
RunE: func(cmd *cobra.Command, args []string) error {
db, _ := cmd.Flags().GetString("db")
project, _ := cmd.Flags().GetString("project")
gcsProject, _ := cmd.Flags().GetString("gcs-project")
bucket, _ := cmd.Flags().GetString("bucket")
up, _ := cmd.Flags().GetBool("upload")

l := legacyCsv{
ctx: cmd.Context(),
opts: &options{
dbProject: project,
storageProject: gcsProject,
db: db,
storageBucket: bucket,
upload: up,
},
l := rumbleBase{
ctx: cmd.Context(),
opts: o,
}
return l.generateCsv()
},
}
return cmd
}

func (l *legacyCsv) setupClients() error {
var err error

l.bqClient, err = cgbigquery.NewBqClient(l.opts.dbProject, l.opts.db)
if err != nil {
log.Fatalf("error initializing bq client: %v", err)
}

l.storageClient, err = cloudstorage.NewGcsClient(l.ctx, l.opts.storageBucket)
func (c *rumbleBase) generateCsv() error {
closer, err := c.setupClients()
if err != nil {
log.Fatalf("error initializing gcs client: %v", err)
}
return nil
}

func (l *legacyCsv) generateCsv() error {
if err := l.setupClients(); err != nil {
return err
}
defer l.bqClient.Client.Close()
defer l.storageClient.Client.Close()
defer closer()

q := l.bqClient.Client.Query(cgbigquery.LegacyCsvQuery)
q := c.bqClient.Client.Query(cgbigquery.LegacyCsvQuery)

rows, err := l.bqClient.Query(q, cgbigquery.LegacyScanQueryType)
rows, err := c.bqClient.Query(q, cgbigquery.LegacyScanQueryType)
if err != nil {
log.Fatalf("error fetching scan results: %v", err)
}

var w *csv.Writer
switch l.opts.upload {
switch c.opts.upload {
case true:
fName := "cve-data/data.csv"
gcsCsvWriter, err := l.storageClient.GetCsvWriter(fName)
gcsCsvWriter, err := c.storageClient.GetCsvWriter(fName)
if err != nil {
return err
}
Expand Down
48 changes: 48 additions & 0 deletions tools/rumble/cmd/options.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,57 @@
/*
Copyright 2024 Chainguard, Inc.
SPDX-License-Identifier: Apache-2.0
*/

package cmd

import (
"context"
"log"

cgbigquery "github.com/chainguard-dev/edu/tools/rumble/pkg/bigquery"
cloudstorage "github.com/chainguard-dev/edu/tools/rumble/pkg/cloudstorage"
)

// rumbleBase holds the context, clients, and options used by the rumble
// subcommands. The clients are populated by setupClients.
type rumbleBase struct {
// ctx is passed to the GCS client constructor in setupClients.
ctx context.Context
// bqClient is the BigQuery client created by setupClients.
bqClient cgbigquery.BqClient
// storageClient is only created by setupClients when opts.upload is true;
// otherwise it is left at its zero value.
storageClient cloudstorage.GcsClient
// opts supplies the project, dataset, bucket, and upload settings.
opts *options
}

// options carries the settings read by setupClients and the CSV
// generators.
// NOTE(review): presumably populated from the --project, --gcs-project,
// --db, --bucket, and --upload command-line flags — confirm where the
// flags are bound.
type options struct {
// dbProject is passed to NewBqClient as the BigQuery project.
dbProject string
// storageProject is not read by the code in this file.
// NOTE(review): presumably the GCS project — confirm it is still used.
storageProject string
// db is passed to NewBqClient alongside dbProject.
db string
// storageBucket is passed to NewGcsClient.
storageBucket string
// upload gates creation of the GCS client in setupClients.
upload bool
}

// setupClients creates the BigQuery client and, when uploads are enabled,
// the GCS client, storing both on the receiver.
//
// A failure to create either client is reported with log.Fatalf, which
// exits the process, so the returned error is always nil. The returned
// function closes whichever clients were created; errors from closing are
// logged rather than returned.
func (c *rumbleBase) setupClients() (func(), error) {
	bq, bqErr := cgbigquery.NewBqClient(c.opts.dbProject, c.opts.db)
	if bqErr != nil {
		log.Fatalf("error initializing bq client: %v", bqErr)
	}
	c.bqClient = bq

	// The storage client is only needed when results are uploaded.
	if c.opts.upload {
		gcs, gcsErr := cloudstorage.NewGcsClient(c.ctx, c.opts.storageBucket)
		if gcsErr != nil {
			log.Fatalf("error initializing gcs client: %v", gcsErr)
		}
		c.storageClient = gcs
	}

	closeClients := func() {
		if closeErr := c.bqClient.Client.Close(); closeErr != nil {
			log.Println(closeErr)
		}
		// Nothing more to release when the GCS client was never created.
		if c.storageClient.Client == nil {
			return
		}
		if closeErr := c.storageClient.Client.Close(); closeErr != nil {
			log.Println(closeErr)
		}
	}
	return closeClients, nil
}
Loading

0 comments on commit b1b25f4

Please sign in to comment.