From 66f68a7705f5e8e321895c73edbfec7548c0ccd2 Mon Sep 17 00:00:00 2001 From: Al Cutter Date: Fri, 15 Dec 2023 13:27:07 +0100 Subject: [PATCH] Support custom Cache-Control headers for GCP log (#64) This PR allows users of the GCP serverless log to customise `Cache-Control` headers for log objects. --- experimental/gcp-log/README.md | 10 +++ experimental/gcp-log/function.go | 21 +++++- .../gcp-log/internal/storage/storage.go | 66 ++++++++++--------- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/experimental/gcp-log/README.md b/experimental/gcp-log/README.md index 54c24ae..4d6917f 100644 --- a/experimental/gcp-log/README.md +++ b/experimental/gcp-log/README.md @@ -105,3 +105,13 @@ Set up a log and write to the log via GCF invocation. "noteKeyName": "${NOTE_SIGNING_NAME}" }' ``` + +### Cache-control + +The following two optional parameters can be added to all function calls to customise the +`Cache-Control` headers associated with the log artefacts: + +* `checkpointCacheControl`, if supplied, sets the `Cache-Control` header for the `checkpoint` object. +* `otherCacheControl`, if supplied, sets the `Cache-Control` header for all other objects. + +The values for these parameters should be a valid [Cache-Control](https://cloud.google.com/storage/docs/metadata#cache-control) metadata string, e.g. `public, max-age=3600`. 
\ No newline at end of file diff --git a/experimental/gcp-log/function.go b/experimental/gcp-log/function.go index 446f88a..86ccd73 100644 --- a/experimental/gcp-log/function.go +++ b/experimental/gcp-log/function.go @@ -27,7 +27,7 @@ import ( "github.com/gcp_serverless_module/internal/storage" - "cloud.google.com/go/kms/apiv1" + kms "cloud.google.com/go/kms/apiv1" "github.com/transparency-dev/armored-witness/pkg/kmssigner" fmtlog "github.com/transparency-dev/formats/log" "github.com/transparency-dev/merkle/rfc6962" @@ -46,6 +46,11 @@ type requestData struct { KMSKeyLocation string `json:"kmsKeyLocation"` KMSKeyVersion uint `json:"kmsKeyVersion"` + // Cache-Control header for checkpoint objects + CheckpointCacheControl string `json:"checkpointCacheControl"` + // Cache-Control header for non-checkpoint objects + OtherCacheControl string `json:"otherCacheControl"` + // For Sequence requests. EntriesDir string `json:"entriesDir"` @@ -90,6 +95,16 @@ func validateCommonArgs(w http.ResponseWriter, d requestData) (ok bool) { return true } +// newClient returns a storage Client built for the request args. +func newClient(ctx context.Context, d requestData) (*storage.Client, error) { + return storage.NewClient(ctx, storage.ClientOpts{ + ProjectID: os.Getenv("GCP_PROJECT"), + Bucket: d.Bucket, + CheckpointCacheControl: d.CheckpointCacheControl, + OtherCacheControl: d.OtherCacheControl, + }) +} + // Sequence is the entrypoint of the `sequence` GCF function. func Sequence(w http.ResponseWriter, r *http.Request) { // TODO(jayhou): validate that EntriesDir is only touching the log path. 
@@ -116,7 +131,7 @@ func Sequence(w http.ResponseWriter, r *http.Request) { // init storage ctx := r.Context() - client, err := storage.NewClient(ctx, os.Getenv("GCP_PROJECT"), d.Bucket) + client, err := newClient(ctx, d) if err != nil { http.Error(w, fmt.Sprintf("Failed to create GCS client: %q", err), http.StatusInternalServerError) return @@ -264,7 +279,7 @@ func Integrate(w http.ResponseWriter, r *http.Request) { } defer kmClient.Close() - client, err := storage.NewClient(ctx, os.Getenv("GCP_PROJECT"), d.Bucket) + client, err := newClient(ctx, d) if err != nil { http.Error(w, fmt.Sprintf("Failed to create GCS client: %v", err), http.StatusBadRequest) return diff --git a/experimental/gcp-log/internal/storage/storage.go b/experimental/gcp-log/internal/storage/storage.go index 1288e41..8a93940 100644 --- a/experimental/gcp-log/internal/storage/storage.go +++ b/experimental/gcp-log/internal/storage/storage.go @@ -54,20 +54,40 @@ type Client struct { // Note that nextSeq may be <= than the actual next available number, but // never greater. nextSeq uint64 + + checkpointCacheControl string + otherCacheControl string +} + +// ClientOpts holds configuration options for the storage client. +type ClientOpts struct { + // ProjectID is the GCP project which hosts the storage bucket for the log. + ProjectID string + // Bucket is the name of the bucket to use for storing log state. + Bucket string + // CheckpointCacheControl, if set, will cause the Cache-Control header associated with the + // checkpoint object to be set to this value. If unset, the current GCP default will be used. + CheckpointCacheControl string + // OtherCacheControl, if set, will cause the Cache-Control header associated with + // all non-checkpoint objects to be set to this value. If unset, the current GCP default + // will be used. + OtherCacheControl string } // NewClient returns a Client which allows interaction with the log stored in // the specified bucket on GCS. 
-func NewClient(ctx context.Context, projectID, bucket string) (*Client, error) { +func NewClient(ctx context.Context, opts ClientOpts) (*Client, error) { c, err := gcs.NewClient(ctx) if err != nil { return nil, err } return &Client{ - gcsClient: c, - projectID: projectID, - bucket: bucket, + gcsClient: c, + projectID: opts.ProjectID, + bucket: opts.Bucket, + checkpointCacheControl: opts.CheckpointCacheControl, + otherCacheControl: opts.OtherCacheControl, }, nil } @@ -121,6 +141,9 @@ func (c *Client) WriteCheckpoint(ctx context.Context, newCPRaw []byte) error { bkt := c.gcsClient.Bucket(c.bucket) obj := bkt.Object(layout.CheckpointPath) w := obj.NewWriter(ctx) + if c.checkpointCacheControl != "" { + w.ObjectAttrs.CacheControl = c.checkpointCacheControl + } if _, err := w.Write(newCPRaw); err != nil { return err } @@ -297,6 +320,9 @@ func (c *Client) Sequence(ctx context.Context, leafhash []byte, leaf []byte) (ui // This may exist if there is more than one instance of the sequencer // writing to the same log. w := bkt.Object(seqPath).If(gcs.Conditions{DoesNotExist: true}).NewWriter(ctx) + if c.otherCacheControl != "" { + w.ObjectAttrs.CacheControl = c.otherCacheControl + } if _, err := w.Write(leaf); err != nil { return 0, fmt.Errorf("failed to write seq file: %w", err) } @@ -321,6 +347,9 @@ func (c *Client) Sequence(ctx context.Context, leafhash []byte, leaf []byte) (ui // file above but before doing this, a resubmission of the same leafhash // would be permitted. 
wLeaf := bkt.Object(leafPath).NewWriter(ctx) + if c.otherCacheControl != "" { + wLeaf.ObjectAttrs.CacheControl = c.otherCacheControl + } if _, err := wLeaf.Write([]byte(strconv.FormatUint(seq, 16))); err != nil { return 0, fmt.Errorf("couldn't create leafhash object: %w", err) } @@ -355,34 +384,11 @@ func (c *Client) StoreTile(ctx context.Context, level, index uint64, tile *api.T obj := bkt.Object(tPath) w := obj.NewWriter(ctx) + if c.otherCacheControl != "" { + w.ObjectAttrs.CacheControl = c.otherCacheControl + } if _, err := w.Write(t); err != nil { return fmt.Errorf("failed to write tile object %q to bucket %q: %w", tPath, c.bucket, err) } return w.Close() - - if tileSize == 256 { - // Get partial files. - it := bkt.Objects(ctx, &gcs.Query{ - Prefix: tPath, - // Without specifying a delimiter, the objects returned may be - // recursively under "directories". Specifying a delimiter only returns - // objects under the given prefix path "directory". - Delimiter: "/", - }) - for { - attrs, err := it.Next() - if err == iterator.Done { - break - } - if err != nil { - return fmt.Errorf("failed to get object %q from bucket %q: %v", tPath, c.bucket, err) - } - - if _, err := bkt.Object(attrs.Name).NewWriter(ctx).Write(t); err != nil { - return fmt.Errorf("failed to copy full tile to partials object %q in bucket %q: %v", attrs.Name, c.bucket, err) - } - } - } - - return nil }