From bdb9ce89c6e5a72e9d4a2d92304169e15d8db882 Mon Sep 17 00:00:00 2001
From: David Anyatonwu
Date: Tue, 17 Dec 2024 21:42:56 +0100
Subject: [PATCH] Implement Google Cloud Storage Store

Signed-off-by: David Anyatonwu
---
 deployment-examples/gcs-store-example.yaml |   13 +
 nativelink-config/src/stores.rs            |   47 +
 .../google/storage/v2/storage.proto        | 2346 +++++++++++++++++
 nativelink-store/src/gcs_store.rs          |  490 ++++
 nativelink-store/src/lib.rs                |    1 +
 5 files changed, 2897 insertions(+)
 create mode 100644 deployment-examples/gcs-store-example.yaml
 create mode 100644 nativelink-proto/google/storage/v2/storage.proto
 create mode 100644 nativelink-store/src/gcs_store.rs

diff --git a/deployment-examples/gcs-store-example.yaml b/deployment-examples/gcs-store-example.yaml
new file mode 100644
index 000000000..a81bee9cd
--- /dev/null
+++ b/deployment-examples/gcs-store-example.yaml
@@ -0,0 +1,13 @@
+stores:
+  gcs_store:
+    type: "gcs_store"
+    bucket: "my-bucket"
+    key_prefix: "my-prefix/"
+    consider_expired_after_s: 3600
+    # Uses the shared `Retry` config; `delay` is in seconds.
+    retry:
+      max_retries: 3
+      delay: 0.1
+      jitter: 0.1
+    multipart_max_concurrent_uploads: 10
+    max_retry_buffer_per_request: 5242880 # 5MB
\ No newline at end of file
diff --git a/nativelink-config/src/stores.rs b/nativelink-config/src/stores.rs
index 7e6e31821..3a12a8dfc 100644
--- a/nativelink-config/src/stores.rs
+++ b/nativelink-config/src/stores.rs
@@ -430,6 +430,26 @@ pub enum StoreSpec {
     /// ```
     ///
     noop(NoopSpec),
+
+    /// GCS store will use Google Cloud Storage (GCS) as a backend to store
+    /// the files. This configuration can be used to share files
+    /// across multiple instances.
+    ///
+    /// **Example JSON Config:**
+    /// ```json
+    /// "gcs_store": {
+    ///   "bucket": "crossplane-bucket-af79aeca9",
+    ///   "key_prefix": "test-prefix-index/",
+    ///   "retry": {
+    ///     "max_retries": 6,
+    ///     "delay": 0.3,
+    ///     "jitter": 0.5
+    ///   },
+    ///   "multipart_max_concurrent_uploads": 10
+    /// }
+    /// ```
+    ///
+    gcs_store(GcsSpec),
 }
 
 /// Configuration for an individual shard of the store.
@@ -1063,3 +1083,30 @@ pub struct Retry {
     #[serde(default)]
     pub retry_on_errors: Option<Vec<ErrorCode>>,
 }
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+#[serde(deny_unknown_fields)]
+pub struct GcsSpec {
+    /// The name of the GCS bucket.
+    pub bucket: String,
+
+    /// Optional prefix to prepend to all keys in the bucket.
+    pub key_prefix: Option<String>,
+
+    /// If set, objects older than this many seconds will be considered
+    /// expired and will be treated as if they don't exist.
+    #[serde(default)]
+    pub consider_expired_after_s: u32,
+
+    /// Configuration for retrying failed operations.
+    #[serde(default)]
+    pub retry: Retry,
+
+    /// Maximum number of bytes to buffer for retrying requests.
+    /// Defaults to 5MB if not specified.
+    pub max_retry_buffer_per_request: Option<usize>,
+
+    /// Maximum number of concurrent uploads for multipart operations.
+    /// Defaults to 10 if not specified.
+    pub multipart_max_concurrent_uploads: Option<usize>,
+}
diff --git a/nativelink-proto/google/storage/v2/storage.proto b/nativelink-proto/google/storage/v2/storage.proto
new file mode 100644
index 000000000..38f06050e
--- /dev/null
+++ b/nativelink-proto/google/storage/v2/storage.proto
@@ -0,0 +1,2346 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.storage.v2;
+
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/api/routing.proto";
+import "google/iam/v1/iam_policy.proto";
+import "google/iam/v1/policy.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/timestamp.proto";
+import "google/type/date.proto";
+
+option go_package = "cloud.google.com/go/storage/internal/apiv2/storagepb;storagepb";
+option java_multiple_files = true;
+option java_outer_classname = "StorageProto";
+option java_package = "com.google.storage.v2";
+option (google.api.resource_definition) = {
+  type: "cloudkms.googleapis.com/CryptoKey"
+  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
+};
+
+// ## API Overview and Naming Syntax
+//
+// The Cloud Storage gRPC API allows applications to read and write data
+// through the abstractions of buckets and objects. For a description of these
+// abstractions please see https://cloud.google.com/storage/docs.
+//
+// Resources are named as follows:
+//   - Projects are referred to as they are defined by the Resource Manager
+//     API, using strings like `projects/123456` or `projects/my-string-id`.
+//   - Buckets are named using string names of the form:
+//     `projects/{project}/buckets/{bucket}`
+//     For globally unique buckets, `_` may be substituted for the project.
+//   - Objects are uniquely identified by their name along with the name of
+//     the bucket they belong to, as separate strings in this API. For
+//     example:
+//
+//       ReadObjectRequest {
+//         bucket: 'projects/_/buckets/my-bucket'
+//         object: 'my-object'
+//       }
+//
+//     Note that object names can contain `/` characters, which are treated as
+//     any other character (no special directory semantics).
+service Storage {
+  option (google.api.default_host) = "storage.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform,"
+      "https://www.googleapis.com/auth/cloud-platform.read-only,"
+      "https://www.googleapis.com/auth/devstorage.full_control,"
+      "https://www.googleapis.com/auth/devstorage.read_only,"
+      "https://www.googleapis.com/auth/devstorage.read_write";
+
+  // Permanently deletes an empty bucket.
+  rpc DeleteBucket(DeleteBucketRequest) returns (google.protobuf.Empty) {
+    option (google.api.routing) = {
+      routing_parameters { field: "name" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "name";
+  }
+
+  // Returns metadata for the specified bucket.
+  rpc GetBucket(GetBucketRequest) returns (Bucket) {
+    option (google.api.routing) = {
+      routing_parameters { field: "name" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "name";
+  }
+
+  // Creates a new bucket.
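+  // For example, following the naming syntax in the overview above, a
+  // request with `parent: 'projects/123456'` and `bucket_id: 'foo'` might
+  // result in a bucket named `projects/123456/buckets/foo`.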
+  rpc CreateBucket(CreateBucketRequest) returns (Bucket) {
+    option (google.api.routing) = {
+      routing_parameters { field: "parent" path_template: "{project=**}" }
+      routing_parameters {
+        field: "bucket.project"
+        path_template: "{project=**}"
+      }
+    };
+    option (google.api.method_signature) = "parent,bucket,bucket_id";
+  }
+
+  // Retrieves a list of buckets for a given project.
+  rpc ListBuckets(ListBucketsRequest) returns (ListBucketsResponse) {
+    option (google.api.routing) = {
+      routing_parameters { field: "parent" path_template: "{project=**}" }
+    };
+    option (google.api.method_signature) = "parent";
+  }
+
+  // Locks retention policy on a bucket.
+  rpc LockBucketRetentionPolicy(LockBucketRetentionPolicyRequest)
+      returns (Bucket) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket";
+  }
+
+  // Gets the IAM policy for a specified bucket.
+  // The `resource` field in the request should be
+  // `projects/_/buckets/{bucket}`.
+  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.routing) = {
+      routing_parameters { field: "resource" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "resource";
+  }
+
+  // Updates an IAM policy for the specified bucket.
+  // The `resource` field in the request should be
+  // `projects/_/buckets/{bucket}`.
+  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.routing) = {
+      routing_parameters { field: "resource" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "resource,policy";
+  }
+
+  // Tests a set of permissions on the given bucket, object, or managed folder
+  // to see which, if any, are held by the caller.
+  // The `resource` field in the request should be
+  // `projects/_/buckets/{bucket}` for a bucket,
+  // `projects/_/buckets/{bucket}/objects/{object}` for an object, or
+  // `projects/_/buckets/{bucket}/managedFolders/{managedFolder}`
+  // for a managed folder.
+  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest)
+      returns (google.iam.v1.TestIamPermissionsResponse) {
+    option (google.api.routing) = {
+      routing_parameters { field: "resource" path_template: "{bucket=**}" }
+      routing_parameters {
+        field: "resource"
+        path_template: "{bucket=projects/*/buckets/*}/objects/**"
+      }
+      routing_parameters {
+        field: "resource"
+        path_template: "{bucket=projects/*/buckets/*}/managedFolders/**"
+      }
+    };
+    option (google.api.method_signature) = "resource,permissions";
+  }
+
+  // Updates a bucket. Equivalent to JSON API's storage.buckets.patch method.
+  rpc UpdateBucket(UpdateBucketRequest) returns (Bucket) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket.name" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket,update_mask";
+  }
+
+  // Concatenates a list of existing objects into a new object in the same
+  // bucket.
+  rpc ComposeObject(ComposeObjectRequest) returns (Object) {
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "destination.bucket"
+        path_template: "{bucket=**}"
+      }
+    };
+  }
+
+  // Deletes an object and its metadata.
+  //
+  // Deletions are normally permanent when versioning is disabled or whenever
+  // the generation parameter is used.
+  // However, if soft delete is enabled for the bucket, deleted objects can be
+  // restored using RestoreObject until the soft delete retention period has
+  // passed.
+  rpc DeleteObject(DeleteObjectRequest) returns (google.protobuf.Empty) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket,object";
+    option (google.api.method_signature) = "bucket,object,generation";
+  }
+
+  // Restores a soft-deleted object.
+  rpc RestoreObject(RestoreObjectRequest) returns (Object) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket,object,generation";
+  }
+
+  // Cancels an in-progress resumable upload.
+  //
+  // Any attempts to write to the resumable upload after cancelling the upload
+  // will fail.
+  //
+  // The behavior for currently in progress write operations is not
+  // guaranteed - they could either complete before the cancellation or fail
+  // if the cancellation completes first.
+  rpc CancelResumableWrite(CancelResumableWriteRequest)
+      returns (CancelResumableWriteResponse) {
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "upload_id"
+        path_template: "{bucket=projects/*/buckets/*}/**"
+      }
+    };
+    option (google.api.method_signature) = "upload_id";
+  }
+
+  // Retrieves an object's metadata.
+  rpc GetObject(GetObjectRequest) returns (Object) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket,object";
+    option (google.api.method_signature) = "bucket,object,generation";
+  }
+
+  // Reads an object's data.
+  rpc ReadObject(ReadObjectRequest) returns (stream ReadObjectResponse) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "bucket,object";
+    option (google.api.method_signature) = "bucket,object,generation";
+  }
+
+  // Updates an object's metadata.
+  // Equivalent to JSON API's storage.objects.patch.
+  rpc UpdateObject(UpdateObjectRequest) returns (Object) {
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "object.bucket"
+        path_template: "{bucket=**}"
+      }
+    };
+    option (google.api.method_signature) = "object,update_mask";
+  }
+
+  // Stores a new object and metadata.
+  //
+  // An object can be written either in a single message stream or in a
+  // resumable sequence of message streams. To write using a single stream,
+  // the client should include in the first message of the stream an
+  // `WriteObjectSpec` describing the destination bucket, object, and any
+  // preconditions. Additionally, the final message must set 'finish_write' to
+  // true, or else it is an error.
+  //
+  // For a resumable write, the client should instead call
+  // `StartResumableWrite()`, populating a `WriteObjectSpec` into that
+  // request. They should then attach the returned `upload_id` to the first
+  // message of each following call to `WriteObject`. If the stream is closed
+  // before finishing the upload (either explicitly by the client or due to a
+  // network error or an error response from the server), the client should do
+  // as follows:
+  //   - Check the result Status of the stream, to determine if writing can
+  //     be resumed on this stream or must be restarted from scratch (by
+  //     calling `StartResumableWrite()`).
+  //     The resumable errors are DEADLINE_EXCEEDED, INTERNAL, and
+  //     UNAVAILABLE. For each case, the client should use binary exponential
+  //     backoff before retrying. Additionally, writes can be resumed after
+  //     RESOURCE_EXHAUSTED errors, but only after taking appropriate
+  //     measures, which may include reducing aggregate send rate across
+  //     clients and/or requesting a quota increase for your project.
+  //   - If the call to `WriteObject` returns `ABORTED`, that indicates
+  //     concurrent attempts to update the resumable write, caused either by
+  //     multiple racing clients or by a single client where the previous
+  //     request was timed out on the client side but nonetheless reached the
+  //     server. In this case the client should take steps to prevent further
+  //     concurrent writes (e.g., increase the timeouts, stop using more than
+  //     one process to perform the upload, etc.), and then should follow the
+  //     steps below for resuming the upload.
+  //   - For resumable errors, the client should call `QueryWriteStatus()`
+  //     and then continue writing from the returned `persisted_size`. This
+  //     may be less than the amount of data the client previously sent. Note
+  //     also that it is acceptable to send data starting at an offset earlier
+  //     than the returned `persisted_size`; in this case, the service will
+  //     skip data at offsets that were already persisted (without checking
+  //     that it matches the previously written data), and write only the
+  //     data starting from the persisted offset. Even though the data isn't
+  //     written, it may still incur a performance cost over resuming at the
+  //     correct write offset. This behavior can make client-side handling
+  //     simpler in some cases.
+  //   - Clients must only send data that is a multiple of 256 KiB per
+  //     message, unless the object is being finished with `finish_write` set
+  //     to `true`.
+  //
+  // The service will not view the object as complete until the client has
+  // sent a `WriteObjectRequest` with `finish_write` set to `true`. Sending
+  // any requests on a stream after sending a request with `finish_write` set
+  // to `true` will cause an error. The client **should** check the response
+  // it receives to determine how much data the service was able to commit
+  // and whether the service views the object as complete.
+  //
+  // Attempting to resume an already finalized object will result in an OK
+  // status, with a WriteObjectResponse containing the finalized object's
+  // metadata.
+  //
+  // Alternatively, the BidiWriteObject operation may be used to write an
+  // object with controls over flushing and the ability to determine the
+  // current persisted size.
+  rpc WriteObject(stream WriteObjectRequest) returns (WriteObjectResponse) {}
+
+  // Stores a new object and metadata.
+  //
+  // This is similar to the WriteObject call with the added support for
+  // manual flushing of persisted state, and the ability to determine current
+  // persisted size without closing the stream.
+  //
+  // The client may specify one or both of the `state_lookup` and `flush`
+  // fields in each BidiWriteObjectRequest. If `flush` is specified, the data
+  // written so far will be persisted to storage. If `state_lookup` is
+  // specified, the service will respond with a BidiWriteObjectResponse that
+  // contains the persisted size.
+  // If both `flush` and `state_lookup` are specified, the flush will always
+  // occur before a `state_lookup`, so that both may be set in the same
+  // request and the returned state will be the state of the object
+  // post-flush. When the stream is closed, a BidiWriteObjectResponse will
+  // always be sent to the client, regardless of the value of `state_lookup`.
+  rpc BidiWriteObject(stream BidiWriteObjectRequest)
+      returns (stream BidiWriteObjectResponse) {}
+
+  // Retrieves a list of objects matching the criteria.
+  rpc ListObjects(ListObjectsRequest) returns (ListObjectsResponse) {
+    option (google.api.routing) = {
+      routing_parameters { field: "parent" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) = "parent";
+  }
+
+  // Rewrites a source object to a destination object. Optionally overrides
+  // metadata.
+  rpc RewriteObject(RewriteObjectRequest) returns (RewriteResponse) {
+    option (google.api.routing) = {
+      routing_parameters { field: "source_bucket" }
+      routing_parameters {
+        field: "destination_bucket"
+        path_template: "{bucket=**}"
+      }
+    };
+  }
+
+  // Starts a resumable write. How long the write operation remains valid, and
+  // what happens when the write operation becomes invalid, are
+  // service-dependent.
+  rpc StartResumableWrite(StartResumableWriteRequest)
+      returns (StartResumableWriteResponse) {
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "write_object_spec.resource.bucket"
+        path_template: "{bucket=**}"
+      }
+    };
+  }
+
+  // Determines the `persisted_size` for an object that is being written,
+  // which can then be used as the `write_offset` for the next `Write()` call.
+  //
+  // If the object does not exist (i.e., the object has been deleted, or the
+  // first `Write()` has not yet reached the service), this method returns the
+  // error `NOT_FOUND`.
+  //
+  // The client **may** call `QueryWriteStatus()` at any time to determine how
+  // much data has been processed for this object. This is useful if the
+  // client is buffering data and needs to know which data can be safely
+  // evicted. For any sequence of `QueryWriteStatus()` calls for a given
+  // object name, the sequence of returned `persisted_size` values will be
+  // non-decreasing.
+  rpc QueryWriteStatus(QueryWriteStatusRequest)
+      returns (QueryWriteStatusResponse) {
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "upload_id"
+        path_template: "{bucket=projects/*/buckets/*}/**"
+      }
+    };
+    option (google.api.method_signature) = "upload_id";
+  }
+
+  // Moves the source object to the destination object in the same bucket.
+  rpc MoveObject(MoveObjectRequest) returns (Object) {
+    option (google.api.routing) = {
+      routing_parameters { field: "bucket" path_template: "{bucket=**}" }
+    };
+    option (google.api.method_signature) =
+        "bucket,source_object,destination_object";
+  }
+}
+
+// Request message for DeleteBucket.
+message DeleteBucketRequest {
+  // Required. Name of a bucket to delete.
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // If set, only deletes the bucket if its metageneration matches this value.
+  optional int64 if_metageneration_match = 2;
+
+  // If set, only deletes the bucket if its metageneration does not match this
+  // value.
+  optional int64 if_metageneration_not_match = 3;
+}
+
+// Request message for GetBucket.
+message GetBucketRequest {
+  // Required. Name of a bucket.
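+  // For example: `projects/_/buckets/my-bucket`, following the naming
+  // syntax described in the API overview above.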
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // If set, and if the bucket's current metageneration does not match the
+  // specified value, the request will return an error.
+  optional int64 if_metageneration_match = 2;
+
+  // If set, and if the bucket's current metageneration matches the specified
+  // value, the request will return an error.
+  optional int64 if_metageneration_not_match = 3;
+
+  // Mask specifying which fields to read.
+  // A "*" field may be used to indicate all fields.
+  // If no mask is specified, will default to all fields.
+  optional google.protobuf.FieldMask read_mask = 5;
+}
+
+// Request message for CreateBucket.
+message CreateBucketRequest {
+  // Required. The project to which this bucket will belong.
+  string parent = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      child_type: "storage.googleapis.com/Bucket"
+    }
+  ];
+
+  // Properties of the new bucket being inserted.
+  // The name of the bucket is specified in the `bucket_id` field. Populating
+  // `bucket.name` field will result in an error.
+  // The project of the bucket must be specified in the `bucket.project`
+  // field. This field must be in `projects/{projectIdentifier}` format;
+  // {projectIdentifier} can be the project ID or project number. The `parent`
+  // field must be either empty or `projects/_`.
+  Bucket bucket = 2;
+
+  // Required. The ID to use for this bucket, which will become the final
+  // component of the bucket's resource name. For example, the value `foo`
+  // might result in a bucket with the name `projects/123456/buckets/foo`.
+  string bucket_id = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Apply a predefined set of access controls to this bucket.
+  // Valid values are "authenticatedRead", "private", "projectPrivate",
+  // "publicRead", or "publicReadWrite".
+  string predefined_acl = 6;
+
+  // Apply a predefined set of default object access controls to this bucket.
+  // Valid values are "authenticatedRead", "bucketOwnerFullControl",
+  // "bucketOwnerRead", "private", "projectPrivate", or "publicRead".
+  string predefined_default_object_acl = 7;
+}
+
+// Request message for ListBuckets.
+message ListBucketsRequest {
+  // Required. The project whose buckets we are listing.
+  string parent = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      child_type: "storage.googleapis.com/Bucket"
+    }
+  ];
+
+  // Maximum number of buckets to return in a single response. The service
+  // will use this parameter or 1,000 items, whichever is smaller. If "acl" is
+  // present in the read_mask, the service will use this parameter or 200
+  // items, whichever is smaller.
+  int32 page_size = 2;
+
+  // A previously-returned page token representing part of the larger set of
+  // results to view.
+  string page_token = 3;
+
+  // Filter results to buckets whose names begin with this prefix.
+  string prefix = 4;
+
+  // Mask specifying which fields to read from each result.
+  // If no mask is specified, will default to all fields except items.owner,
+  // items.acl, and items.default_object_acl.
+  // * may be used to mean "all fields".
+  optional google.protobuf.FieldMask read_mask = 5;
+}
+
+// The result of a call to Buckets.ListBuckets
+message ListBucketsResponse {
+  // The list of items.
+  repeated Bucket buckets = 1;
+
+  // The continuation token, used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results.
+  string next_page_token = 2;
+}
+
+// Request message for LockBucketRetentionPolicyRequest.
+message LockBucketRetentionPolicyRequest {
+  // Required. Name of a bucket.
+  string bucket = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. Makes the operation conditional on whether bucket's current
+  // metageneration matches the given value. Must be positive.
+  int64 if_metageneration_match = 2 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Request for UpdateBucket method.
+message UpdateBucketRequest {
+  // Required. The bucket to update.
+  // The bucket's `name` field will be used to identify the bucket.
+  Bucket bucket = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // If set, will only modify the bucket if its metageneration matches this
+  // value.
+  optional int64 if_metageneration_match = 2;
+
+  // If set, will only modify the bucket if its metageneration does not match
+  // this value.
+  optional int64 if_metageneration_not_match = 3;
+
+  // Apply a predefined set of access controls to this bucket.
+  // Valid values are "authenticatedRead", "private", "projectPrivate",
+  // "publicRead", or "publicReadWrite".
+  string predefined_acl = 8;
+
+  // Apply a predefined set of default object access controls to this bucket.
+  // Valid values are "authenticatedRead", "bucketOwnerFullControl",
+  // "bucketOwnerRead", "private", "projectPrivate", or "publicRead".
+  string predefined_default_object_acl = 9;
+
+  // Required. List of fields to be updated.
+  //
+  // To specify ALL fields, equivalent to the JSON API's "update" function,
+  // specify a single field with the value `*`. Note: not recommended. If a
+  // new field is introduced at a later time, an older client updating with
+  // the `*` may accidentally reset the new field's value.
+  //
+  // Not specifying any fields is an error.
+  google.protobuf.FieldMask update_mask = 6
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Request message for ComposeObject.
+message ComposeObjectRequest {
+  // Description of a source object for a composition request.
+  message SourceObject {
+    // Preconditions for a source object of a composition request.
+    message ObjectPreconditions {
+      // Only perform the composition if the generation of the source object
+      // that would be used matches this value. If this value and a generation
+      // are both specified, they must be the same value or the call will
+      // fail.
+      optional int64 if_generation_match = 1;
+    }
+
+    // Required. The source object's name. All source objects must reside in
+    // the same bucket.
+    string name = 1 [(google.api.field_behavior) = REQUIRED];
+
+    // The generation of this object to use as the source.
+    int64 generation = 2;
+
+    // Conditions that must be met for this operation to execute.
+    ObjectPreconditions object_preconditions = 3;
+  }
+
+  // Required. Properties of the resulting object.
+  Object destination = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // The list of source objects that will be concatenated into a single
+  // object.
+  repeated SourceObject source_objects = 2;
+
+  // Apply a predefined set of access controls to the destination object.
+  // Valid values are "authenticatedRead", "bucketOwnerFullControl",
+  // "bucketOwnerRead", "private", "projectPrivate", or "publicRead".
+  string destination_predefined_acl = 9;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 4;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 5;
+
+  // Resource name of the Cloud KMS key, of the form
+  // `projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key`,
+  // that will be used to encrypt the object. Overrides the object
+  // metadata's `kms_key_name` value, if any.
+  string kms_key = 6 [(google.api.resource_reference) = {
+    type: "cloudkms.googleapis.com/CryptoKey"
+  }];
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 7;
+
+  // The checksums of the complete object. This will be validated against the
+  // combined checksums of the component objects.
+  ObjectChecksums object_checksums = 10;
+}
+
+// Message for deleting an object.
+// `bucket` and `object` **must** be set.
+message DeleteObjectRequest {
+  // Required. Name of the bucket in which the object resides.
+  string bucket = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. The name of the finalized object to delete.
+  // Note: If you want to delete an unfinalized resumable upload please use
+  // `CancelResumableWrite`.
+  string object = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // If present, permanently deletes a specific revision of this object (as
+  // opposed to the latest version, the default).
+  int64 generation = 4;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 5;
+
+  // Makes the operation conditional on whether the object's live generation
+  // does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 6;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 7;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 8;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 10;
+}
+
+// Message for restoring an object.
+// `bucket`, `object`, and `generation` **must** be set.
+message RestoreObjectRequest {
+  // Required. Name of the bucket in which the object resides.
+  string bucket = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. The name of the object to restore.
+  string object = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The specific revision of the object to restore.
+  int64 generation = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. Restore token used to differentiate soft-deleted objects with
+  // the same name and generation. Only applicable for hierarchical namespace
+  // buckets. This parameter is optional, and is only required in the rare
+  // case when there are multiple soft-deleted objects with the same name and
+  // generation.
+  string restore_token = 11 [(google.api.field_behavior) = OPTIONAL];
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 4;
+
+  // Makes the operation conditional on whether the object's live generation
+  // does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 5;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 6;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 7;
+
+  // If false or unset, the bucket's default object ACL will be used.
+  // If true, copy the source object's access controls.
+  // Return an error if bucket has UBLA enabled.
+  optional bool copy_source_acl = 9;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 8;
+}
+
+// Message for canceling an in-progress resumable upload.
+// `upload_id` **must** be set.
+message CancelResumableWriteRequest {
+  // Required. The upload_id of the resumable upload to cancel. This should be
+  // copied from the `upload_id` field of `StartResumableWriteResponse`.
+  string upload_id = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Empty response message for canceling an in-progress resumable upload, will
+// be extended as needed.
+message CancelResumableWriteResponse {}
+
+// Request message for ReadObject.
+message ReadObjectRequest {
+  // Required. The name of the bucket containing the object to read.
+  string bucket = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. The name of the object to read.
+  string object = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // If present, selects a specific revision of this object (as opposed
+  // to the latest version, the default).
+  int64 generation = 3;
+
+  // The offset for the first byte to return in the read, relative to the
+  // start of the object.
+  //
+  // A negative `read_offset` value will be interpreted as the number of bytes
+  // back from the end of the object to be returned. For example, if an
+  // object's length is 15 bytes, a ReadObjectRequest with `read_offset` = -5
+  // and `read_limit` = 3 would return bytes 10 through 12 of the object.
+  // Requesting a negative offset with magnitude larger than the size of the
+  // object will return the entire object.
+  int64 read_offset = 4;
+
+  // The maximum number of `data` bytes the server is allowed to return in the
+  // sum of all `Object` messages. A `read_limit` of zero indicates that there
+  // is no limit, and a negative `read_limit` will cause an error.
+  //
+  // If the stream returns fewer bytes than allowed by the `read_limit` and no
+  // error occurred, the stream includes all data from the `read_offset` to
+  // the end of the resource.
+  int64 read_limit = 5;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 6;
+
+  // Makes the operation conditional on whether the object's live generation
+  // does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 7;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 8;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 9;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 10;
+
+  // Mask specifying which fields to read.
+  // The checksummed_data field and its children will always be present.
+  // If no mask is specified, will default to all fields except
+  // metadata.owner and metadata.acl.
+  // * may be used to mean "all fields".
+  optional google.protobuf.FieldMask read_mask = 12;
+}
+
+// Request message for GetObject.
+message GetObjectRequest {
+  // Required. Name of the bucket in which the object resides.
+  string bucket = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. Name of the object.
+  string object = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // If present, selects a specific revision of this object (as opposed to the
+  // latest version, the default).
+  int64 generation = 3;
+
+  // If true, return the soft-deleted version of this object.
+  optional bool soft_deleted = 11;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 4;
+
+  // Makes the operation conditional on whether the object's live generation
+  // does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 5;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 6;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 7;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 8;
+
+  // Mask specifying which fields to read.
+  // If no mask is specified, will default to all fields except metadata.acl
+  // and metadata.owner.
+  // * may be used to mean "all fields".
+  optional google.protobuf.FieldMask read_mask = 10;
+
+  // Optional. Restore token used to differentiate soft-deleted objects with
+  // the same name and generation. Only applicable for hierarchical namespace
+  // buckets and if soft_deleted is set to true. This parameter is optional,
+  // and is only required in the rare case when there are multiple
+  // soft-deleted objects with the same name and generation.
+  string restore_token = 12 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Response message for ReadObject.
+message ReadObjectResponse {
+  // A portion of the data for the object. The service **may** leave `data`
+  // empty for any given `ReadResponse`. This enables the service to inform
+  // the client that the request is still live while it is running an
+  // operation to generate more data.
+  ChecksummedData checksummed_data = 1;
+
+  // The checksums of the complete object. If the object is downloaded in
+  // full, the client should compute one of these checksums over the
+  // downloaded object and compare it against the value provided here.
+  ObjectChecksums object_checksums = 2;
+
+  // If read_offset and or read_limit was specified on the
+  // ReadObjectRequest, ContentRange will be populated on the first
+  // ReadObjectResponse message of the read stream.
+  ContentRange content_range = 3;
+
+  // Metadata of the object whose media is being returned.
+  // Only populated in the first response in the stream.
+  Object metadata = 4;
+}
+
+// Describes an attempt to insert an object, possibly over multiple requests.
+message WriteObjectSpec {
+  // Required. Destination object, including its name and its metadata.
+  Object resource = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Apply a predefined set of access controls to this object.
+  // Valid values are "authenticatedRead", "bucketOwnerFullControl",
+  // "bucketOwnerRead", "private", "projectPrivate", or "publicRead".
+  string predefined_acl = 7;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value. Setting to 0 makes the operation
+  // succeed only if there are no live versions of the object.
+  optional int64 if_generation_match = 3;
+
+  // Makes the operation conditional on whether the object's live
+  // generation does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 4;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration matches the given value.
+  optional int64 if_metageneration_match = 5;
+
+  // Makes the operation conditional on whether the object's current
+  // metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 6;
+
+  // The expected final object size being uploaded.
+  // If this value is set, closing the stream after writing fewer or more than
+  // `object_size` bytes will result in an OUT_OF_RANGE error.
+  //
+  // This situation is considered a client error, and if such an error occurs
+  // you must start the upload over from scratch, this time sending the
+  // correct number of bytes.
+  optional int64 object_size = 8;
+}
+
+// Request message for WriteObject.
+message WriteObjectRequest {
+  // The first message of each stream should set one of the following.
+  oneof first_message {
+    // For resumable uploads. This should be the `upload_id` returned from a
+    // call to `StartResumableWriteResponse`.
+    string upload_id = 1;
+
+    // For non-resumable uploads. Describes the overall upload, including the
+    // destination bucket and object name, preconditions, etc.
+    WriteObjectSpec write_object_spec = 2;
+  }
+
+  // Required. The offset from the beginning of the object at which the data
+  // should be written.
+  //
+  // In the first `WriteObjectRequest` of a `WriteObject()` action, it
+  // indicates the initial offset for the `Write()` call. The value **must**
+  // be equal to the `persisted_size` that a call to `QueryWriteStatus()`
+  // would return (0 if this is the first write to the object).
+  //
+  // On subsequent calls, this value **must** be no larger than the sum of the
+  // first `write_offset` and the sizes of all `data` chunks sent previously
+  // on this stream.
+  //
+  // An incorrect value will cause an error.
+  int64 write_offset = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // A portion of the data for the object.
+  oneof data {
+    // The data to insert. If a crc32c checksum is provided that doesn't match
+    // the checksum computed by the service, the request will fail.
+    ChecksummedData checksummed_data = 4;
+  }
+
+  // Checksums for the complete object. If the checksums computed by the
+  // service don't match the specified checksums the call will fail. May only
+  // be provided in the first or last request (either with first_message, or
+  // finish_write set).
+  ObjectChecksums object_checksums = 6;
+
+  // If `true`, this indicates that the write is complete. Sending any
+  // `WriteObjectRequest`s subsequent to one in which `finish_write` is `true`
+  // will cause an error.
+  // For a non-resumable write (where the upload_id was not set in the first
+  // message), it is an error not to set this field in the final message of
+  // the stream.
+  bool finish_write = 7;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 8;
+}
+
+// Response message for WriteObject.
+message WriteObjectResponse {
+  // The response will set one of the following.
+  oneof write_status {
+    // The total number of bytes that have been processed for the given object
+    // from all `WriteObject` calls. Only set if the upload has not finalized.
+    int64 persisted_size = 1;
+
+    // A resource containing the metadata for the uploaded object. Only set if
+    // the upload has finalized.
+    Object resource = 2;
+  }
+}
+
+// Request message for BidiWriteObject.
+message BidiWriteObjectRequest {
+  // The first message of each stream should set one of the following.
+  oneof first_message {
+    // For resumable uploads. This should be the `upload_id` returned from a
+    // call to `StartResumableWriteResponse`.
+    string upload_id = 1;
+
+    // For non-resumable uploads. Describes the overall upload, including the
+    // destination bucket and object name, preconditions, etc.
+    WriteObjectSpec write_object_spec = 2;
+  }
+
+  // Required. The offset from the beginning of the object at which the data
+  // should be written.
+  //
+  // In the first `WriteObjectRequest` of a `WriteObject()` action, it
+  // indicates the initial offset for the `Write()` call. The value **must**
+  // be equal to the `persisted_size` that a call to `QueryWriteStatus()`
+  // would return (0 if this is the first write to the object).
+  //
+  // On subsequent calls, this value **must** be no larger than the sum of the
+  // first `write_offset` and the sizes of all `data` chunks sent previously
+  // on this stream.
+  //
+  // An invalid value will cause an error.
+  int64 write_offset = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // A portion of the data for the object.
+  oneof data {
+    // The data to insert. If a crc32c checksum is provided that doesn't match
+    // the checksum computed by the service, the request will fail.
+    ChecksummedData checksummed_data = 4;
+  }
+
+  // Checksums for the complete object. If the checksums computed by the
+  // service don't match the specified checksums the call will fail. May only
+  // be provided in the first request or the last request (with finish_write
+  // set).
+  ObjectChecksums object_checksums = 6;
+
+  // For each BidiWriteObjectRequest where state_lookup is `true` or the
+  // client closes the stream, the service will send a BidiWriteObjectResponse
+  // containing the current persisted size. The persisted size sent in
+  // responses covers all the bytes the server has persisted thus far and can
+  // be used to decide what data is safe for the client to drop. Note that the
+  // object's current size reported by the BidiWriteObjectResponse may lag
+  // behind the number of bytes written by the client. This field is ignored
+  // if `finish_write` is set to true.
+  bool state_lookup = 7;
+
+  // Persists data written on the stream, up to and including the current
+  // message, to permanent storage. This option should be used sparingly as it
+  // may reduce performance. Ongoing writes will periodically be persisted on
+  // the server even when `flush` is not set. This field is ignored if
+  // `finish_write` is set to true since there's no need to checkpoint or
+  // flush if this message completes the write.
+  bool flush = 8;
+
+  // If `true`, this indicates that the write is complete. Sending any
+  // `WriteObjectRequest`s subsequent to one in which `finish_write` is `true`
+  // will cause an error.
+  // For a non-resumable write (where the upload_id was not set in the first
+  // message), it is an error not to set this field in the final message of
+  // the stream.
+  bool finish_write = 9;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 10;
+}
+
+// Response message for BidiWriteObject.
+message BidiWriteObjectResponse {
+  // The response will set one of the following.
+  oneof write_status {
+    // The total number of bytes that have been processed for the given object
+    // from all `WriteObject` calls. Only set if the upload has not finalized.
+    int64 persisted_size = 1;
+
+    // A resource containing the metadata for the uploaded object. Only set if
+    // the upload has finalized.
+    Object resource = 2;
+  }
+}
+
+// Request message for ListObjects.
+message ListObjectsRequest {
+  // Required. Name of the bucket in which to look for objects.
+  string parent = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Maximum number of `items` plus `prefixes` to return
+  // in a single page of responses. As duplicate `prefixes` are
+  // omitted, fewer total results may be returned than requested. The service
+  // will use this parameter or 1,000 items, whichever is smaller.
+  int32 page_size = 2;
+
+  // A previously-returned page token representing part of the larger set of
+  // results to view.
+  string page_token = 3;
+
+  // If set, returns results in a directory-like mode. `items` will contain
+  // only objects whose names, aside from the `prefix`, do not contain
+  // `delimiter`.
+  // Objects whose names, aside from the `prefix`, contain `delimiter` will
+  // have their name, truncated after the `delimiter`, returned in
+  // `prefixes`. Duplicate `prefixes` are omitted.
+  string delimiter = 4;
+
+  // If true, objects that end in exactly one instance of `delimiter`
+  // will have their metadata included in `items` in addition to
+  // `prefixes`.
+  bool include_trailing_delimiter = 5;
+
+  // Filter results to objects whose names begin with this prefix.
+  string prefix = 6;
+
+  // If `true`, lists all versions of an object as distinct results.
+  // For more information, see
+  // [Object
+  // Versioning](https://cloud.google.com/storage/docs/object-versioning).
+  bool versions = 7;
+
+  // Mask specifying which fields to read from each result.
+  // If no mask is specified, will default to all fields except items.acl and
+  // items.owner.
+  // * may be used to mean "all fields".
+  optional google.protobuf.FieldMask read_mask = 8;
+
+  // Optional. Filter results to objects whose names are lexicographically
+  // equal to or after lexicographic_start. If lexicographic_end is also set,
+  // the objects listed have names between lexicographic_start (inclusive) and
+  // lexicographic_end (exclusive).
+  string lexicographic_start = 10 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Filter results to objects whose names are lexicographically
+  // before lexicographic_end. If lexicographic_start is also set, the objects
+  // listed have names between lexicographic_start (inclusive) and
+  // lexicographic_end (exclusive).
+  string lexicographic_end = 11 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. If true, only list all soft-deleted versions of the object.
+  // Soft delete policy is required to set this option.
+  bool soft_deleted = 12 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. If true, will also include folders and managed folders (besides
+  // objects) in the returned `prefixes`. Requires `delimiter` to be set to
+  // '/'.
+  bool include_folders_as_prefixes = 13
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Filter results to objects and prefixes that match this glob
+  // pattern. See [List Objects Using
+  // Glob](https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-objects-and-prefixes-using-glob)
+  // for the full syntax.
+  string match_glob = 14 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Request object for `QueryWriteStatus`.
+message QueryWriteStatusRequest {
+  // Required. The name of the resume token for the object whose write status
+  // is being requested.
+  string upload_id = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 2;
+}
+
+// Response object for `QueryWriteStatus`.
+message QueryWriteStatusResponse {
+  // The response will set one of the following.
+  oneof write_status {
+    // The total number of bytes that have been processed for the given object
+    // from all `WriteObject` calls. This is the correct value for the
+    // 'write_offset' field to use when resuming the `WriteObject` operation.
+    // Only set if the upload has not finalized.
+    int64 persisted_size = 1;
+
+    // A resource containing the metadata for the uploaded object. Only set if
+    // the upload has finalized.
+    Object resource = 2;
+  }
+}
+
+// Request message for RewriteObject.
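+// A rewrite may span several calls: while the response's `done` flag is
+// false, the caller re-sends the request with the returned `rewrite_token`
+// (see the `rewrite_token` field below) until the rewrite completes.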
+// If the source object is encrypted using a Customer-Supplied Encryption
+// Key the key information must be provided in the
+// copy_source_encryption_algorithm, copy_source_encryption_key_bytes, and
+// copy_source_encryption_key_sha256_bytes fields. If the destination object
+// should be encrypted the keying information should be provided in the
+// encryption_algorithm, encryption_key_bytes, and
+// encryption_key_sha256_bytes fields of the
+// common_object_request_params.customer_encryption field.
+message RewriteObjectRequest {
+  // Required. Immutable. The name of the destination object.
+  // See the
+  // [Naming Guidelines](https://cloud.google.com/storage/docs/objects#naming).
+  // Example: `test.txt`
+  // The `name` field by itself does not uniquely identify a Cloud Storage
+  // object. A Cloud Storage object is uniquely identified by the tuple of
+  // (bucket, object, generation).
+  string destination_name = 24 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Required. Immutable. The name of the bucket containing the destination
+  // object.
+  string destination_bucket = 25 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // The name of the Cloud KMS key that will be used to encrypt the
+  // destination object. The Cloud KMS key must be located in same location as
+  // the object. If the parameter is not specified, the request uses the
+  // destination bucket's default encryption key, if any, or else the
+  // Google-managed encryption key.
+  string destination_kms_key = 27 [(google.api.resource_reference) = {
+    type: "cloudkms.googleapis.com/CryptoKey"
+  }];
+
+  // Properties of the destination, post-rewrite object.
+  // The `name`, `bucket` and `kms_key` fields must not be populated (these
+  // values are specified in the `destination_name`, `destination_bucket`, and
+  // `destination_kms_key` fields).
+  // If `destination` is present it will be used to construct the destination
+  // object's metadata; otherwise the destination object's metadata will be
+  // copied from the source object.
+  Object destination = 1;
+
+  // Required. Name of the bucket in which to find the source object.
+  string source_bucket = 2 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" }
+  ];
+
+  // Required. Name of the source object.
+  string source_object = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // If present, selects a specific revision of the source object (as opposed
+  // to the latest version, the default).
+  int64 source_generation = 4;
+
+  // Include this field (from the previous rewrite response) on each rewrite
+  // request after the first one, until the rewrite response 'done' flag is
+  // true. Calls that provide a rewriteToken can omit all other request
+  // fields, but if included those fields must match the values provided in
+  // the first rewrite request.
+  string rewrite_token = 5;
+
+  // Apply a predefined set of access controls to the destination object.
+  // Valid values are "authenticatedRead", "bucketOwnerFullControl",
+  // "bucketOwnerRead", "private", "projectPrivate", or "publicRead".
+  string destination_predefined_acl = 28;
+
+  // Makes the operation conditional on whether the object's current
+  // generation matches the given value.
+  // Setting to 0 makes the operation succeed only if there are no live
+  // versions of the object.
+  optional int64 if_generation_match = 7;
+
+  // Makes the operation conditional on whether the object's live generation
+  // does not match the given value. If no live object exists, the
+  // precondition fails. Setting to 0 makes the operation succeed only if
+  // there is a live version of the object.
+  optional int64 if_generation_not_match = 8;
+
+  // Makes the operation conditional on whether the destination object's
+  // current metageneration matches the given value.
+  optional int64 if_metageneration_match = 9;
+
+  // Makes the operation conditional on whether the destination object's
+  // current metageneration does not match the given value.
+  optional int64 if_metageneration_not_match = 10;
+
+  // Makes the operation conditional on whether the source object's live
+  // generation matches the given value.
+  optional int64 if_source_generation_match = 11;
+
+  // Makes the operation conditional on whether the source object's live
+  // generation does not match the given value.
+  optional int64 if_source_generation_not_match = 12;
+
+  // Makes the operation conditional on whether the source object's current
+  // metageneration matches the given value.
+  optional int64 if_source_metageneration_match = 13;
+
+  // Makes the operation conditional on whether the source object's current
+  // metageneration does not match the given value.
+  optional int64 if_source_metageneration_not_match = 14;
+
+  // The maximum number of bytes that will be rewritten per rewrite request.
+  // Most callers shouldn't need to specify this parameter - it is primarily
+  // in place to support testing. If specified the value must be an integral
+  // multiple of 1 MiB (1048576). Also, this only applies to requests where
+  // the source and destination span locations and/or storage classes.
+  // Finally, this value must not change across rewrite calls else you'll get
+  // an error that the `rewriteToken` is invalid.
+  int64 max_bytes_rewritten_per_call = 15;
+
+  // The algorithm used to encrypt the source object, if any. Used if the
+  // source object was encrypted with a Customer-Supplied Encryption Key.
+  string copy_source_encryption_algorithm = 16;
+
+  // The raw bytes (not base64-encoded) AES-256 encryption key used to encrypt
+  // the source object, if it was encrypted with a Customer-Supplied
+  // Encryption Key.
+  bytes copy_source_encryption_key_bytes = 21;
+
+  // The raw bytes (not base64-encoded) SHA256 hash of the encryption key used
+  // to encrypt the source object, if it was encrypted with a
+  // Customer-Supplied Encryption Key.
+  bytes copy_source_encryption_key_sha256_bytes = 22;
+
+  // A set of parameters common to Storage API requests concerning an object.
+  CommonObjectRequestParams common_object_request_params = 19;
+
+  // The checksums of the complete object. This will be used to validate the
+  // destination object after rewriting.
+  ObjectChecksums object_checksums = 29;
+}
+
+// A rewrite response.
+message RewriteResponse {
+  // The total bytes written so far, which can be used to provide a waiting
+  // user with a progress indicator. This property is always present in the
+  // response.
+  int64 total_bytes_rewritten = 1;
+
+  // The total size of the object being copied in bytes. This property is
+  // always present in the response.
+  int64 object_size = 2;
+
+  // `true` if the copy is finished; otherwise, `false` if
+  // the copy is in progress.
This property is always present in the response. + bool done = 3; + + // A token to use in subsequent requests to continue copying data. This token + // is present in the response only when there is more data to copy. + string rewrite_token = 4; + + // A resource containing the metadata for the copied-to object. This property + // is present in the response only when copying completes. + Object resource = 5; +} + +// Request message for MoveObject. +message MoveObjectRequest { + // Required. Name of the bucket in which the object resides. + string bucket = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" } + ]; + + // Required. Name of the source object. + string source_object = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. Name of the destination object. + string destination_object = 3 [(google.api.field_behavior) = REQUIRED]; + + // Optional. Makes the operation conditional on whether the source object's + // current generation matches the given value. `if_source_generation_match` + // and `if_source_generation_not_match` conditions are mutually exclusive: + // it's an error for both of them to be set in the request. + optional int64 if_source_generation_match = 4 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the source object's + // current generation does not match the given value. + // `if_source_generation_match` and `if_source_generation_not_match` + // conditions are mutually exclusive: it's an error for both of them to be set + // in the request. + optional int64 if_source_generation_not_match = 5 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the source object's + // current metageneration matches the given value. + // `if_source_metageneration_match` and `if_source_metageneration_not_match` + // conditions are mutually exclusive: it's an error for both of them to be set + // in the request. + optional int64 if_source_metageneration_match = 6 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the source object's + // current metageneration does not match the given value. + // `if_source_metageneration_match` and `if_source_metageneration_not_match` + // conditions are mutually exclusive: it's an error for both of them to be set + // in the request. + optional int64 if_source_metageneration_not_match = 7 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the destination + // object's current generation matches the given value. Setting to 0 makes the + // operation succeed only if there are no live versions of the object. + // `if_generation_match` and `if_generation_not_match` conditions are mutually + // exclusive: it's an error for both of them to be set in the request. + optional int64 if_generation_match = 8 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the destination + // object's current generation does not match the given value. If no live + // object exists, the precondition fails. Setting to 0 makes the operation + // succeed only if there is a live version of the object. + // `if_generation_match` and `if_generation_not_match` conditions are mutually + // exclusive: it's an error for both of them to be set in the request. 
+ optional int64 if_generation_not_match = 9 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the destination + // object's current metageneration matches the given value. + // `if_metageneration_match` and `if_metageneration_not_match` conditions are + // mutually exclusive: it's an error for both of them to be set in the + // request. + optional int64 if_metageneration_match = 10 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Makes the operation conditional on whether the destination + // object's current metageneration does not match the given value. + // `if_metageneration_match` and `if_metageneration_not_match` conditions are + // mutually exclusive: it's an error for both of them to be set in the + // request. + optional int64 if_metageneration_not_match = 11 + [(google.api.field_behavior) = OPTIONAL]; +} + +// Request message StartResumableWrite. +message StartResumableWriteRequest { + // Required. The destination bucket, object, and metadata, as well as any + // preconditions. + WriteObjectSpec write_object_spec = 1 + [(google.api.field_behavior) = REQUIRED]; + + // A set of parameters common to Storage API requests concerning an object. + CommonObjectRequestParams common_object_request_params = 3; + + // The checksums of the complete object. This will be used to validate the + // uploaded object. For each upload, object_checksums can be provided with + // either StartResumableWriteRequest or the WriteObjectRequest with + // finish_write set to `true`. + ObjectChecksums object_checksums = 5; +} + +// Response object for `StartResumableWrite`. +message StartResumableWriteResponse { + // The upload_id of the newly started resumable write operation. This + // value should be copied into the `WriteObjectRequest.upload_id` field. + string upload_id = 1; +} + +// Request message for UpdateObject. +message UpdateObjectRequest { + // Required. The object to update. + // The object's bucket and name fields are used to identify the object to + // update. If present, the object's generation field selects a specific + // revision of this object whose metadata should be updated. Otherwise, + // assumes the live version of the object. + Object object = 1 [(google.api.field_behavior) = REQUIRED]; + + // Makes the operation conditional on whether the object's current generation + // matches the given value. Setting to 0 makes the operation succeed only if + // there are no live versions of the object. + optional int64 if_generation_match = 2; + + // Makes the operation conditional on whether the object's live generation + // does not match the given value. If no live object exists, the precondition + // fails. Setting to 0 makes the operation succeed only if there is a live + // version of the object. + optional int64 if_generation_not_match = 3; + + // Makes the operation conditional on whether the object's current + // metageneration matches the given value. + optional int64 if_metageneration_match = 4; + + // Makes the operation conditional on whether the object's current + // metageneration does not match the given value. + optional int64 if_metageneration_not_match = 5; + + // Apply a predefined set of access controls to this object. + // Valid values are "authenticatedRead", "bucketOwnerFullControl", + // "bucketOwnerRead", "private", "projectPrivate", or "publicRead". + string predefined_acl = 10; + + // Required. List of fields to be updated. 
+ // + // To specify ALL fields, equivalent to the JSON API's "update" function, + // specify a single field with the value `*`. Note: not recommended. If a new + // field is introduced at a later time, an older client updating with the `*` + // may accidentally reset the new field's value. + // + // Not specifying any fields is an error. + google.protobuf.FieldMask update_mask = 7 + [(google.api.field_behavior) = REQUIRED]; + + // A set of parameters common to Storage API requests concerning an object. + CommonObjectRequestParams common_object_request_params = 8; +} + +// Parameters that can be passed to any object request. +message CommonObjectRequestParams { + // Encryption algorithm used with the Customer-Supplied Encryption Keys + // feature. + string encryption_algorithm = 1; + + // Encryption key used with the Customer-Supplied Encryption Keys feature. + // In raw bytes format (not base64-encoded). + bytes encryption_key_bytes = 4; + + // SHA256 hash of encryption key used with the Customer-Supplied Encryption + // Keys feature. + bytes encryption_key_sha256_bytes = 5; +} + +// Shared constants. +message ServiceConstants { + // A collection of constant values meaningful to the Storage API. + enum Values { + option allow_alias = true; + + // Unused. Proto3 requires first enum to be 0. + VALUES_UNSPECIFIED = 0; + + // The maximum size chunk that can will be returned in a single + // ReadRequest. + // 2 MiB. + MAX_READ_CHUNK_BYTES = 2097152; + + // The maximum size chunk that can be sent in a single WriteObjectRequest. + // 2 MiB. + MAX_WRITE_CHUNK_BYTES = 2097152; + + // The maximum size of an object in MB - whether written in a single stream + // or composed from multiple other objects. + // 5 TiB. + MAX_OBJECT_SIZE_MB = 5242880; + + // The maximum length field name that can be sent in a single + // custom metadata field. + // 1 KiB. + MAX_CUSTOM_METADATA_FIELD_NAME_BYTES = 1024; + + // The maximum length field value that can be sent in a single + // custom_metadata field. + // 4 KiB. + MAX_CUSTOM_METADATA_FIELD_VALUE_BYTES = 4096; + + // The maximum total bytes that can be populated into all field names and + // values of the custom_metadata for one object. + // 8 KiB. + MAX_CUSTOM_METADATA_TOTAL_SIZE_BYTES = 8192; + + // The maximum total bytes that can be populated into all bucket metadata + // fields. + // 20 KiB. + MAX_BUCKET_METADATA_TOTAL_SIZE_BYTES = 20480; + + // The maximum number of NotificationConfigs that can be registered + // for a given bucket. + MAX_NOTIFICATION_CONFIGS_PER_BUCKET = 100; + + // The maximum number of LifecycleRules that can be registered for a given + // bucket. + MAX_LIFECYCLE_RULES_PER_BUCKET = 100; + + // The maximum number of custom attributes per NotificationConfigs. + MAX_NOTIFICATION_CUSTOM_ATTRIBUTES = 5; + + // The maximum length of a custom attribute key included in + // NotificationConfig. + MAX_NOTIFICATION_CUSTOM_ATTRIBUTE_KEY_LENGTH = 256; + + // The maximum length of a custom attribute value included in a + // NotificationConfig. + MAX_NOTIFICATION_CUSTOM_ATTRIBUTE_VALUE_LENGTH = 1024; + + // The maximum number of key/value entries per bucket label. + MAX_LABELS_ENTRIES_COUNT = 64; + + // The maximum character length of the key or value in a bucket + // label map. + MAX_LABELS_KEY_VALUE_LENGTH = 63; + + // The maximum byte size of the key or value in a bucket label + // map. + MAX_LABELS_KEY_VALUE_BYTES = 128; + + // The maximum number of object IDs that can be included in a + // DeleteObjectsRequest. 
+ MAX_OBJECT_IDS_PER_DELETE_OBJECTS_REQUEST = 1000; + + // The maximum number of days for which a token returned by the + // GetListObjectsSplitPoints RPC is valid. + SPLIT_TOKEN_MAX_VALID_DAYS = 14; + } +} + +// A bucket. +message Bucket { + option (google.api.resource) = { + type: "storage.googleapis.com/Bucket" + pattern: "projects/{project}/buckets/{bucket}" + plural: "buckets" + singular: "bucket" + }; + + // Billing properties of a bucket. + message Billing { + // When set to true, Requester Pays is enabled for this bucket. + bool requester_pays = 1; + } + + // Cross-Origin Response sharing (CORS) properties for a bucket. + // For more on Cloud Storage and CORS, see + // https://cloud.google.com/storage/docs/cross-origin. + // For more on CORS in general, see https://tools.ietf.org/html/rfc6454. + message Cors { + // The list of Origins eligible to receive CORS response headers. See + // [https://tools.ietf.org/html/rfc6454][RFC 6454] for more on origins. + // Note: "*" is permitted in the list of origins, and means "any Origin". + repeated string origin = 1; + + // The list of HTTP methods on which to include CORS response headers, + // (`GET`, `OPTIONS`, `POST`, etc) Note: "*" is permitted in the list of + // methods, and means "any method". + repeated string method = 2; + + // The list of HTTP headers other than the + // [https://www.w3.org/TR/cors/#simple-response-header][simple response + // headers] to give permission for the user-agent to share across domains. + repeated string response_header = 3; + + // The value, in seconds, to return in the + // [https://www.w3.org/TR/cors/#access-control-max-age-response-header][Access-Control-Max-Age + // header] used in preflight responses. + int32 max_age_seconds = 4; + } + + // Encryption properties of a bucket. + message Encryption { + // The name of the Cloud KMS key that will be used to encrypt objects + // inserted into this bucket, if no encryption method is specified. + string default_kms_key = 1 [(google.api.resource_reference) = { + type: "cloudkms.googleapis.com/CryptoKey" + }]; + } + + // Bucket restriction options. + message IamConfig { + // Settings for Uniform Bucket level access. + // See https://cloud.google.com/storage/docs/uniform-bucket-level-access. + message UniformBucketLevelAccess { + // If set, access checks only use bucket-level IAM policies or above. + bool enabled = 1; + + // The deadline time for changing + // `iam_config.uniform_bucket_level_access.enabled` from `true` to + // `false`. Mutable until the specified deadline is reached, but not + // afterward. + google.protobuf.Timestamp lock_time = 2; + } + + // Bucket restriction options currently enforced on the bucket. + UniformBucketLevelAccess uniform_bucket_level_access = 1; + + // Whether IAM will enforce public access prevention. Valid values are + // "enforced" or "inherited". + string public_access_prevention = 3; + } + + // Lifecycle properties of a bucket. + // For more information, see https://cloud.google.com/storage/docs/lifecycle. + message Lifecycle { + // A lifecycle Rule, combining an action to take on an object and a + // condition which will trigger that action. + message Rule { + // An action to take on an object. + message Action { + // Type of the action. Currently, only `Delete`, `SetStorageClass`, and + // `AbortIncompleteMultipartUpload` are supported. + string type = 1; + + // Target storage class. Required iff the type of the action is + // SetStorageClass. 
+ string storage_class = 2; + } + + // A condition of an object which triggers some action. + message Condition { + // Age of an object (in days). This condition is satisfied when an + // object reaches the specified age. + // A value of 0 indicates that all objects immediately match this + // condition. + optional int32 age_days = 1; + + // This condition is satisfied when an object is created before midnight + // of the specified date in UTC. + google.type.Date created_before = 2; + + // Relevant only for versioned objects. If the value is + // `true`, this condition matches live objects; if the value + // is `false`, it matches archived objects. + optional bool is_live = 3; + + // Relevant only for versioned objects. If the value is N, this + // condition is satisfied when there are at least N versions (including + // the live version) newer than this version of the object. + optional int32 num_newer_versions = 4; + + // Objects having any of the storage classes specified by this condition + // will be matched. Values include `MULTI_REGIONAL`, `REGIONAL`, + // `NEARLINE`, `COLDLINE`, `STANDARD`, and + // `DURABLE_REDUCED_AVAILABILITY`. + repeated string matches_storage_class = 5; + + // Number of days that have elapsed since the custom timestamp set on an + // object. + // The value of the field must be a nonnegative integer. + optional int32 days_since_custom_time = 7; + + // An object matches this condition if the custom timestamp set on the + // object is before the specified date in UTC. + google.type.Date custom_time_before = 8; + + // This condition is relevant only for versioned objects. An object + // version satisfies this condition only if these many days have been + // passed since it became noncurrent. The value of the field must be a + // nonnegative integer. If it's zero, the object version will become + // eligible for Lifecycle action as soon as it becomes noncurrent. + optional int32 days_since_noncurrent_time = 9; + + // This condition is relevant only for versioned objects. An object + // version satisfies this condition only if it became noncurrent before + // the specified date in UTC. + google.type.Date noncurrent_time_before = 10; + + // List of object name prefixes. If any prefix exactly matches the + // beginning of the object name, the condition evaluates to true. + repeated string matches_prefix = 11; + + // List of object name suffixes. If any suffix exactly matches the + // end of the object name, the condition evaluates to true. + repeated string matches_suffix = 12; + } + + // The action to take. + Action action = 1; + + // The condition(s) under which the action will be taken. + Condition condition = 2; + } + + // A lifecycle management rule, which is made of an action to take and the + // condition(s) under which the action will be taken. + repeated Rule rule = 1; + } + + // Logging-related properties of a bucket. + message Logging { + // The destination bucket where the current bucket's logs should be placed, + // using path format (like `projects/123456/buckets/foo`). + string log_bucket = 1; + + // A prefix for log object names. + string log_object_prefix = 2; + } + + // Retention policy properties of a bucket. + message RetentionPolicy { + // Server-determined value that indicates the time from which policy was + // enforced and effective. + google.protobuf.Timestamp effective_time = 1; + + // Once locked, an object retention policy cannot be modified. + bool is_locked = 2; + + // The duration that objects need to be retained. 
Retention duration must be + // greater than zero and less than 100 years. Note that enforcement of + // retention periods less than a day is not guaranteed. Such periods should + // only be used for testing purposes. Any `nanos` value specified will be + // rounded down to the nearest second. + google.protobuf.Duration retention_duration = 4; + } + + // Soft delete policy properties of a bucket. + message SoftDeletePolicy { + // The period of time that soft-deleted objects in the bucket must be + // retained and cannot be permanently deleted. The duration must be greater + // than or equal to 7 days and less than 1 year. + optional google.protobuf.Duration retention_duration = 1; + + // Time from which the policy was effective. This is service-provided. + optional google.protobuf.Timestamp effective_time = 2; + } + + // Properties of a bucket related to versioning. + // For more on Cloud Storage versioning, see + // https://cloud.google.com/storage/docs/object-versioning. + message Versioning { + // While set to true, versioning is fully enabled for this bucket. + bool enabled = 1; + } + + // Properties of a bucket related to accessing the contents as a static + // website. For more on hosting a static website via Cloud Storage, see + // https://cloud.google.com/storage/docs/hosting-static-website. + message Website { + // If the requested object path is missing, the service will ensure the path + // has a trailing '/', append this suffix, and attempt to retrieve the + // resulting object. This allows the creation of `index.html` + // objects to represent directory pages. + string main_page_suffix = 1; + + // If the requested object path is missing, and any + // `mainPageSuffix` object is missing, if applicable, the service + // will return the named object from this bucket as the content for a + // [https://tools.ietf.org/html/rfc7231#section-6.5.4][404 Not Found] + // result. + string not_found_page = 2; + } + + // Configuration for Custom Dual Regions. It should specify precisely two + // eligible regions within the same Multiregion. More information on regions + // may be found [https://cloud.google.com/storage/docs/locations][here]. + message CustomPlacementConfig { + // List of locations to use for data placement. + repeated string data_locations = 1; + } + + // Configuration for a bucket's Autoclass feature. + message Autoclass { + // Enables Autoclass. + bool enabled = 1; + + // Output only. Latest instant at which the `enabled` field was set to true + // after being disabled/unconfigured or set to false after being enabled. If + // Autoclass is enabled when the bucket is created, the toggle_time is set + // to the bucket creation time. + google.protobuf.Timestamp toggle_time = 2 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // An object in an Autoclass bucket will eventually cool down to the + // terminal storage class if there is no access to the object. + // The only valid values are NEARLINE and ARCHIVE. + optional string terminal_storage_class = 3; + + // Output only. Latest instant at which the autoclass terminal storage class + // was updated. + optional google.protobuf.Timestamp terminal_storage_class_update_time = 4 + [(google.api.field_behavior) = OUTPUT_ONLY]; + } + + // Configuration for a bucket's hierarchical namespace feature. + message HierarchicalNamespace { + // Optional. Enables the hierarchical namespace feature. + bool enabled = 1 [(google.api.field_behavior) = OPTIONAL]; + } + + // Immutable. The name of the bucket. 
+ // Format: `projects/{project}/buckets/{bucket}` + string name = 1 [(google.api.field_behavior) = IMMUTABLE]; + + // Output only. The user-chosen part of the bucket name. The `{bucket}` + // portion of the `name` field. For globally unique buckets, this is equal to + // the "bucket name" of other Cloud Storage APIs. Example: "pub". + string bucket_id = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // The etag of the bucket. + // If included in the metadata of an UpdateBucketRequest, the operation will + // only be performed if the etag matches that of the bucket. + string etag = 29; + + // Immutable. The project which owns this bucket, in the format of + // "projects/{projectIdentifier}". + // {projectIdentifier} can be the project ID or project number. + string project = 3 [ + (google.api.field_behavior) = IMMUTABLE, + (google.api.resource_reference) = { + type: "cloudresourcemanager.googleapis.com/Project" + } + ]; + + // Output only. The metadata generation of this bucket. + int64 metageneration = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Immutable. The location of the bucket. Object data for objects in the + // bucket resides in physical storage within this region. Defaults to `US`. + // See the + // [https://developers.google.com/storage/docs/concepts-techniques#specifyinglocations"][developer's + // guide] for the authoritative list. Attempting to update this field after + // the bucket is created will result in an error. + string location = 5 [(google.api.field_behavior) = IMMUTABLE]; + + // Output only. The location type of the bucket (region, dual-region, + // multi-region, etc). + string location_type = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // The bucket's default storage class, used whenever no storageClass is + // specified for a newly-created object. This defines how objects in the + // bucket are stored and determines the SLA and the cost of storage. + // If this value is not specified when the bucket is created, it will default + // to `STANDARD`. For more information, see + // https://developers.google.com/storage/docs/storage-classes. + string storage_class = 7; + + // The recovery point objective for cross-region replication of the bucket. + // Applicable only for dual- and multi-region buckets. "DEFAULT" uses default + // replication. "ASYNC_TURBO" enables turbo replication, valid for dual-region + // buckets only. If rpo is not specified when the bucket is created, it + // defaults to "DEFAULT". For more information, see + // https://cloud.google.com/storage/docs/availability-durability#turbo-replication. + string rpo = 27; + + // Access controls on the bucket. + // If iam_config.uniform_bucket_level_access is enabled on this bucket, + // requests to set, read, or modify acl is an error. + repeated BucketAccessControl acl = 8; + + // Default access controls to apply to new objects when no ACL is provided. + // If iam_config.uniform_bucket_level_access is enabled on this bucket, + // requests to set, read, or modify acl is an error. + repeated ObjectAccessControl default_object_acl = 9; + + // The bucket's lifecycle config. See + // [https://developers.google.com/storage/docs/lifecycle]Lifecycle Management] + // for more information. + Lifecycle lifecycle = 10; + + // Output only. The creation time of the bucket. + google.protobuf.Timestamp create_time = 11 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // The bucket's [https://www.w3.org/TR/cors/][Cross-Origin Resource Sharing] + // (CORS) config. 
+  repeated Cors cors = 12;
+
+  // Output only. The modification time of the bucket.
+  google.protobuf.Timestamp update_time = 13
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // The default value for event-based hold on newly created objects in this
+  // bucket. Event-based hold is a way to retain objects indefinitely until an
+  // event occurs, signified by the
+  // hold's release. After being released, such objects will be subject to
+  // bucket-level retention (if any). One sample use case of this flag is for
+  // banks to hold loan documents for at least 3 years after loan is paid in
+  // full. Here, bucket-level retention is 3 years and the event is loan being
+  // paid in full. In this example, these objects will be held intact for any
+  // number of years until the event has occurred (event-based hold on the
+  // object is released) and then 3 more years after that. That means retention
+  // duration of the objects begins from the moment event-based hold
+  // transitioned from true to false. Objects under event-based hold cannot be
+  // deleted, overwritten or archived until the hold is removed.
+  bool default_event_based_hold = 14;
+
+  // User-provided labels, in key/value pairs.
+  map<string, string> labels = 15;
+
+  // The bucket's website config, controlling how the service behaves
+  // when accessing bucket contents as a web site. See the
+  // [https://cloud.google.com/storage/docs/static-website][Static Website
+  // Examples] for more information.
+  Website website = 16;
+
+  // The bucket's versioning config.
+  Versioning versioning = 17;
+
+  // The bucket's logging config, which defines the destination bucket
+  // and name prefix (if any) for the current bucket's logs.
+  Logging logging = 18;
+
+  // Output only. The owner of the bucket. This is always the project team's
+  // owner group.
+  Owner owner = 19 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Encryption config for a bucket.
+  Encryption encryption = 20;
+
+  // The bucket's billing config.
+  Billing billing = 21;
+
+  // The bucket's retention policy. The retention policy enforces a minimum
+  // retention time for all objects contained in the bucket, based on their
+  // creation time. Any attempt to overwrite or delete objects younger than the
+  // retention period will result in a PERMISSION_DENIED error. An unlocked
+  // retention policy can be modified or removed from the bucket via a
+  // storage.buckets.update operation. A locked retention policy cannot be
+  // removed or shortened in duration for the lifetime of the bucket.
+  // Attempting to remove or decrease period of a locked retention policy will
+  // result in a PERMISSION_DENIED error.
+  RetentionPolicy retention_policy = 22;
+
+  // The bucket's IAM config.
+  IamConfig iam_config = 23;
+
+  // Reserved for future use.
+  bool satisfies_pzs = 25;
+
+  // Configuration that, if present, specifies the data placement for a
+  // [https://cloud.google.com/storage/docs/locations#location-dr][configurable
+  // dual-region].
+  CustomPlacementConfig custom_placement_config = 26;
+
+  // The bucket's Autoclass configuration. If there is no configuration, the
+  // Autoclass feature will be disabled and have no effect on the bucket.
+  Autoclass autoclass = 28;
+
+  // Optional. The bucket's hierarchical namespace configuration. If there is no
+  // configuration, the hierarchical namespace feature will be disabled and have
+  // no effect on the bucket.
+  HierarchicalNamespace hierarchical_namespace = 32
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional.
The bucket's soft delete policy. The soft delete policy prevents + // soft-deleted objects from being permanently deleted. + SoftDeletePolicy soft_delete_policy = 31 + [(google.api.field_behavior) = OPTIONAL]; +} + +// An access-control entry. +message BucketAccessControl { + // The access permission for the entity. + string role = 1; + + // The ID of the access-control entry. + string id = 2; + + // The entity holding the permission, in one of the following forms: + // * `user-{userid}` + // * `user-{email}` + // * `group-{groupid}` + // * `group-{email}` + // * `domain-{domain}` + // * `project-{team}-{projectnumber}` + // * `project-{team}-{projectid}` + // * `allUsers` + // * `allAuthenticatedUsers` + // Examples: + // * The user `liz@example.com` would be `user-liz@example.com`. + // * The group `example@googlegroups.com` would be + // `group-example@googlegroups.com` + // * All members of the Google Apps for Business domain `example.com` would be + // `domain-example.com` + // For project entities, `project-{team}-{projectnumber}` format will be + // returned on response. + string entity = 3; + + // Output only. The alternative entity format, if exists. For project + // entities, `project-{team}-{projectid}` format will be returned on response. + string entity_alt = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // The ID for the entity, if any. + string entity_id = 4; + + // The etag of the BucketAccessControl. + // If included in the metadata of an update or delete request message, the + // operation operation will only be performed if the etag matches that of the + // bucket's BucketAccessControl. + string etag = 8; + + // The email address associated with the entity, if any. + string email = 5; + + // The domain associated with the entity, if any. + string domain = 6; + + // The project team associated with the entity, if any. + ProjectTeam project_team = 7; +} + +// Message used to convey content being read or written, along with an optional +// checksum. +message ChecksummedData { + // Optional. The data. + bytes content = 1 [ctype = CORD, (google.api.field_behavior) = OPTIONAL]; + + // If set, the CRC32C digest of the content field. + optional fixed32 crc32c = 2; +} + +// Message used for storing full (not subrange) object checksums. +message ObjectChecksums { + // CRC32C digest of the object data. Computed by the Cloud Storage service for + // all written objects. + // If set in a WriteObjectRequest, service will validate that the stored + // object matches this checksum. + optional fixed32 crc32c = 1; + + // 128 bit MD5 hash of the object data. + // For more information about using the MD5 hash, see + // [https://cloud.google.com/storage/docs/hashes-etags#json-api][Hashes and + // ETags: Best Practices]. + // Not all objects will provide an MD5 hash. For example, composite objects + // provide only crc32c hashes. This value is equivalent to running `cat + // object.txt | openssl md5 -binary` + bytes md5_hash = 2; +} + +// Describes the Customer-Supplied Encryption Key mechanism used to store an +// Object's data at rest. +message CustomerEncryption { + // The encryption algorithm. + string encryption_algorithm = 1; + + // SHA256 hash value of the encryption key. + // In raw bytes format (not base64-encoded). + bytes key_sha256_bytes = 3; +} + +// An object. +message Object { + // Immutable. The name of this object. Nearly any sequence of unicode + // characters is valid. See + // [Guidelines](https://cloud.google.com/storage/docs/objects#naming). 
+ // Example: `test.txt` + // The `name` field by itself does not uniquely identify a Cloud Storage + // object. A Cloud Storage object is uniquely identified by the tuple of + // (bucket, object, generation). + string name = 1 [(google.api.field_behavior) = IMMUTABLE]; + + // Immutable. The name of the bucket containing this object. + string bucket = 2 [ + (google.api.field_behavior) = IMMUTABLE, + (google.api.resource_reference) = { type: "storage.googleapis.com/Bucket" } + ]; + + // The etag of the object. + // If included in the metadata of an update or delete request message, the + // operation will only be performed if the etag matches that of the live + // object. + string etag = 27; + + // Immutable. The content generation of this object. Used for object + // versioning. + int64 generation = 3 [(google.api.field_behavior) = IMMUTABLE]; + + // Output only. Restore token used to differentiate deleted objects with the + // same name and generation. This field is output only, and only set for + // deleted objects in HNS buckets. + optional string restore_token = 35 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. The version of the metadata for this generation of this + // object. Used for preconditions and for detecting changes in metadata. A + // metageneration number is only meaningful in the context of a particular + // generation of a particular object. + int64 metageneration = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Storage class of the object. + string storage_class = 5; + + // Output only. Content-Length of the object data in bytes, matching + // [https://tools.ietf.org/html/rfc7230#section-3.3.2][RFC 7230 §3.3.2]. + int64 size = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Content-Encoding of the object data, matching + // [https://tools.ietf.org/html/rfc7231#section-3.1.2.2][RFC 7231 §3.1.2.2] + string content_encoding = 7; + + // Content-Disposition of the object data, matching + // [https://tools.ietf.org/html/rfc6266][RFC 6266]. + string content_disposition = 8; + + // Cache-Control directive for the object data, matching + // [https://tools.ietf.org/html/rfc7234#section-5.2"][RFC 7234 §5.2]. + // If omitted, and the object is accessible to all anonymous users, the + // default will be `public, max-age=3600`. + string cache_control = 9; + + // Access controls on the object. + // If iam_config.uniform_bucket_level_access is enabled on the parent + // bucket, requests to set, read, or modify acl is an error. + repeated ObjectAccessControl acl = 10; + + // Content-Language of the object data, matching + // [https://tools.ietf.org/html/rfc7231#section-3.1.3.2][RFC 7231 §3.1.3.2]. + string content_language = 11; + + // Output only. If this object is noncurrent, this is the time when the object + // became noncurrent. + google.protobuf.Timestamp delete_time = 12 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. The time when the object was finalized. + google.protobuf.Timestamp finalize_time = 36 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Content-Type of the object data, matching + // [https://tools.ietf.org/html/rfc7231#section-3.1.1.5][RFC 7231 §3.1.1.5]. + // If an object is stored without a Content-Type, it is served as + // `application/octet-stream`. + string content_type = 13; + + // Output only. The creation time of the object. + google.protobuf.Timestamp create_time = 14 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. Number of underlying components that make up this object. 
+  // Components are accumulated by compose operations.
+  int32 component_count = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Hashes for the data part of this object. This field is used
+  // for output only and will be silently ignored if provided in requests. The
+  // checksums of the complete object regardless of data range. If the object is
+  // downloaded in full, the client should compute one of these checksums over
+  // the downloaded object and compare it against the value provided here.
+  ObjectChecksums checksums = 16 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The modification time of the object metadata.
+  // Set initially to object creation time and then updated whenever any
+  // metadata of the object changes. This includes changes made by a requester,
+  // such as modifying custom metadata, as well as changes made by Cloud Storage
+  // on behalf of a requester, such as changing the storage class based on an
+  // Object Lifecycle Configuration.
+  google.protobuf.Timestamp update_time = 17
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Cloud KMS Key used to encrypt this object, if the object is encrypted by
+  // such a key.
+  string kms_key = 18 [(google.api.resource_reference) = {
+    type: "cloudkms.googleapis.com/CryptoKey"
+  }];
+
+  // Output only. The time at which the object's storage class was last changed.
+  // When the object is initially created, it will be set to time_created.
+  google.protobuf.Timestamp update_storage_class_time = 19
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Whether an object is under temporary hold. While this flag is set to true,
+  // the object is protected against deletion and overwrites. A common use case
+  // of this flag is regulatory investigations where objects need to be retained
+  // while the investigation is ongoing. Note that unlike event-based hold,
+  // temporary hold does not impact retention expiration time of an object.
+  bool temporary_hold = 20;
+
+  // A server-determined value that specifies the earliest time that the
+  // object's retention period expires.
+  // Note 1: This field is not provided for objects with an active event-based
+  // hold, since retention expiration is unknown until the hold is removed.
+  // Note 2: This value can be provided even when temporary hold is set (so that
+  // the user can reason about policy without having to first unset the
+  // temporary hold).
+  google.protobuf.Timestamp retention_expire_time = 21;
+
+  // User-provided metadata, in key/value pairs.
+  map<string, string> metadata = 22;
+
+  // Whether an object is under event-based hold.
+  // An event-based hold is a way to force the retention of an object until
+  // after some event occurs. Once the hold is released by explicitly setting
+  // this field to false, the object will become subject to any bucket-level
+  // retention policy, except that the retention duration will be calculated
+  // from the time the event based hold was lifted, rather than the time the
+  // object was created.
+  //
+  // In a WriteObject request, not setting this field implies that the value
+  // should be taken from the parent bucket's "default_event_based_hold" field.
+  // In a response, this field will always be set to true or false.
+  optional bool event_based_hold = 23;
+
+  // Output only. The owner of the object. This will always be the uploader of
+  // the object.
+ Owner owner = 24 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Metadata of Customer-Supplied Encryption Key, if the object is encrypted by + // such a key. + CustomerEncryption customer_encryption = 25; + + // A user-specified timestamp set on an object. + google.protobuf.Timestamp custom_time = 26; + + // Output only. This is the time when the object became soft-deleted. + // + // Soft-deleted objects are only accessible if a soft_delete_policy is + // enabled. Also see hard_delete_time. + optional google.protobuf.Timestamp soft_delete_time = 28 + [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. The time when the object will be permanently deleted. + // + // Only set when an object becomes soft-deleted with a soft_delete_policy. + // Otherwise, the object will not be accessible. + optional google.protobuf.Timestamp hard_delete_time = 29 + [(google.api.field_behavior) = OUTPUT_ONLY]; +} + +// An access-control entry. +message ObjectAccessControl { + // The access permission for the entity. One of the following values: + // * `READER` + // * `WRITER` + // * `OWNER` + string role = 1; + + // The ID of the access-control entry. + string id = 2; + + // The entity holding the permission, in one of the following forms: + // * `user-{userid}` + // * `user-{email}` + // * `group-{groupid}` + // * `group-{email}` + // * `domain-{domain}` + // * `project-{team}-{projectnumber}` + // * `project-{team}-{projectid}` + // * `allUsers` + // * `allAuthenticatedUsers` + // Examples: + // * The user `liz@example.com` would be `user-liz@example.com`. + // * The group `example@googlegroups.com` would be + // `group-example@googlegroups.com`. + // * All members of the Google Apps for Business domain `example.com` would be + // `domain-example.com`. + // For project entities, `project-{team}-{projectnumber}` format will be + // returned on response. + string entity = 3; + + // Output only. The alternative entity format, if exists. For project + // entities, `project-{team}-{projectid}` format will be returned on response. + string entity_alt = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // The ID for the entity, if any. + string entity_id = 4; + + // The etag of the ObjectAccessControl. + // If included in the metadata of an update or delete request message, the + // operation will only be performed if the etag matches that of the live + // object's ObjectAccessControl. + string etag = 8; + + // The email address associated with the entity, if any. + string email = 5; + + // The domain associated with the entity, if any. + string domain = 6; + + // The project team associated with the entity, if any. + ProjectTeam project_team = 7; +} + +// The result of a call to Objects.ListObjects +message ListObjectsResponse { + // The list of items. + repeated Object objects = 1; + + // The list of prefixes of objects matching-but-not-listed up to and including + // the requested delimiter. + repeated string prefixes = 2; + + // The continuation token, used to page through large result sets. Provide + // this value in a subsequent request to return the next page of results. + string next_page_token = 3; +} + +// Represents the Viewers, Editors, or Owners of a given project. +message ProjectTeam { + // The project number. + string project_number = 1; + + // The team. + string team = 2; +} + +// The owner of a specific resource. +message Owner { + // The entity, in the form `user-`*userId*. + string entity = 1; + + // The ID for the entity. 
+ string entity_id = 2; +} + +// Specifies a requested range of bytes to download. +message ContentRange { + // The starting offset of the object data. This value is inclusive. + int64 start = 1; + + // The ending offset of the object data. This value is exclusive. + int64 end = 2; + + // The complete length of the object data. + int64 complete_length = 3; +} \ No newline at end of file diff --git a/nativelink-store/src/gcs_store.rs b/nativelink-store/src/gcs_store.rs new file mode 100644 index 000000000..d910c6e74 --- /dev/null +++ b/nativelink-store/src/gcs_store.rs @@ -0,0 +1,490 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use futures::stream::{unfold, StreamExt}; +use nativelink_config::stores::GcsSpec; +use nativelink_error::{make_err, Code, Error, ResultExt}; +use nativelink_metric::MetricsComponent; +use nativelink_proto::build::bazel::remote::execution::v2 as remexec; +use nativelink_proto::google::storage::v2::storage_client::StorageClient; +use nativelink_proto::google::storage::v2::{Object, WriteObjectSpec}; +use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; +use nativelink_util::health_utils::{HealthStatus, HealthStatusIndicator}; +use nativelink_util::instant_wrapper::InstantWrapper; +use nativelink_util::retry::{Retrier, RetryResult}; +use nativelink_util::store_trait::{StoreDriver, StoreKey, UploadSizeInfo}; +use rand::rngs::OsRng; +use rand::Rng; +use tokio::time::sleep; +use tonic::transport::Channel; + +use crate::cas_utils::is_zero_digest; + +const DEFAULT_MAX_RETRY_BUFFER_PER_REQUEST: usize = 5 * 1024 * 1024; // 5MB +const DEFAULT_MULTIPART_MAX_CONCURRENT_UPLOADS: usize = 10; + +/// GcsStore provides a Store implementation backed by Google Cloud Storage. +/// It uses the official GCS gRPC API for all operations. 
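+///
+/// A construction sketch (illustrative only; it assumes the store factory
+/// passes `SystemTime::now` as the now-fn, mirroring the existing S3 store,
+/// and that `Retry` implements `Default`):
+///
+/// ```no_run
+/// # use std::time::SystemTime;
+/// # use nativelink_config::stores::GcsSpec;
+/// # use nativelink_store::gcs_store::GcsStore;
+/// # async fn example() -> Result<(), nativelink_error::Error> {
+/// let spec = GcsSpec {
+///     bucket: "my-bucket".to_string(),
+///     key_prefix: Some("my-prefix/".to_string()),
+///     consider_expired_after_s: 3600,
+///     retry: Default::default(),
+///     max_retry_buffer_per_request: None,     // falls back to 5MB
+///     multipart_max_concurrent_uploads: None, // falls back to 10
+/// };
+/// let _store = GcsStore::new(&spec, SystemTime::now).await?;
+/// # Ok(())
+/// # }
+/// ```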
+#[derive(Clone, MetricsComponent)]
+pub struct GcsStore<NowFn> {
+    /// The GCS client for making API calls
+    client: StorageClient<Channel>,
+    /// Function to get current time
+    now_fn: NowFn,
+    /// The GCS bucket name
+    #[metric(help = "The GCS bucket name")]
+    bucket: String,
+    /// Optional prefix for all object keys
+    #[metric(help = "The key prefix for the GCS store")]
+    key_prefix: String,
+    /// Retry configuration for failed operations
+    retrier: Retrier,
+    /// Time after which objects are considered expired
+    #[metric(help = "The number of seconds to consider an object expired")]
+    consider_expired_after_s: i64,
+    /// Maximum buffer size for retrying requests
+    #[metric(help = "The number of bytes to buffer for retrying requests")]
+    max_retry_buffer_per_request: usize,
+    /// Maximum concurrent uploads for multipart operations
+    #[metric(help = "The number of concurrent uploads allowed for multipart uploads")]
+    multipart_max_concurrent_uploads: usize,
+}
+
+impl<I, NowFn> GcsStore<NowFn>
+where
+    I: InstantWrapper,
+    NowFn: Fn() -> I + Send + Sync + Unpin + 'static,
+{
+    /// Creates a new GcsStore instance.
+    ///
+    /// Implementation steps:
+    /// 1. Set up GCS client with proper authentication
+    /// 2. Initialize connection to GCS using provided configuration
+    /// 3. Set up retry policy and other configurations
+    /// 4. Return wrapped in Arc for thread-safety
+    pub async fn new(spec: &GcsSpec, now_fn: NowFn) -> Result<Arc<Self>, Error> {
+        let jitter_amt = spec.retry.jitter;
+        let jitter_fn = Arc::new(move |delay: Duration| {
+            if jitter_amt == 0. {
+                return delay;
+            }
+            let min = 1. - (jitter_amt / 2.);
+            let max = 1. + (jitter_amt / 2.);
+            delay.mul_f32(OsRng.gen_range(min..max))
+        });
+
+        // Open a channel to the GCS gRPC endpoint. Note: no credentials are
+        // attached yet, so authentication still needs to be wired in here.
+        let channel = Channel::from_static("https://storage.googleapis.com")
+            .connect()
+            .await
+            .err_tip(|| "Failed to create GCS channel")?;
+
+        let client = StorageClient::new(channel);
+
+        // Create GcsStore instance with configuration
+        let store = Self {
+            client,
+            now_fn,
+            bucket: spec.bucket.clone(),
+            key_prefix: spec.key_prefix.clone().unwrap_or_default(),
+            retrier: Retrier::new(
+                Arc::new(|duration| Box::pin(sleep(duration))),
+                jitter_fn,
+                spec.retry.clone(),
+            ),
+            consider_expired_after_s: i64::from(spec.consider_expired_after_s),
+            max_retry_buffer_per_request: spec
+                .max_retry_buffer_per_request
+                .unwrap_or(DEFAULT_MAX_RETRY_BUFFER_PER_REQUEST),
+            multipart_max_concurrent_uploads: spec
+                .multipart_max_concurrent_uploads
+                .unwrap_or(DEFAULT_MULTIPART_MAX_CONCURRENT_UPLOADS),
+        };
+
+        Ok(Arc::new(store))
+    }
+
+    /// Helper method to construct the full GCS object path
+    /// by prepending the key_prefix to the object key. The prefix is used
+    /// as-is (configured prefixes already carry their trailing "/", e.g.
+    /// "my-prefix/"), so no extra separator is inserted; joining with "/"
+    /// here would produce double slashes with the documented configs.
+    fn make_gcs_path(&self, key: &StoreKey<'_>) -> String {
+        format!("{}{}", self.key_prefix, key.as_str())
+    }
+}
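+
+// `consider_expired_after_s` is stored above but not yet consulted anywhere in
+// this draft. A minimal sketch of the check that `has_with_results` (step 4 of
+// its doc comment) could perform, assuming `InstantWrapper::unix_timestamp()`
+// seconds-since-epoch semantics as used by the S3 store, with a hypothetical
+// `update_time_s` taken from the object's metadata:
+impl<I, NowFn> GcsStore<NowFn>
+where
+    I: InstantWrapper,
+    NowFn: Fn() -> I + Send + Sync + Unpin + 'static,
+{
+    /// Whether an object last updated at `update_time_s` (seconds since the
+    /// Unix epoch) should be treated as if it does not exist.
+    #[allow(dead_code)]
+    fn is_expired(&self, update_time_s: i64) -> bool {
+        self.consider_expired_after_s != 0
+            && update_time_s + self.consider_expired_after_s
+                <= (self.now_fn)().unix_timestamp() as i64
+    }
+}
+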
+#[async_trait]
+impl<I, NowFn> StoreDriver for GcsStore<NowFn>
+where
+    I: InstantWrapper,
+    NowFn: Fn() -> I + Send + Sync + Unpin + 'static,
+{
+    /// Checks if multiple objects exist in GCS and returns their sizes.
+    ///
+    /// Implementation steps:
+    /// 1. Handle zero digest case
+    /// 2. Batch object metadata requests for efficiency
+    /// 3. Use Object.Get API to check existence and get size
+    /// 4. Handle expired objects based on consider_expired_after_s
+    /// 5. Return sizes for existing objects, None for missing ones
+    async fn has_with_results(
+        self: Pin<&Self>,
+        keys: &[StoreKey<'_>],
+        results: &mut [Option<u64>],
+    ) -> Result<(), Error> {
+        for (key, result) in keys.iter().zip(results.iter_mut()) {
+            // Handle zero digest case
+            if is_zero_digest(key.borrow()) {
+                *result = Some(0);
+                continue;
+            }
+
+            // Use the retrier to handle transient failures. The retrier
+            // consumes a stream of attempts, so the request is wrapped in
+            // `unfold` like the other retry call sites in this file.
+            match self
+                .retrier
+                .retry(unfold((), move |state| async move {
+                    let gcs_path = self.make_gcs_path(key);
+                    let response = self
+                        .client
+                        .get_object()
+                        .bucket(&self.bucket)
+                        .object(&gcs_path)
+                        .send()
+                        .await;
+
+                    let retry_result = match response {
+                        // The proto `size` field is a plain int64, not an
+                        // Option, so it is used directly.
+                        Ok(obj) => RetryResult::Ok(Some(obj.into_inner().size)),
+                        Err(status) if status.code() == tonic::Code::NotFound => {
+                            RetryResult::Ok(None)
+                        }
+                        Err(e) => RetryResult::Retry(make_err!(
+                            Code::Aborted,
+                            "Failed to get object metadata: {}",
+                            e
+                        )),
+                    };
+                    Some((retry_result, state))
+                }))
+                .await?
+            {
+                Some(size) => {
+                    *result = Some(u64::try_from(size).err_tip(|| "Invalid object size")?);
+                }
+                None => *result = None,
+            }
+        }
+        Ok(())
+    }
+
+    /// Uploads an object to GCS.
+    ///
+    /// Implementation steps:
+    /// 1. Handle zero digest case
+    /// 2. For small files (<5MB):
+    ///    - Use simple upload with Object.Write API
+    /// 3. For large files:
+    ///    - Use resumable upload protocol
+    ///    - Initialize with Object.StartResumableWrite
+    ///    - Upload chunks with Object.Write
+    ///    - Handle retries and failures
+    /// 4. Verify upload completion
+    async fn update(
+        self: Pin<&Self>,
+        digest: StoreKey<'_>,
+        mut reader: DropCloserReadHalf,
+        upload_size: UploadSizeInfo,
+    ) -> Result<(), Error> {
+        // Handle zero digest case
+        if is_zero_digest(digest.borrow()) {
+            return Ok(());
+        }
+
+        let gcs_path = self.make_gcs_path(&digest);
+
+        // Set maximum buffer size for retrying uploads
+        reader.set_max_recent_data_size(
+            u64::try_from(self.max_retry_buffer_per_request)
+                .err_tip(|| "Could not convert max_retry_buffer_per_request to u64")?,
+        );
+
+        // Both `UploadSizeInfo` variants carry a size hint (exact or maximum),
+        // so a catch-all arm would be unreachable here.
+        let size = match upload_size {
+            UploadSizeInfo::ExactSize(size) | UploadSizeInfo::MaxSize(size) => size,
+        };
+
+        // For small files, use simple upload
+        if size < 5 * 1024 * 1024 {
+            // Simple upload
+            return self
+                .retrier
+                .retry(unfold(reader, move |mut reader| async move {
+                    let mut buffer = Vec::new();
+                    while let Some(chunk) = reader.next().await {
+                        // Propagate read failures through the retrier instead
+                        // of panicking (an `.unwrap()` here would abort the
+                        // process on a failed read).
+                        let chunk = match chunk.err_tip(|| "Failed to read chunk from reader") {
+                            Ok(chunk) => chunk,
+                            Err(e) => return Some((RetryResult::Err(e), reader)),
+                        };
+                        buffer.extend_from_slice(&chunk);
+                    }
+
+                    let retry_result = match self
+                        .client
+                        .write_object()
+                        .bucket(&self.bucket)
+                        .object(&gcs_path)
+                        .write_object_spec(WriteObjectSpec {
+                            resource: Some(Object {
+                                name: gcs_path.clone(),
+                                bucket: self.bucket.clone(),
+                                ..Default::default()
+                            }),
+                            ..Default::default()
+                        })
+                        .data(buffer)
+                        .send()
+                        .await
+                    {
+                        Ok(_) => RetryResult::Ok(()),
+                        Err(e) => RetryResult::Retry(make_err!(
+                            Code::Aborted,
+                            "Failed to upload object: {}",
+                            e
+                        )),
+                    };
+                    Some((retry_result, reader))
+                }))
+                .await;
+        }
+
+        // For large files, use resumable upload
+        let upload = self
+            .retrier
+            .retry(unfold((), move |_| async move {
+                let retry_result = match self
+                    .client
+                    .start_resumable_write()
+                    .write_object_spec(WriteObjectSpec {
+                        resource: Some(Object {
+                            name:
gcs_path.clone(), + bucket: self.bucket.clone(), + ..Default::default() + }), + ..Default::default() + }) + .send() + .await + { + Ok(response) => RetryResult::Ok(response.into_inner().upload_id), + Err(e) => RetryResult::Retry(make_err!( + Code::Aborted, + "Failed to start resumable upload: {}", e + )) + }; + Some((retry_result, ())) + })).await?; + + let mut offset = 0u64; + let mut buffer = Vec::new(); + + // Upload chunks + while let Some(chunk) = reader.next().await { + let chunk = chunk.err_tip(|| "Failed to read chunk from reader")?; + buffer.extend_from_slice(&chunk); + + // Upload when buffer reaches optimal size or on last chunk + if buffer.len() >= 5 * 1024 * 1024 { + let buffer_clone = buffer.clone(); + self.retrier.retry(unfold((), move |_| async move { + let retry_result = match self.client + .write_object() + .bucket(&self.bucket) + .object(&gcs_path) + .upload_id(&upload) + .write_offset(offset) + .data(buffer_clone.clone()) + .send() + .await + { + Ok(_) => RetryResult::Ok(()), + Err(e) => RetryResult::Retry(make_err!( + Code::Aborted, + "Failed to upload chunk: {}", e + )) + }; + Some((retry_result, ())) + })).await?; + + offset += u64::try_from(buffer.len()) + .err_tip(|| "Could not convert buffer length to u64")?; + buffer.clear(); + } + } + + // Upload any remaining data + if !buffer.is_empty() { + let buffer_clone = buffer.clone(); + self.retrier.retry(unfold((), move |_| async move { + let retry_result = match self.client + .write_object() + .bucket(&self.bucket) + .object(&gcs_path) + .upload_id(&upload) + .write_offset(offset) + .data(buffer_clone.clone()) + .finish_write(true) + .send() + .await + { + Ok(_) => RetryResult::Ok(()), + Err(e) => RetryResult::Retry(make_err!( + Code::Aborted, + "Failed to upload final chunk: {}", e + )) + }; + Some((retry_result, ())) + })).await?; + } + + Ok(()) + } + + /// Downloads a part of an object from GCS. + /// + /// Implementation steps: + /// 1. Handle zero digest case + /// 2. Calculate byte range based on offset and length + /// 3. Use Object.Read API with range header + /// 4. Stream data to writer + /// 5. 
Handle errors and retries
+    async fn get_part(
+        self: Pin<&Self>,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+    ) -> Result<(), Error> {
+        // Handle zero digest case
+        if is_zero_digest(key.borrow()) {
+            return writer
+                .send_eof()
+                .err_tip(|| "Failed to send EOF for zero digest");
+        }
+
+        let gcs_path = self.make_gcs_path(&key);
+
+        // Calculate end byte if length is specified
+        let end_byte = length.map(|len| offset + len - 1);
+
+        // Use the retrier to handle transient failures
+        self.retrier
+            .retry(unfold(
+                (writer, offset),
+                move |(writer, current_offset)| async move {
+                    let response = self
+                        .client
+                        .read_object()
+                        .bucket(&self.bucket)
+                        .object(&gcs_path)
+                        .read_offset(current_offset)
+                        .set_read_limit(end_byte.map(|end| end - current_offset + 1))
+                        .send()
+                        .await;
+
+                    match response {
+                        Ok(response) => {
+                            let mut stream = response.into_inner();
+
+                            // `message()` yields `Result<Option<_>>`: `Ok(None)`
+                            // marks the end of the stream, and stream errors are
+                            // surfaced to the retrier.
+                            loop {
+                                match stream.message().await {
+                                    Ok(Some(chunk)) => {
+                                        if let Some(data) = chunk.chunk_data {
+                                            if data.is_empty() {
+                                                continue; // Skip empty chunks
+                                            }
+                                            if let Err(e) = writer.send(data).await {
+                                                return Some((
+                                                    RetryResult::Err(make_err!(
+                                                        Code::Internal,
+                                                        "Failed to write data chunk: {}",
+                                                        e
+                                                    )),
+                                                    (writer, current_offset),
+                                                ));
+                                            }
+                                        }
+                                    }
+                                    Ok(None) => break,
+                                    Err(e) => {
+                                        return Some((
+                                            RetryResult::Retry(make_err!(
+                                                Code::Aborted,
+                                                "Failed to read chunk: {}",
+                                                e
+                                            )),
+                                            (writer, current_offset),
+                                        ));
+                                    }
+                                }
+                            }
+
+                            if let Err(e) = writer.send_eof() {
+                                return Some((
+                                    RetryResult::Err(make_err!(
+                                        Code::Internal,
+                                        "Failed to send EOF: {}",
+                                        e
+                                    )),
+                                    (writer, current_offset),
+                                ));
+                            }
+
+                            Some((RetryResult::Ok(()), (writer, current_offset)))
+                        }
+                        Err(status) if status.code() == tonic::Code::NotFound => Some((
+                            RetryResult::Err(make_err!(
+                                Code::NotFound,
+                                "Object not found: {}",
+                                gcs_path
+                            )),
+                            (writer, current_offset),
+                        )),
+                        Err(e) => Some((
+                            RetryResult::Retry(make_err!(
+                                Code::Aborted,
+                                "Failed to read object: {}",
+                                e
+                            )),
+                            (writer, current_offset),
+                        )),
+                    }
+                },
+            ))
+            .await
+    }
+
+    fn inner_store(&self, _digest: Option<StoreKey>) -> &'_ dyn StoreDriver {
+        self
+    }
+
+    fn as_any<'a>(&'a self) -> &'a (dyn std::any::Any + Sync + Send + 'static) {
+        self
+    }
+
+    fn as_any_arc(self: Arc<Self>) -> Arc<dyn std::any::Any + Sync + Send + 'static> {
+        self
+    }
+}
+
+#[async_trait]
+impl<I, NowFn> HealthStatusIndicator for GcsStore<NowFn>
+where
+    I: InstantWrapper,
+    NowFn: Fn() -> I + Send + Sync + Unpin + 'static,
+{
+    fn get_name(&self) -> &'static str {
+        "GcsStore"
+    }
+
+    /// Checks the health of the GCS connection.
+    ///
+    /// Implementation steps:
+    /// 1. Try to list a small number of objects
+    /// 2. Check connection to GCS
+    /// 3. Return appropriate health status
+    async fn check_health(&self, namespace: Cow<'static, str>) -> HealthStatus {
+        StoreDriver::check_health(Pin::new(self), namespace).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // Test implementation steps:
+    // 1. Mock GCS client for unit tests
+    // 2. Test all main operations:
+    //    - new() constructor
+    //    - has_with_results
+    //    - update
+    //    - get_part
+    // 3. Test error cases and retries
+    // 4. Test zero digest handling
+    // 5. Test multipart uploads
+    // 6. Test expired object handling
+}
\ No newline at end of file
diff --git a/nativelink-store/src/lib.rs b/nativelink-store/src/lib.rs
index 04040fa5b..165f75542 100644
--- a/nativelink-store/src/lib.rs
+++ b/nativelink-store/src/lib.rs
@@ -32,3 +32,4 @@ pub mod shard_store;
 pub mod size_partitioning_store;
 pub mod store_manager;
 pub mod verify_store;
+pub mod gcs_store;