Skip to content

Commit

Permalink
Remove utf8validator and protolegacy build tag (#7146)
Browse files Browse the repository at this point in the history
## What changed?
Remove the utf8validator component and stop building with protolegacy.

## Why?
Let protobuf validate all strings are valid UTF-8 as specified.

## How did you test it?
Will go through normal release validation.

## Potential risks
All rpcs and persisted data containing invalid UTF-8 in strings will be
rejected immediately. We've been monitoring for invalid data in Temporal
Cloud for most of a year now and all the bad data is gone.
  • Loading branch information
dnr authored Jan 24, 2025
1 parent eea4376 commit 0682c47
Show file tree
Hide file tree
Showing 25 changed files with 4 additions and 931 deletions.
12 changes: 0 additions & 12 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,18 +212,6 @@ Please don't use very generic titles like "bug fixes".
All PR titles should start with Upper case and have no dot at the end.
## Go build and run tags
Prior to Server version v1.23.0 our protobuf code generator allowed invalid UTF-8 data to be stored as proto strings. This isn't actually allowed by the proto3 spec, so we need to specify `-tags protolegacy` when building against the server. Our Makefile does this, but if you're using temporal as a library you'll need to enable that yourself.

Example:

```shell
$ go build -tags protolegacy ./cmd/server
```

If you see an error like `grpc: error unmarshalling request: string field contains invalid UTF-8` then you've forgotten to specify this flag.
## Go version update
1. In this repository, update `go` in `go.mod`.
Expand Down
4 changes: 1 addition & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ PERSISTENCE_DRIVER ?= cassandra
TEMPORAL_DB ?= temporal
VISIBILITY_DB ?= temporal_visibility

# Always use "protolegacy" tag to allow disabling utf-8 validation on proto messages
# during proto library transition.
ALL_BUILD_TAGS := protolegacy,$(BUILD_TAG)
ALL_BUILD_TAGS := $(BUILD_TAG),
ALL_TEST_TAGS := $(ALL_BUILD_TAGS),test_dep,$(TEST_TAG)
BUILD_TAG_FLAG := -tags $(ALL_BUILD_TAGS)
TEST_TAG_FLAG := -tags $(ALL_TEST_TAGS)
Expand Down
33 changes: 0 additions & 33 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,39 +288,6 @@ operator API calls (highest priority). Should be >0.0 and <= 1.0 (defaults to 20
`How many extra goroutines can be created per root.`,
)

// utf-8 validation

ValidateUTF8SampleRPCRequest = NewGlobalFloatSetting(
"system.validateUTF8.sample.rpcRequest",
0.0,
`Sample rate of utf-8 string validation for rpc requests`,
)
ValidateUTF8SampleRPCResponse = NewGlobalFloatSetting(
"system.validateUTF8.sample.rpcResponse",
0.0,
`Sample rate of utf-8 string validation for rpc responses`,
)
ValidateUTF8SamplePersistence = NewGlobalFloatSetting(
"system.validateUTF8.sample.persistence",
0.0,
`Sample rate of utf-8 string validation for persistence [de]serialization`,
)
ValidateUTF8FailRPCRequest = NewGlobalBoolSetting(
"system.validateUTF8.fail.rpcRequest",
false,
`Whether to fail rpcs on utf-8 string validation errors`,
)
ValidateUTF8FailRPCResponse = NewGlobalBoolSetting(
"system.validateUTF8.fail.rpcResponse",
false,
`Whether to fail rpcs on utf-8 string validation errors`,
)
ValidateUTF8FailPersistence = NewGlobalBoolSetting(
"system.validateUTF8.fail.persistence",
false,
`Whether to fail persistence [de]serialization on utf-8 string validation errors`,
)

// keys for size limit

BlobSizeLimitError = NewNamespaceIntSetting(
Expand Down
4 changes: 0 additions & 4 deletions common/metrics/metric_defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -658,10 +658,6 @@ var (
"wf_too_many_pending_external_workflow_signals",
WithDescription("The number of Workflow Tasks failed because they would cause the limit on the number of pending signals to external workflows to be exceeded. See https://t.mp/limits for more information."),
)
UTF8ValidationErrors = NewCounterDef(
"utf8_validation_errors",
WithDescription("Number of times the service encountered a proto message with invalid UTF-8 in a string field"),
)

// Frontend
AddSearchAttributesWorkflowSuccessCount = NewCounterDef("add_search_attributes_workflow_success")
Expand Down
7 changes: 0 additions & 7 deletions common/nexus/payload_serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ import (
commonpb "go.temporal.io/api/common/v1"
enumspb "go.temporal.io/api/enums/v1"
"go.temporal.io/server/common/persistence/serialization"
"go.temporal.io/server/common/utf8validator"
)

type payloadSerializer struct{}
Expand Down Expand Up @@ -81,9 +80,6 @@ func (payloadSerializer) Deserialize(content *nexus.Content, v any) error {
switch mediaType {
case "application/x-temporal-payload":
err := payload.Unmarshal(content.Data)
if err == nil {
err = utf8validator.Validate(payload, utf8validator.SourceRPCRequest)
}
if err != nil {
return serialization.NewDeserializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
Expand Down Expand Up @@ -180,9 +176,6 @@ func (payloadSerializer) Serialize(v any) (*nexus.Content, error) {
}

func xTemporalPayload(payload *commonpb.Payload) (*nexus.Content, error) {
if err := utf8validator.Validate(payload, utf8validator.SourceRPCResponse); err != nil {
return nil, fmt.Errorf("%w: payload marshal error: %w", errSerializer, err)
}
data, err := payload.Marshal()
if err != nil {
return nil, fmt.Errorf("%w: payload marshal error: %w", errSerializer, err)
Expand Down
7 changes: 0 additions & 7 deletions common/persistence/serialization/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
enumspb "go.temporal.io/api/enums/v1"
persistencespb "go.temporal.io/server/api/persistence/v1"
"go.temporal.io/server/common/codec"
"go.temporal.io/server/common/utf8validator"
"google.golang.org/protobuf/proto"
)

Expand Down Expand Up @@ -174,9 +173,6 @@ func decode(
}

func proto3Encode(m proto.Message) (*commonpb.DataBlob, error) {
if err := utf8validator.Validate(m, utf8validator.SourcePersistence); err != nil {
return nil, NewSerializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
blob := commonpb.DataBlob{EncodingType: enumspb.ENCODING_TYPE_PROTO3}
data, err := proto.Marshal(m)
if err != nil {
Expand All @@ -199,9 +195,6 @@ func Proto3Decode(blob []byte, e enumspb.EncodingType, result proto.Message) err
return NewUnknownEncodingTypeError(e.String(), enumspb.ENCODING_TYPE_PROTO3)
}
err := proto.Unmarshal(blob, result)
if err == nil {
err = utf8validator.Validate(result, utf8validator.SourcePersistence)
}
if err != nil {
return NewDeserializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
Expand Down
18 changes: 0 additions & 18 deletions common/persistence/serialization/serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ import (
persistencespb "go.temporal.io/server/api/persistence/v1"
replicationspb "go.temporal.io/server/api/replication/v1"
"go.temporal.io/server/common/codec"
"go.temporal.io/server/common/utf8validator"
"go.temporal.io/server/service/history/tasks"
"google.golang.org/protobuf/proto"
)
Expand Down Expand Up @@ -172,9 +171,6 @@ func (t *serializerImpl) DeserializeEvents(data *commonpb.DataBlob) ([]*historyp
default:
return nil, NewUnknownEncodingTypeError(data.EncodingType.String(), enumspb.ENCODING_TYPE_PROTO3)
}
if err == nil {
err = utf8validator.Validate(events, utf8validator.SourcePersistence)
}
if err != nil {
return nil, NewDeserializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
Expand Down Expand Up @@ -205,9 +201,6 @@ func (t *serializerImpl) DeserializeEvent(data *commonpb.DataBlob) (*historypb.H
default:
return nil, NewUnknownEncodingTypeError(data.EncodingType.String(), enumspb.ENCODING_TYPE_PROTO3)
}
if err == nil {
err = utf8validator.Validate(event, utf8validator.SourcePersistence)
}
if err != nil {
return nil, NewDeserializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
Expand Down Expand Up @@ -240,9 +233,6 @@ func (t *serializerImpl) DeserializeClusterMetadata(data *commonpb.DataBlob) (*p
default:
return nil, NewUnknownEncodingTypeError(data.EncodingType.String(), enumspb.ENCODING_TYPE_PROTO3)
}
if err == nil {
err = utf8validator.Validate(cm, utf8validator.SourcePersistence)
}
if err != nil {
return nil, NewSerializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
Expand All @@ -261,11 +251,6 @@ func (t *serializerImpl) serialize(p marshaler, encodingType enumspb.EncodingTyp
switch encodingType {
case enumspb.ENCODING_TYPE_PROTO3:
// Client API currently specifies encodingType on requests which span multiple of these objects
if msg, ok := p.(proto.Message); ok {
if err := utf8validator.Validate(msg, utf8validator.SourcePersistence); err != nil {
return nil, NewSerializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
}
data, err = p.Marshal()
default:
return nil, NewUnknownEncodingTypeError(encodingType.String(), enumspb.ENCODING_TYPE_PROTO3)
Expand Down Expand Up @@ -628,9 +613,6 @@ func ProtoEncodeBlob(m proto.Message, encoding enumspb.EncodingType) (*commonpb.
}, nil
}

if err := utf8validator.Validate(m, utf8validator.SourcePersistence); err != nil {
return nil, NewSerializationError(enumspb.ENCODING_TYPE_PROTO3, err)
}
data, err := proto.Marshal(m)
if err != nil {
return nil, NewSerializationError(enumspb.ENCODING_TYPE_PROTO3, err)
Expand Down
4 changes: 0 additions & 4 deletions common/persistence/visibility/visibility_manager_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ import (
"go.temporal.io/server/common/persistence/visibility/manager"
"go.temporal.io/server/common/persistence/visibility/store"
"go.temporal.io/server/common/searchattribute"
"go.temporal.io/server/common/utf8validator"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/durationpb"
"google.golang.org/protobuf/types/known/timestamppb"
Expand Down Expand Up @@ -349,9 +348,6 @@ func deserializeMemo(data *commonpb.DataBlob) (*commonpb.Memo, error) {
case enumspb.ENCODING_TYPE_PROTO3:
memo := &commonpb.Memo{}
err := proto.Unmarshal(data.Data, memo)
if err == nil {
err = utf8validator.Validate(memo, utf8validator.SourcePersistence)
}
if err != nil {
return nil, serialization.NewDeserializationError(
enumspb.ENCODING_TYPE_PROTO3, fmt.Errorf("unable to deserialize memo from data blob: %w", err))
Expand Down
3 changes: 0 additions & 3 deletions common/resource/fx.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ import (
"go.temporal.io/server/common/searchattribute"
"go.temporal.io/server/common/telemetry"
"go.temporal.io/server/common/testing/testhooks"
"go.temporal.io/server/common/utf8validator"
"go.uber.org/fx"
"google.golang.org/grpc"
"google.golang.org/grpc/health"
Expand Down Expand Up @@ -129,9 +128,7 @@ var Module = fx.Options(
fx.Provide(health.NewServer),
deadlock.Module,
config.Module,
utf8validator.Module,
testhooks.Module,
fx.Invoke(func(*utf8validator.Validator) {}), // force this to be constructed even if not referenced elsewhere
)

var DefaultOptions = fx.Options(
Expand Down
128 changes: 1 addition & 127 deletions common/sdk/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
package sdk

import (
"encoding/base64"
"fmt"
"reflect"

commonpb "go.temporal.io/api/common/v1"
"go.temporal.io/sdk/converter"
"go.temporal.io/server/common/utf8validator"
"google.golang.org/protobuf/proto"
)

var (
Expand All @@ -41,127 +34,8 @@ var (
PreferProtoDataConverter = converter.NewCompositeDataConverter(
converter.NewNilPayloadConverter(),
converter.NewByteSlicePayloadConverter(),
// TODO: We have a local copy of ProtoPayloadConverter to add explicit utf8 validation
// checks. After we remove this explicit validation, we can switch back to the sdk version.
// converter.NewProtoPayloadConverter(),
newProtoPayloadConverter(),
converter.NewProtoPayloadConverter(),
converter.NewProtoJSONPayloadConverter(),
converter.NewJSONPayloadConverter(),
)
)

// Local copy of go.temporal.io/sdk/converter.ProtoPayloadConverter with utf8 validation checks
// and without various options:

type protoPayloadConverter struct {
}

// NewProtoPayloadConverter creates new instance of `ProtoPayloadConverter“.
func newProtoPayloadConverter() *protoPayloadConverter {
return &protoPayloadConverter{}
}

func (c *protoPayloadConverter) ToPayload(value interface{}) (*commonpb.Payload, error) {
// This implementation only supports protobuf api v2.

builtPointer := false
for {
if valueProto, ok := value.(proto.Message); ok {
err := utf8validator.Validate(valueProto, utf8validator.SourcePersistence)
if err != nil {
return nil, fmt.Errorf("%w: %v", converter.ErrUnableToEncode, err)
}
byteSlice, err := proto.Marshal(valueProto)
if err != nil {
return nil, fmt.Errorf("%w: %v", converter.ErrUnableToEncode, err)
}
return &commonpb.Payload{
Metadata: map[string][]byte{
converter.MetadataEncoding: []byte(c.Encoding()),
converter.MetadataMessageType: []byte(valueProto.ProtoReflect().Descriptor().FullName()),
},
Data: byteSlice,
}, nil
}
if builtPointer {
break
}
value = pointerTo(value).Interface()
builtPointer = true
}

return nil, nil
}

func (c *protoPayloadConverter) FromPayload(payload *commonpb.Payload, valuePtr interface{}) error {
// This implementation only supports protobuf api v2.

originalValue := reflect.ValueOf(valuePtr)
if originalValue.Kind() != reflect.Ptr {
return fmt.Errorf("type: %T: %w", valuePtr, converter.ErrValuePtrIsNotPointer)
}

originalValue = originalValue.Elem()
if !originalValue.CanSet() {
return fmt.Errorf("type: %T: %w", valuePtr, converter.ErrUnableToSetValue)
}

if originalValue.Kind() == reflect.Interface {
return fmt.Errorf("value type: %s: %w", originalValue.Type().String(), converter.ErrValuePtrMustConcreteType)
}

value := originalValue
// If original value is of value type (i.e. commonpb.WorkflowType), create a pointer to it.
if originalValue.Kind() != reflect.Ptr {
value = pointerTo(originalValue.Interface())
}

protoValue := value.Interface() // protoValue is for sure of pointer type (i.e. *commonpb.WorkflowType).
protoMessage, isProtoMessage := protoValue.(proto.Message)
if !isProtoMessage {
return fmt.Errorf("type: %T: %w", protoValue, converter.ErrTypeNotImplementProtoMessage)
}

// If original value is nil, create new instance.
if originalValue.Kind() == reflect.Ptr && originalValue.IsNil() {
value = newOfSameType(originalValue)
protoValue = value.Interface()
protoMessage = protoValue.(proto.Message) // type assertion must always succeed
}

err := proto.Unmarshal(payload.GetData(), protoMessage)
if err == nil {
err = utf8validator.Validate(protoMessage, utf8validator.SourcePersistence)
}
if err != nil {
return fmt.Errorf("%w: %v", converter.ErrUnableToDecode, err)
}

// If original value wasn't a pointer then set value back to where valuePtr points to.
if originalValue.Kind() != reflect.Ptr {
originalValue.Set(value.Elem())
}

return nil
}

func (c *protoPayloadConverter) ToString(payload *commonpb.Payload) string {
return base64.RawStdEncoding.EncodeToString(payload.GetData())
}

func (c *protoPayloadConverter) Encoding() string {
return converter.MetadataEncodingProto
}

func pointerTo(val interface{}) reflect.Value {
valPtr := reflect.New(reflect.TypeOf(val))
valPtr.Elem().Set(reflect.ValueOf(val))
return valPtr
}

func newOfSameType(val reflect.Value) reflect.Value {
valType := val.Type().Elem() // is value type (i.e. commonpb.WorkflowType)
newValue := reflect.New(valType) // is of pointer type (i.e. *commonpb.WorkflowType)
val.Set(newValue) // set newly created value back to passed value
return newValue
}
Loading

0 comments on commit 0682c47

Please sign in to comment.