diff --git a/stablehlo/dialect/StablehloOps.cpp b/stablehlo/dialect/StablehloOps.cpp
index 9b0933a2d4e..b4c11274a28 100644
--- a/stablehlo/dialect/StablehloOps.cpp
+++ b/stablehlo/dialect/StablehloOps.cpp
@@ -156,7 +156,6 @@ LogicalResult ReduceScatterOp::verify() {
 }
 
 INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(AddOp)
-INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(AllReduceOp)
 INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(AndOp)
 INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(Atan2Op)
 INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(CbrtOp)
@@ -919,6 +918,15 @@ LogicalResult AllReduceOp::verify() {
                                  getComputation());
 }
 
+LogicalResult AllReduceOp::inferReturnTypeComponents(
+    MLIRContext*, std::optional<Location> location, ValueShapeRange operands,
+    DictionaryAttr attributes, OpaqueProperties properties, RegionRange regions,
+    SmallVectorImpl<ShapedTypeComponents>& inferredReturnShapes) {
+  AllReduceOp::Adaptor adaptor(operands, attributes, properties, regions);
+  return hlo::inferAllReduceOp(location, adaptor.getOperand(),
+                               adaptor.getComputation(), inferredReturnShapes);
+}
+
 //===----------------------------------------------------------------------===//
 // BatchNormGradOp
 //===----------------------------------------------------------------------===//
@@ -1379,7 +1387,7 @@ LogicalResult ReduceWindowOp::inferReturnTypeComponents(
       location, adaptor.getInputs(), adaptor.getInitValues(),
       adaptor.getWindowDimensions(), adaptor.getWindowStrides(),
       adaptor.getBaseDilations(), adaptor.getWindowDilations(),
-      adaptor.getPadding(), inferredReturnShapes);
+      adaptor.getPadding(), adaptor.getBody(), inferredReturnShapes);
 }
 
 LogicalResult ReduceWindowOp::verify() {
@@ -1782,7 +1790,8 @@ LogicalResult ReduceOp::inferReturnTypeComponents(
   ReduceOp::Adaptor adaptor(operands, attributes, properties, regions);
   return hlo::inferReduceOp(location, adaptor.getInputs().getTypes(),
                             adaptor.getInitValues().getTypes(),
-                            adaptor.getDimensions(), inferredReturnShapes);
+                            adaptor.getDimensions(), adaptor.getBody(),
+                            inferredReturnShapes);
 }
 
 LogicalResult ReduceOp::verify() {
@@ -2295,8 +2304,8 @@ LogicalResult SelectAndScatterOp::inferReturnTypes(
     SmallVectorImpl<Type>& inferredReturnTypes) {
   SelectAndScatterOp::Adaptor adaptor(operands, attributes, properties,
                                       regions);
-  return hlo::inferSelectAndScatterOp(adaptor.getOperand(),
-                                      inferredReturnTypes);
+  return hlo::inferSelectAndScatterOp(
+      adaptor.getOperand(), adaptor.getScatter(), inferredReturnTypes);
 }
 
 LogicalResult SelectAndScatterOp::verify() {
@@ -2316,6 +2325,7 @@ LogicalResult ScatterOp::inferReturnTypes(
     SmallVectorImpl<Type>& inferredReturnTypes) {
   ScatterOp::Adaptor adaptor(operands, attributes, properties, regions);
   return hlo::inferScatterOp(location, adaptor.getInputs(),
+                             adaptor.getUpdateComputation(),
                              inferredReturnTypes);
 }
 
diff --git a/stablehlo/dialect/StablehloOps.td b/stablehlo/dialect/StablehloOps.td
index 80b6e1ea86e..e5dbfed5936 100644
--- a/stablehlo/dialect/StablehloOps.td
+++ b/stablehlo/dialect/StablehloOps.td
@@ -1327,7 +1327,7 @@ def StableHLO_AllGatherOp : StableHLO_Op<"all_gather",
 }
 
 def StableHLO_AllReduceOp : StableHLO_Op<"all_reduce",
-    [HLO_CompatibleOperandsAndResultType /*all_reduce_c6*/]> {
+    [InferTensorType /*all_reduce_c6, all_reduce_c7*/]> {
   let summary = "AllReduce operation";
   let description = [{
     Within each process group in the process grid, applies a reduction function
@@ -1362,8 +1362,7 @@ def StableHLO_AllReduceOp : StableHLO_Op<"all_reduce",
   let hasVerifier = 1;
 }
 
-def StableHLO_ReduceScatterOp : StableHLO_Op<"reduce_scatter",
-    [SameOperandsAndResultElementType /*reduce_scatter_c8*/]> {
+def StableHLO_ReduceScatterOp : StableHLO_Op<"reduce_scatter", []> {
  let summary = "ReduceScatter operation";
  let description = [{
    Within each process group in the process grid, performs reduction, using
@@ -1448,7 +1447,7 @@ def StableHLO_AllToAllOp : StableHLO_Op<"all_to_all",
 
 def StableHLO_ReduceOp: StableHLO_ShapedInterfaceOp<"reduce", [
     RecursiveMemoryEffects, SameVariadicOperandSize /*reduce_c3*/,
-    InferTensorTypeWithReify /*reduce_c7*/,
+    InferTensorTypeWithReify /*reduce_c7, reduce_c8*/,
     SingleBlockImplicitTerminator<"ReturnOp">
   ]> { /*reduce_c7*/
   let summary = "Reduce operation";
@@ -1475,8 +1474,6 @@ def StableHLO_ReduceOp: StableHLO_ShapedInterfaceOp<"reduce", [
     Variadic<HLO_Tensor>:$init_values, /*reduce_i2*/
     I64ElementsAttr:$dimensions /*reduce_i3*/
   );
-  // TODO(hinsu): Verify that the attached body arguments and results are
-  // compatible with reduce op's operands.
 
   let regions = (region SizedRegion<1>:$body /*reduce_i4*/);
   let results = (outs Variadic<HLO_Tensor>);
@@ -2514,7 +2511,8 @@ def StableHLO_DynamicReshapeOp: StableHLO_ShapedInterfaceOp<"dynamic_reshape", [
 
 def StableHLO_ScatterOp: StableHLO_Op<"scatter",
       [RecursiveMemoryEffects, SameVariadicOperandSize /*scatter_c5*/,
-       DeclareOpInterfaceMethods<InferTypeOpInterface> /*scatter_c16*/]> {
+       DeclareOpInterfaceMethods<InferTypeOpInterface> /*scatter_c16,
+                                                         scatter_c17*/]> {
   let summary = "Scatter operation";
   let description = [{
     Produces `results` tensors which are equal to `inputs` tensors except that
@@ -2587,8 +2585,8 @@ def StableHLO_SelectOp: StableHLO_Op<"select", [Pure, HLO_BroadcastingElementwis
 }
 
 def StableHLO_SelectAndScatterOp: StableHLO_Op<"select_and_scatter",
-    [DeclareOpInterfaceMethods<InferTypeOpInterface> /*select_and_scatter_c11*/,
-     RecursiveMemoryEffects]> {
+    [DeclareOpInterfaceMethods<InferTypeOpInterface> /*select_and_scatter_c11,
+                                                       select_and_scatter_c12*/, RecursiveMemoryEffects]> {
   let summary = "SelectAndScatter operation";
   let description = [{
     Scatters the values from the `source` tensor using `scatter` based on the
diff --git a/stablehlo/dialect/TypeInference.cpp b/stablehlo/dialect/TypeInference.cpp
index 651d384753a..67fb6d821df 100644
--- a/stablehlo/dialect/TypeInference.cpp
+++ b/stablehlo/dialect/TypeInference.cpp
@@ -97,6 +97,52 @@ bool tensorsHaveSameElType(Type type1, Type type2,
   return tensorsHaveSameElType({type1, type2}, ignoreFpPrecision);
 }
 
+unsigned getBitWidth(Type type) {
+  if (auto complexTy = type.dyn_cast<ComplexType>())
+    return 2 * getBitWidth(complexTy.getElementType());
+  if (auto quantTy = type.dyn_cast<quant::QuantizedType>())
+    return getBitWidth(quantTy.getStorageType());
+  return type.getIntOrFloatBitWidth();
+}
+
+template <typename T>
+bool matchesType(Type a, Type b) {
+  bool matches = a.isa<T>() && b.isa<T>();
+  // Check that expressed type matches for quantized types
+  if constexpr (std::is_same<T, quant::QuantizedType>::value) {
+    return matches && (a.cast<quant::QuantizedType>().getExpressedType() ==
+                       b.cast<quant::QuantizedType>().getExpressedType());
+  }
+  return matches;
+}
+
+// Returns true if the element-type of type1 can be promoted to that of type2.
+// An element-type 'x' is promotable to element-type 'y' if they have the same
+// base type and bitwidth(x) <= bitwidth(y). When 'x' and 'y' are quantized
+// element-types, then promotion is applied only to the 'storage_type'
+// component.
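To make the promotion rule described in the comment above concrete, here is a minimal, self-contained C++ sketch. It is not part of the patch, and `BaseKind`/`ElemType` are hypothetical stand-ins for the MLIR element types the real helper inspects:

#include <cassert>

// Hypothetical stand-in for an element type: a base kind plus a bit width
// (for quantized types this would be the storage-type width).
enum class BaseKind { Integer, Float, Complex, Quantized };
struct ElemType {
  BaseKind kind;
  unsigned bitWidth;
};

// The promotion rule: same base kind and a bit width that does not shrink.
bool isPromotable(ElemType from, ElemType to) {
  return from.kind == to.kind && from.bitWidth <= to.bitWidth;
}

int main() {
  ElemType f32{BaseKind::Float, 32}, f64{BaseKind::Float, 64};
  ElemType i32{BaseKind::Integer, 32}, i8{BaseKind::Integer, 8};
  assert(isPromotable(f32, f64));   // f32 -> f64: allowed
  assert(!isPromotable(i32, i8));   // i32 -> i8: bit width shrinks
  assert(!isPromotable(i32, f64));  // i32 -> f64: base kind differs
  return 0;
}

The `isPromotableElementType` helper that follows applies the same check to real MLIR tensor element types, with an additional `ignoreFpPrecision` escape hatch for floating-point comparisons.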
+bool isPromotableElementType(Type type1, Type type2, + bool ignoreFpPrecision = false) { + auto tensorTy1 = type1.dyn_cast(); + auto tensorTy2 = type2.dyn_cast(); + + if (!tensorTy1 || !tensorTy2) return false; + + Type tensorEl1 = tensorTy1.getElementType(); + Type tensorEl2 = tensorTy2.getElementType(); + + bool isSameType = matchesType(tensorEl1, tensorEl2) || + matchesType(tensorEl1, tensorEl2) || + matchesType(tensorEl1, tensorEl2) || + matchesType(tensorEl1, tensorEl2); + + if (!isSameType) return false; + + if (ignoreFpPrecision && tensorEl1.isa()) return true; + + return getBitWidth(tensorEl1) <= getBitWidth(tensorEl2); +} + // Return true if type1 and type2 are shape-compatible and have same element // type. If 'ignoreFpPrecision' is True, then allow floats with different // precisions while checking element-types. @@ -405,12 +451,6 @@ SmallVector inferWindowOutputShape(ArrayRef baseShape, return outputDimensions; } -unsigned potentiallyComplexBitWidth(Type type) { - auto complexTy = type.dyn_cast(); - return complexTy ? 2 * complexTy.getElementType().getIntOrFloatBitWidth() - : type.getIntOrFloatBitWidth(); -} - LogicalResult verifyReplicaGroups(std::optional location, DenseIntElementsAttr replicaGroups, bool allGroupsMustHaveSameSize, @@ -530,6 +570,17 @@ LogicalResult verifyReduceOpInputsAndInferShape( return success(); } +// Returns the types of the terminator arguments of the input mlir::Block +// 'block'. +SmallVector getAccumulatorTypes(Block& block) { + SmallVector accumulatorSubShapes; + for (Value retOperand : block.getTerminator()->getOperands()) { + auto shapedTy = retOperand.getType().cast(); + accumulatorSubShapes.push_back(shapedTy); + } + return accumulatorSubShapes; +} + LogicalResult verifyReducerShape(std::optional loc, Block& block, ArrayRef inputTypes, ArrayRef initValueTypes, @@ -598,24 +649,35 @@ LogicalResult verifyReducerShape(std::optional loc, Block& block, // all_reduce_c5, reduce_c6, reduce_scatter_c7, reduce_window_c13, // reduce_window_i2, scatter_c6, scatter_c15, select_and_scatter_c10 - if (!compatibleShapeAndElementType(accumulatorSubShapes[inputIdx], - initValueTypes[inputIdx], - /*ignoreFpPrecision=*/true)) + if (failed(verifyCompatibleShape(initValueTypes[inputIdx], + accumulatorSubShapes[inputIdx]))) + return emitOptionalError( + loc, "The shape of reduction-region's result type at index ", + inputIdx, " differs from the op's corresponding init-value type: ", + accumulatorSubShapes[inputIdx], " vs ", initValueTypes[inputIdx]); + + if (!isPromotableElementType(initValueTypes[inputIdx], + accumulatorSubShapes[inputIdx], + /*ignoreFpPrecision=*/true)) return emitOptionalError( - loc, "The type of reduction-region's result type at index ", inputIdx, - " differs from the op's corresponding init-value type: ", + loc, "The element-type of reduction-region's result type at index ", + inputIdx, + " is expected to be promotable from the op's corresponding " + "init-value element-type: ", accumulatorSubShapes[inputIdx], " vs ", initValueTypes[inputIdx]); // reduce_c6, reduce_window_c3, scatter_c6, scatter_c15, // select_and_scatter_c10 - if (!tensorsHaveSameElType( + if (!isPromotableElementType( inputTypes[inputIdx], - block.getArgument(numInputs + inputIdx).getType(), true)) + block.getArgument(numInputs + inputIdx).getType(), + /*ignoreFpPrecision=*/true)) return emitOptionalError( loc, "The element-type of reduction-region's argument at index ", - numInputs + inputIdx, " is expected to be ", + numInputs + inputIdx, " is expected to be promotable 
from ", inputTypes[inputIdx].getElementType(), ", but got ", - block.getArgument(numInputs + inputIdx).getType(), " as its type."); + getElementTypeOrSelf( + block.getArgument(numInputs + inputIdx).getType())); Type blockArgType = block.getArgument(numInputs + inputIdx).getType(); auto blockArgTensorTy = blockArgType.cast(); @@ -1453,6 +1515,18 @@ LogicalResult inferAllToAllOp( return success(); } +LogicalResult inferAllReduceOp( + std::optional location, Value operand, Region& computation, + SmallVectorImpl& inferredReturnShapes) { + // all_reduce_c6, all_reduce_c7 + SmallVector accumulatorTypes = + getAccumulatorTypes(computation.front()); + auto operandShapedTy = operand.getType().cast(); + inferredReturnShapes.emplace_back(getSameShapeTensorType( + operandShapedTy, accumulatorTypes[0].getElementType())); + return success(); +} + LogicalResult inferBatchNormGradOp( std::optional location, Value operand, Value scale, Value mean, Value variance, Value gradOutput, int64_t featureIndex, @@ -2532,7 +2606,7 @@ LogicalResult inferRealOp(std::optional, Value operand, LogicalResult inferReduceOp( std::optional location, TypeRange inputTypes, - TypeRange initValueTypes, DenseIntElementsAttr dimensions, + TypeRange initValueTypes, DenseIntElementsAttr dimensions, Region& body, SmallVectorImpl& inferredReturnShapes) { SmallVector inputArgTensorTypes{ llvm::map_range(inputTypes, [](Type t) { return t.cast(); })}; @@ -2546,10 +2620,11 @@ LogicalResult inferReduceOp( initValueTensorTypes, dimensions, newDimensions, encoding))) return failure(); - // reduce_c2, reduce_c3, reduce_c7 + // reduce_c3, reduce_c7, reduce_c8 + SmallVector accumulatorTypes = getAccumulatorTypes(body.front()); for (uint64_t inputIdx = 0; inputIdx < inputTypes.size(); ++inputIdx) { ShapedType inputType = inputArgTensorTypes[inputIdx]; - Type elementType = inputType.getElementType(); + Type elementType = accumulatorTypes[inputIdx].getElementType(); if (inputType.hasRank()) inferredReturnShapes.emplace_back(newDimensions, elementType, encoding); else @@ -2565,7 +2640,7 @@ LogicalResult inferReduceWindowOp( std::optional windowStrides, std::optional baseDilations, std::optional windowDilations, - std::optional padding, + std::optional padding, Region& body, SmallVectorImpl& inferredReturnShapes) { SmallVector inputTypes{llvm::map_range( inputs.getTypes(), [](Type t) { return t.cast(); })}; @@ -2582,21 +2657,22 @@ LogicalResult inferReduceWindowOp( return failure(); // reduce_window_c1, reduce_window_c14...reduce_window_c16 + SmallVector accumulatorTypes = getAccumulatorTypes(body.front()); for (size_t i = 0; i < inputTypes.size(); ++i) { auto inputRankedType = inputs[i].getType().dyn_cast(); if (!inputRankedType) { - inferredReturnShapes.emplace_back(inputTypes[i].getElementType()); + inferredReturnShapes.emplace_back(accumulatorTypes[i].getElementType()); } else { auto resultShape = inferWindowOutputShape(inputTypes[i].getShape(), inferredWindow); auto inputBounds = encodingToBounds(inputRankedType.getEncoding()); if (inputBounds.empty()) { inferredReturnShapes.emplace_back(resultShape, - inputTypes[i].getElementType()); + accumulatorTypes[i].getElementType()); } else { auto resultBounds = inferWindowOutputShape(inputBounds, inferredWindow); inferredReturnShapes.emplace_back( - resultShape, inputTypes[i].getElementType(), + resultShape, accumulatorTypes[i].getElementType(), boundsToEncoding(inputRankedType.getEncoding(), resultBounds)); } } @@ -2661,8 +2737,16 @@ LogicalResult inferRngOp( } LogicalResult 
inferScatterOp(std::optional, ValueRange inputs, + Region& updateComputation, SmallVectorImpl& inferredReturnTypes) { - llvm::append_range(inferredReturnTypes, inputs.getTypes()); + // scatter_c16, scatter_c17 + SmallVector accumulatorTypes = + getAccumulatorTypes(updateComputation.front()); + for (uint64_t inputIdx = 0; inputIdx < inputs.size(); ++inputIdx) { + auto inputShapedTy = inputs[inputIdx].getType().cast(); + inferredReturnTypes.push_back(getSameShapeTensorType( + inputShapedTy, accumulatorTypes[inputIdx].getElementType())); + } return success(); } @@ -2692,9 +2776,14 @@ LogicalResult inferSelectOp( } LogicalResult inferSelectAndScatterOp( - Value operand, SmallVectorImpl& inferredReturnTypes) { - // select_and_scatter_c11 - inferredReturnTypes.push_back(operand.getType()); + Value operand, Region& scatter, + SmallVectorImpl& inferredReturnTypes) { + // select_and_scatter_c11, select_and_scatter_c12 + SmallVector accumulatorTypes = + getAccumulatorTypes(scatter.front()); + auto operandShapedTy = operand.getType().cast(); + inferredReturnTypes.push_back(getSameShapeTensorType( + operandShapedTy, accumulatorTypes[0].getElementType())); return success(); } @@ -3139,8 +3228,8 @@ LogicalResult verifyBitcastConvertOp(std::optional location, location, "cannot convert between real and complex types, but got: ", operandShapedType, " and ", targetShapedType); - auto targetEltBitWidth = potentiallyComplexBitWidth(targetElt); - auto operandEltBitWidth = potentiallyComplexBitWidth(operandElt); + auto targetEltBitWidth = getBitWidth(targetElt); + auto operandEltBitWidth = getBitWidth(operandElt); auto operandType = operandShapedType.dyn_cast(); auto targetType = targetShapedType.dyn_cast(); @@ -3821,6 +3910,16 @@ LogicalResult verifyReduceScatterOp(std::optional location, operandType.getDimSize(index), ") and result (", resultType.getDimSize(index), ")"); } + + // reduce_scatter_c9 + SmallVector accumulatorTypes = + getAccumulatorTypes(computation.front()); + if (resultType.getElementType() != accumulatorTypes[0].getElementType()) { + return emitOptionalError(location, "result element-type is expected to be ", + accumulatorTypes[0].getElementType(), ", but got ", + resultType.getElementType()); + } + return success(); } diff --git a/stablehlo/dialect/TypeInference.h b/stablehlo/dialect/TypeInference.h index 107e98392fe..abc87e22d2a 100644 --- a/stablehlo/dialect/TypeInference.h +++ b/stablehlo/dialect/TypeInference.h @@ -120,6 +120,10 @@ LogicalResult inferAllToAllOp( DenseIntElementsAttr replicaGroups, SmallVectorImpl& inferredReturnShapes); +LogicalResult inferAllReduceOp( + std::optional location, Value operand, Region& computation, + SmallVectorImpl& inferredReturnShapes); + LogicalResult inferBatchNormGradOp( std::optional location, Value operand, Value scale, Value mean, Value variance, Value gradOutput, int64_t featureIndex, @@ -281,7 +285,7 @@ LogicalResult inferRealOp(std::optional location, Value operand, LogicalResult inferReduceOp( std::optional location, TypeRange inputTypes, - TypeRange initValueTypes, DenseIntElementsAttr dimensions, + TypeRange initValueTypes, DenseIntElementsAttr dimensions, Region& body, SmallVectorImpl& inferredReturnShapes); LogicalResult inferReduceWindowOp( @@ -290,7 +294,7 @@ LogicalResult inferReduceWindowOp( std::optional windowStrides, std::optional baseDilations, std::optional windowDilations, - std::optional padding, + std::optional padding, Region& body, SmallVectorImpl& inferredReturnShapes); LogicalResult inferReplicaIdOp(MLIRContext* context, 
std::optional, @@ -306,7 +310,7 @@ LogicalResult inferRngOp( SmallVectorImpl& inferredReturnShapes); LogicalResult inferScatterOp(std::optional location, - ValueRange inputs, + ValueRange inputs, Region& update_computation, SmallVectorImpl& inferredReturnTypes); LogicalResult inferSelectOp( @@ -314,7 +318,7 @@ LogicalResult inferSelectOp( SmallVectorImpl& inferredReturnShapes); LogicalResult inferSelectAndScatterOp( - Value operand, SmallVectorImpl& inferredReturnTypes); + Value operand, Region& scatter, SmallVectorImpl& inferredReturnTypes); LogicalResult inferSendOp(HloDialectInterface* dialect, std::optional location, diff --git a/stablehlo/dialect/Version.h b/stablehlo/dialect/Version.h index f8c92b7f1c7..0fe2385f4fa 100644 --- a/stablehlo/dialect/Version.h +++ b/stablehlo/dialect/Version.h @@ -38,7 +38,7 @@ class Version { static FailureOr fromString(llvm::StringRef versionRef); /// Return a Version representing the current VHLO dialect version. - static Version getCurrentVersion() { return Version(0, 16, 3); } + static Version getCurrentVersion() { return Version(0, 17, 0); } /// Return a Version representing the minimum supported VHLO dialect version. static Version getMinimumVersion() { return Version(0, 9, 0); } diff --git a/stablehlo/dialect/VhloDialect.td b/stablehlo/dialect/VhloDialect.td index fe1d4d5d174..608f2a17855 100644 --- a/stablehlo/dialect/VhloDialect.td +++ b/stablehlo/dialect/VhloDialect.td @@ -34,6 +34,7 @@ def VHLO_Dialect : Dialect { 0.14.0: MLIR bytecode version 3 => 5 (revised to 4 in #1827). 0.15.0: MLIR bytecode version 5 => 6, use properties in VHLO. 0.16.0: Introduce `collective_broadcast` operation. + 0.17.0: Allow reduce operations to promote to higher bitwidth. }]; let useDefaultAttributePrinterParser = 0; diff --git a/stablehlo/dialect/VhloOps.cpp b/stablehlo/dialect/VhloOps.cpp index a3428b1b465..e8c651e58a6 100644 --- a/stablehlo/dialect/VhloOps.cpp +++ b/stablehlo/dialect/VhloOps.cpp @@ -24,6 +24,7 @@ limitations under the License. #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TypeSwitch.h" #include "mlir/Dialect/Quant/QuantOps.h" +#include "mlir/Dialect/Quant/QuantTypes.h" #include "mlir/Dialect/Shape/IR/Shape.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinAttributeInterfaces.h" @@ -37,6 +38,7 @@ limitations under the License. #include "mlir/Support/LogicalResult.h" #include "stablehlo/dialect/AssemblyFormat.h" #include "stablehlo/dialect/VhloBytecode.h" +#include "stablehlo/dialect/VhloTypes.h" namespace mlir { namespace vhlo { @@ -296,5 +298,75 @@ void VhloDialect::printAttribute(Attribute attr, DialectAsmPrinter& os) const { assert(succeeded(result)); } +/////////////////////////// +// Op Constraint Versioning +/////////////////////////// +// These could be migrated to ODS in VhloOps.td if we figured out a better way +// to represent this sort of constraint in tablegen. 
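The constraint check defined below refuses to serialize an op to a VHLO target older than 0.17.0 when the op relies on the new promotion behavior, i.e. when its operand and result element types differ. A hedged, self-contained C++ sketch of that decision, using an illustrative `Version` struct and plain strings rather than the real `vhlo::Version` class and MLIR types:

#include <string>
#include <tuple>

// Illustrative stand-in for a dialect version: comparable (major, minor, patch).
struct Version {
  int major, minor, patch;
  bool operator<(const Version& other) const {
    return std::tie(major, minor, patch) <
           std::tie(other.major, other.minor, other.patch);
  }
};

// An op whose result element type differs from its operand element type uses
// the promotion feature, which only exists in VHLO 0.17.0 and newer.
bool canSerialize(const std::string& operandElemType,
                  const std::string& resultElemType, Version targetVersion) {
  bool usesPromotion = operandElemType != resultElemType;
  return !usesPromotion || !(targetVersion < Version{0, 17, 0});
}

// Example: a reduce producing f64 from f32 operands serializes when targeting
// 0.17.0, but is rejected when targeting 0.16.x.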
+ +namespace { +Type getVhloElementType(Type tensorType) { + if (auto ranked = tensorType.dyn_cast()) { + return ranked.getElementType(); + } + return tensorType.cast().getElementType(); +} + +bool checkIfOperandAndResultElementTypesMatch(TypeRange operandTypes, + TypeRange resultTypes) { + SmallVector inputElementTypes{llvm::map_range( + operandTypes, [](Type t) { return getVhloElementType(t); })}; + SmallVector resultElementTypes{llvm::map_range( + resultTypes, [](Type t) { return getVhloElementType(t); })}; + + if (llvm::any_of( + llvm::zip(inputElementTypes, resultElementTypes), + [&](auto pair) { return std::get<0>(pair) != std::get<1>(pair); })) + return true; + + return false; +} + +// Allow mismatched operand and result types in reduce ops in v0.17.0 +LogicalResult verifyConstraint_0_17_0(mlir::Operation* op, + Version targetVersion) { + if (checkIfOperandAndResultElementTypesMatch(op->getOperandTypes(), + op->getResultTypes()) && + targetVersion < Version(0, 17, 0)) + return failure(); + return success(); +} +} // namespace + +LogicalResult AllReduceOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + +LogicalResult ReduceOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + +LogicalResult ReduceScatterOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + +LogicalResult ReduceWindowOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + +LogicalResult ScatterOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + +LogicalResult SelectAndScatterOpV1::validateConstraint(mlir::Operation* op, + Version targetVersion) { + return verifyConstraint_0_17_0(op, targetVersion); +} + } // namespace vhlo } // namespace mlir diff --git a/stablehlo/dialect/VhloOps.td b/stablehlo/dialect/VhloOps.td index 0456360e199..edd515a2279 100644 --- a/stablehlo/dialect/VhloOps.td +++ b/stablehlo/dialect/VhloOps.td @@ -35,6 +35,17 @@ def VHLO_VersionedOpInterface : OpInterface<"VersionedOpInterface"> { ]; } +def VHLO_VersionedOpConstraintInterface : OpInterface<"VersionedOpConstraintInterface"> { + let cppNamespace = "::mlir::vhlo"; + let methods = [ + InterfaceMethod< + [{Validate versioned constraints on a versioned op. 
+ Used if the spec'ed constraints of an op change over time.}], + "mlir::LogicalResult", "validateConstraint", + (ins "mlir::Operation*":$op, "mlir::vhlo::Version":$targetVersion)>, + ]; +} + class VHLO_Op traits = []> : Op] # traits> { @@ -92,7 +103,8 @@ def VHLO_AllGatherOpV1 : VHLO_Op<"all_gather_v1", "0.9.0", "current"> { let results = (outs VHLO_AnyType:$result); } -def VHLO_AllReduceOpV1 : VHLO_Op<"all_reduce_v1", "0.9.0", "current"> { +def VHLO_AllReduceOpV1 : VHLO_Op<"all_reduce_v1", "0.9.0", "current", + [DeclareOpInterfaceMethods]> { let arguments = (ins VHLO_AnyType:$operand, VHLO_AnyAttr:$replica_groups, @@ -754,7 +766,8 @@ def VHLO_RecvOpV1 : VHLO_Op<"recv_v1", "0.9.0", "current"> { let results = (outs Variadic:$results); } -def VHLO_ReduceOpV1 : VHLO_Op<"reduce_v1", "0.9.0", "current", [SameVariadicOperandSize]> { +def VHLO_ReduceOpV1 : VHLO_Op<"reduce_v1", "0.9.0", "current", + [SameVariadicOperandSize, DeclareOpInterfaceMethods]> { let arguments = (ins Variadic:$inputs, Variadic:$init_values, @@ -773,7 +786,8 @@ def VHLO_ReducePrecisionOpV1 : VHLO_Op<"reduce_precision_v1", "0.9.0", "current" let results = (outs VHLO_AnyType:$output); } -def VHLO_ReduceScatterOpV1 : VHLO_Op<"reduce_scatter_v1", "0.9.0", "current"> { +def VHLO_ReduceScatterOpV1 : VHLO_Op<"reduce_scatter_v1", "0.9.0", "current", + [DeclareOpInterfaceMethods]> { let arguments = (ins VHLO_AnyType:$operand, VHLO_AnyAttr:$scatter_dimension, @@ -785,7 +799,8 @@ def VHLO_ReduceScatterOpV1 : VHLO_Op<"reduce_scatter_v1", "0.9.0", "current"> { let results = (outs VHLO_AnyType:$result); } -def VHLO_ReduceWindowOpV1 : VHLO_Op<"reduce_window_v1", "0.9.0", "current", [SameVariadicOperandSize]> { +def VHLO_ReduceWindowOpV1 : VHLO_Op<"reduce_window_v1", "0.9.0", "current", + [SameVariadicOperandSize, DeclareOpInterfaceMethods]> { let arguments = (ins Variadic:$inputs, Variadic:$init_values, @@ -864,7 +879,8 @@ def VHLO_RsqrtOpV1 : VHLO_Op<"rsqrt_v1", "0.9.0", "current"> { let results = (outs VHLO_AnyType:$result); } -def VHLO_ScatterOpV1 : VHLO_Op<"scatter_v1", "0.9.0", "current", [SameVariadicOperandSize]> { +def VHLO_ScatterOpV1 : VHLO_Op<"scatter_v1", "0.9.0", "current", + [SameVariadicOperandSize, DeclareOpInterfaceMethods]> { let arguments = (ins Variadic:$inputs, VHLO_AnyType:$scatter_indices, @@ -880,7 +896,8 @@ def VHLO_ScatterOpV1 : VHLO_Op<"scatter_v1", "0.9.0", "current", [SameVariadicOp let results = (outs Variadic:$results); } -def VHLO_SelectAndScatterOpV1 : VHLO_Op<"select_and_scatter_v1", "0.9.0", "current"> { +def VHLO_SelectAndScatterOpV1 : VHLO_Op<"select_and_scatter_v1", "0.9.0", "current", + [DeclareOpInterfaceMethods]> { let arguments = (ins VHLO_AnyType:$operand, VHLO_AnyType:$source, diff --git a/stablehlo/reference/Ops.cpp b/stablehlo/reference/Ops.cpp index 58bbac38dac..a24021f1c08 100644 --- a/stablehlo/reference/Ops.cpp +++ b/stablehlo/reference/Ops.cpp @@ -82,7 +82,7 @@ SmallVector evalReduceOp(ArrayRef inputs, Builder builder(inputs[0].getType().getContext()); auto reduceStatus = hlo::inferReduceOp( /*location=*/{}, inputTypes, initValueTypes, - builder.getI64TensorAttr(dimensions), inferredReduceTypes); + builder.getI64TensorAttr(dimensions), body, inferredReduceTypes); if (failed(reduceStatus)) report_fatal_error( invalidArgument("Could not infer ReduceOp's return type")); diff --git a/stablehlo/tests/infer_stablehlo.mlir b/stablehlo/tests/infer_stablehlo.mlir index e1cfe718520..727cf770604 100644 --- a/stablehlo/tests/infer_stablehlo.mlir +++ b/stablehlo/tests/infer_stablehlo.mlir @@ -102,6 
+102,24 @@ func.func @cholesky(%arg0: tensor<1x2x2xf32>) -> tensor<1x2x2xindex> { // ----- +// CHECK-LABEL: func @all_reduce_c6_c7 +func.func @all_reduce_c6_c7(%operand: tensor<10xf32>) -> tensor<10xindex> { + + %0 = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + "stablehlo.return"(%0) : (tensor) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor<10xf32>) -> tensor<10xf64> + // CHECK: types0 = tensor<10xf64> + %1 = "hlo_test_infer.get_return_types"(%0) : (tensor<10xf64>) -> tensor<10xindex> + func.return %1 : tensor<10xindex> +} + +// ----- + // CHECK-LABEL: func @all_to_all_c9 func.func @all_to_all_c9(%data: tensor<4x16xf32>) -> tensor<16x4xindex> { %0 = "stablehlo.all_to_all"(%data) { @@ -584,8 +602,8 @@ func.func @after_all_empty_arg() -> !stablehlo.token { // ----- -// CHECK: func @select_and_scatter_c11 -func.func @select_and_scatter_c11( +// CHECK: func @select_and_scatter_c11_c12 +func.func @select_and_scatter_c11_c12( %arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x12x12x64xf32>) -> tensor<10x24x24x64xindex> { %0 = stablehlo.constant dense<0.000000e+00> : tensor @@ -612,8 +630,35 @@ func.func @select_and_scatter_c11( // ----- -// CHECK-LABEL: func @scatter_c16 -func.func @scatter_c16(%input_tensor: tensor<200x100x300xf32>, +// CHECK: func @select_and_scatter_c11_c12 +func.func @select_and_scatter_c11_c12( + %arg0: tensor<10x24x24x64xf32>, + %arg1: tensor<10x12x12x64xf32>) -> tensor<10x24x24x64xindex> { + %0 = stablehlo.constant dense<0.000000e+00> : tensor + %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %2 = "stablehlo.compare"(%arg3, %arg4) { + compare_type = #stablehlo, + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + "stablehlo.return"(%2) : (tensor) -> () + }, { + ^bb0(%arg3: tensor, %arg4: tensor): + %2 = stablehlo.add %arg3, %arg4 : tensor + "stablehlo.return"(%2) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, + window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64> + } : (tensor<10x24x24x64xf32>, tensor<10x12x12x64xf32>, tensor) -> + tensor<10x24x24x64xf64> + %2 = "hlo_test_infer.get_return_types"(%1) : (tensor<10x24x24x64xf64>) -> tensor<10x24x24x64xindex> + func.return %2 : tensor<10x24x24x64xindex> +} + +// ----- + +// CHECK-LABEL: func @scatter_c16_c17 +func.func @scatter_c16_c17(%input_tensor: tensor<200x100x300xf32>, %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> tensor<200x100x300xindex> { %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ @@ -638,6 +683,32 @@ func.func @scatter_c16(%input_tensor: tensor<200x100x300xf32>, // ----- +// CHECK-LABEL: func @scatter_c16_c17 +func.func @scatter_c16_c17(%input_tensor: tensor<200x100x300xf32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> + tensor<200x100x300xindex> { + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> + 
tensor<200x100x300xf64> + // CHECK: types0 = tensor<200x100x300xf64> + %1 = "hlo_test_infer.get_return_types"(%0) : (tensor<200x100x300xf64>) -> tensor<200x100x300xindex> + func.return %1 : tensor<200x100x300xindex> +} + +// ----- + // CHECK-LABEL: func @scatter_bounds func.func @scatter_bounds(%input_tensor: tensor<200x?x?xf32, #stablehlo.bounds>, %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> @@ -864,7 +935,24 @@ func.func @reduce_c7(%arg0: tensor<7x5xf32>, %arg1 : tensor<5xf32>) -> tensor<6x // ----- -func.func @reduce_c7(%arg0: tensor, %arg1: tensor, +func.func @reduce_c8(%arg0: tensor<4x4xf32>, %arg1 : tensor) + -> (tensor<4xf32>) { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1{{'stablehlo.reduce' op inferred type(s) 'tensor<4xf64>' are incompatible with return type(s) of operation 'tensor<4xf32>'}} + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xf32>, tensor) -> tensor<4xf32> + + func.return %0: tensor<4xf32> +} + +// ----- + +func.func @reduce_c3_c7(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> (tensor) { // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{inferred type(s) 'tensor', 'tensor' are incompatible with return type(s) of operation 'tensor', 'tensor', 'tensor'}} @@ -882,7 +970,7 @@ func.func @reduce_c7(%arg0: tensor, %arg1: tensor, // ----- -func.func @reduce_c7(%arg0: tensor, %arg1: tensor, +func.func @reduce_c7_c8(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> (tensor) { // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{'stablehlo.reduce' op inferred type(s) 'tensor', 'tensor' are incompatible with return type(s) of operation 'tensor', 'tensor'}} @@ -900,7 +988,7 @@ func.func @reduce_c7(%arg0: tensor, %arg1: tensor, // ----- -func.func @reduce_c7(%arg0: tensor, %arg1 : tensor) +func.func @reduce_c8(%arg0: tensor, %arg1 : tensor) -> (tensor) { // expected-error@+2 {{failed to infer returned types}} // expected-error@+1 {{'stablehlo.reduce' op inferred type(s) 'tensor' are incompatible with return type(s) of operation 'tensor'}} @@ -1023,6 +1111,25 @@ func.func @reduce_window_c16(%arg0: tensor<4x2xf32>, // ----- +func.func @reduce_window_c16(%arg0: tensor<4x2xf32>, %init0: tensor) -> + (tensor<2x2xf32>) { + // expected-error@+2 {{failed to infer returned types}} + // expected-error@+1 {{inferred type(s) 'tensor<2x2xf64>' are incompatible with return type(s) of operation 'tensor<2x2xf32>'}} + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor, %b0: tensor): + %1 = stablehlo.add %a0, %b0 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> + } + : (tensor<4x2xf32>, tensor) -> (tensor<2x2xf32>) + func.return %0 : tensor<2x2xf32> +} + +// ----- + //===----------------------------------------------------------------------===// // Bounded Dynamism //===----------------------------------------------------------------------===// diff --git a/stablehlo/tests/ops_stablehlo.mlir b/stablehlo/tests/ops_stablehlo.mlir index c7b33ddd21e..2b7c6a4a3c3 100644 --- a/stablehlo/tests/ops_stablehlo.mlir +++ b/stablehlo/tests/ops_stablehlo.mlir @@ -27,6 +27,40 @@ func.func 
@all_reduce(%operand: tensor<10xf32>) -> tensor<10xf32> { func.return %0 : tensor<10xf32> } +// ----- + +// CHECK-LABEL: func @all_reduce_with_promotable_types +func.func @all_reduce_with_promotable_types(%operand: tensor) -> tensor { + + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + "stablehlo.return"(%0) : (tensor) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor) -> tensor + + func.return %result : tensor +} + +// ----- + +// CHECK-LABEL: func @all_reduce_with_promotable_quantized_types +func.func @all_reduce_with_promotable_quantized_types(%operand: tensor>) + -> tensor> { + + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor>, %arg1: tensor>): + %0 = stablehlo.add %arg0, %arg1 : tensor> + "stablehlo.return"(%0) : (tensor>) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor>) -> tensor> + + func.return %result : tensor> +} // ----- @@ -186,7 +220,7 @@ func.func @all_reduce_c5(%operand: tensor<10xf32>) -> tensor<10xf32> { // ----- func.func @all_reduce_c5(%operand: tensor<10xf32>) -> tensor<10xf32> { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} %0 = "stablehlo.all_reduce"(%operand) ({ ^bb0(%arg0: tensor, %arg1: tensor): %max = stablehlo.maximum %arg0, %arg1 : tensor @@ -201,7 +235,7 @@ func.func @all_reduce_c5(%operand: tensor<10xf32>) -> tensor<10xf32> { // ----- func.func @all_reduce_c5(%operand: tensor<10xf32>) -> tensor<10xf32> { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor<4xf32>' vs 'tensor'}} + // expected-error@+1 {{The shape of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor<4xf32>' vs 'tensor'}} %0 = "stablehlo.all_reduce"(%operand) ({ ^bb0(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>): %max = stablehlo.maximum %arg0, %arg1 : tensor<4xf32> @@ -215,6 +249,40 @@ func.func @all_reduce_c5(%operand: tensor<10xf32>) -> tensor<10xf32> { // ----- +func.func @all_reduce_c5(%operand: tensor) -> tensor { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + "stablehlo.return"(%0) : (tensor) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor) -> tensor + + func.return %result : tensor +} +// ----- + +func.func @all_reduce_c5(%operand: tensor>) + -> tensor> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor>' vs 'tensor>'}} + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor>, %arg1: tensor>): + %0 = stablehlo.add %arg0, %arg1 : tensor> + "stablehlo.return"(%0) : (tensor>) 
-> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor>) -> tensor> + + func.return %result : tensor> +} + +// ----- + // CHECK-LABEL: func @reduce_scatter func.func @reduce_scatter(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { %0 = "stablehlo.reduce_scatter"(%data) ({ @@ -245,6 +313,38 @@ func.func @reduce_scatter_dynamic(%data: tensor) -> tensor { // ----- +// CHECK-LABEL: func @reduce_scatter_with_promotable_types +func.func @reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = stablehlo.add %arg2, %arg3 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16xf32>) -> tensor<4x4xf64> + func.return %0 : tensor<4x4xf64> +} + +// ----- + +// CHECK-LABEL: func @reduce_scatter_with_promotable_quantized_types +func.func @reduce_scatter_with_promotable_quantized_types( + %data: tensor<4x16x!quant.uniform>) -> + tensor<4x4x!quant.uniform> { + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor>, %arg3: tensor>): + %1 = stablehlo.add %arg2, %arg3 : tensor> + "stablehlo.return"(%1) : (tensor>) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16x!quant.uniform>) -> tensor<4x4x!quant.uniform> + func.return %0 : tensor<4x4x!quant.uniform> +} + +// ----- + func.func @reduce_scatter_c2(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { // expected-error@+1 {{expects scatter_dimension >= 0}} %0 = "stablehlo.reduce_scatter"(%data) ({ @@ -404,7 +504,7 @@ func.func @reduce_scatter_c7(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { // ----- func.func @reduce_scatter_c7(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} %0 = "stablehlo.reduce_scatter"(%data) ({ ^bb0(%arg2: tensor, %arg3: tensor): %1 = stablehlo.add %arg2, %arg3 : tensor @@ -416,6 +516,39 @@ func.func @reduce_scatter_c7(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { // ----- +func.func @reduce_scatter_c7(%data: tensor<4x16xi32>) -> tensor<4x4xi8> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = stablehlo.add %arg2, %arg3 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16xi32>) -> tensor<4x4xi8> + func.return %0 : tensor<4x4xi8> +} + +// ----- + +func.func @reduce_scatter_c7(%data: tensor<4x16x!quant.uniform>) + -> tensor<4x4x!quant.uniform> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value 
element-type: 'tensor>' vs 'tensor>'}} + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor>, %arg3: tensor>): + %1 = stablehlo.add %arg2, %arg3 : tensor> + "stablehlo.return"(%1) : (tensor>) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16x!quant.uniform>) -> tensor<4x4x!quant.uniform> + func.return %0 : tensor<4x4x!quant.uniform> +} + +// ----- + func.func @reduce_scatter_c8(%data: tensor<4x16xf32>) -> tensor<4xf32> { // expected-error@+1 {{operand and result should have same rank}} %0 = "stablehlo.reduce_scatter"(%data) ({ @@ -455,6 +588,21 @@ func.func @reduce_scatter_c8(%data: tensor<4x16xf32>) -> tensor<3x4xf32> { // ----- +func.func @reduce_scatter_c9(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { + // expected-error@+1 {{result element-type is expected to be 'f64', but got 'f32'}} + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = stablehlo.add %arg2, %arg3 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16xf32>) -> tensor<4x4xf32> + func.return %0 : tensor<4x4xf32> +} + +// ----- + func.func @reduce_scatter_i3(%data: tensor<4x16xf32>) -> tensor<4x4xf32> { // expected-error@+1 {{replica groups should be a rank 2 tensor}} %0 = "stablehlo.reduce_scatter"(%data) ({ diff --git a/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir b/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir new file mode 100644 index 00000000000..acd729402ff --- /dev/null +++ b/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir @@ -0,0 +1,2410 @@ +// RUN: stablehlo-opt --mlir-print-op-generic %s.bc | FileCheck %s +// RUN: stablehlo-translate --deserialize %s.bc | stablehlo-translate --serialize --target=0.17.0 | stablehlo-opt --mlir-print-op-generic | FileCheck %s +// RUN: diff <(stablehlo-translate --deserialize %s.bc | stablehlo-opt) <(stablehlo-opt --strip-debuginfo %s) +// RUN: diff %s.bc <(stablehlo-translate --serialize --target=0.17.0 --strip-debuginfo %s) + +// CHECK-WARN-NOT: Not Implemented + +// ============ ATTRIBUTES ============ + +// CHECK-LABEL: "attr_comparison_direction_eq" +func.func @attr_comparison_direction_eq(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_direction_ne" +func.func @attr_comparison_direction_ne(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_direction_ge" +func.func @attr_comparison_direction_ge(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_direction_gt" +func.func @attr_comparison_direction_gt(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) 
-> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_direction_le" +func.func @attr_comparison_direction_le(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_direction_lt" +func.func @attr_comparison_direction_lt(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + // CHECK: comparison_direction = #vhlo + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_type_notype" +func.func @attr_comparison_type_notype(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo + // CHECK: compare_type = #vhlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_type_float" +func.func @attr_comparison_type_float(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo, + // CHECK: compare_type = #vhlo, + compare_type = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_type_totalorder" +func.func @attr_comparison_type_totalorder(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo, + // CHECK: compare_type = #vhlo, + compare_type = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_type_signed" +func.func @attr_comparison_type_signed(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo, + // CHECK: compare_type = #vhlo, + compare_type = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_comparison_type_unsigned" +func.func @attr_comparison_type_unsigned(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo, + // CHECK: compare_type = #vhlo, + compare_type = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// ConvDimensionNumbers aka #stablehlo.conv is covered below. 
+ +// CHECK-LABEL: "attr_custom_call_api_version_unspecified" +func.func @attr_custom_call_api_version_unspecified(%arg0: tensor) -> tensor { + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo", + // CHECK: api_version = #vhlo + api_version = 0 : i32 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_custom_call_api_version_original" +func.func @attr_custom_call_api_version_original(%arg0: tensor) -> tensor { + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo", + // CHECK: api_version = #vhlo + api_version = 1 : i32 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_custom_call_api_version_status_returning" +func.func @attr_custom_call_api_version_status_returning(%arg0: tensor) -> tensor { + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo", + // CHECK: api_version = #vhlo + api_version = 2 : i32 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_custom_call_api_version_status_returning_unified" +func.func @attr_custom_call_api_version_status_returning_unified(%arg0: tensor) -> tensor { + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo", + // CHECK: api_version = #vhlo + api_version = 3 : i32 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_dict" +// CHECK: #vhlo.dict_v1<{#vhlo.string_v1<"attr1"> = #vhlo.integer_v1<1 : i32>, #vhlo.string_v1<"attr2"> = #vhlo.integer_v1<2 : i32>} +func.func @attr_dict() attributes {stablehlo.attr = {attr1 = 1 : i32, attr2 = 2 : i32}} { + return +} + +// DotDimensionNumbers aka #stablehlo.dot is covered below. + +// CHECK-LABEL: "attr_fft_type_fft" +func.func @attr_fft_type_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { + %0 = "stablehlo.fft"(%arg0) { + // CHECK: fft_type = #vhlo + fft_type = #stablehlo, + fft_length = array + } : (tensor<16xcomplex>) -> tensor<16xcomplex> + func.return %0 : tensor<16xcomplex> +} + +// CHECK-LABEL: "attr_fft_type_ifft" +func.func @attr_fft_type_ifft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { + %0 = "stablehlo.fft"(%arg0) { + // CHECK: fft_type = #vhlo + fft_type = #stablehlo, + fft_length = array + } : (tensor<16xcomplex>) -> tensor<16xcomplex> + func.return %0 : tensor<16xcomplex> +} + +// CHECK-LABEL: "attr_fft_type_rfft" +func.func @attr_fft_type_rfft(%arg0: tensor<16xf32>) -> tensor<9xcomplex> { + %0 = "stablehlo.fft"(%arg0) { + // CHECK: fft_type = #vhlo + fft_type = #stablehlo, + fft_length = array + } : (tensor<16xf32>) -> tensor<9xcomplex> + func.return %0 : tensor<9xcomplex> +} + +// CHECK-LABEL: "attr_fft_type_irfft" +func.func @attr_fft_type_irfft(%arg0: tensor<9xcomplex>) -> tensor<16xf32> { + %0 = "stablehlo.fft"(%arg0) { + // CHECK: fft_type = #vhlo + fft_type = #stablehlo, + fft_length = array + } : (tensor<9xcomplex>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// GatherDimensionNumbers aka #stablehlo.gather is covered below. 
+ +// CHECK-LABEL: "attr_precision_config_default" +func.func @attr_precision_config_default(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + %0 = "stablehlo.dot"(%arg0, %arg1) { + // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> + } : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "attr_precision_config_high" +func.func @attr_precision_config_high(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + %0 = "stablehlo.dot"(%arg0, %arg1) { + // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> + precision_config = [#stablehlo, #stablehlo] + } : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "attr_precision_config_highest" +func.func @attr_precision_config_highest(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + %0 = "stablehlo.dot"(%arg0, %arg1) { + // CHECK: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> + precision_config = [#stablehlo, #stablehlo] + } : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "attr_rng_algorithm_default" +func.func @attr_rng_algorithm_default(%arg0: tensor) -> (tensor, tensor) { + %0:2 = "stablehlo.rng_bit_generator"(%arg0) { + // CHECK: rng_algorithm = #vhlo + rng_algorithm = #stablehlo + } : (tensor) -> (tensor, tensor) + func.return %0#0, %0#1 : tensor, tensor +} + +// CHECK-LABEL: "attr_rng_algorithm_three_fry" +func.func @attr_rng_algorithm_three_fry(%arg0: tensor) -> (tensor, tensor) { + %0:2 = "stablehlo.rng_bit_generator"(%arg0) { + // CHECK: rng_algorithm = #vhlo + rng_algorithm = #stablehlo + } : (tensor) -> (tensor, tensor) + func.return %0#0, %0#1 : tensor, tensor +} + +// CHECK-LABEL: "attr_rng_algorithm_philox" +func.func @attr_rng_algorithm_philox(%arg0: tensor) -> (tensor, tensor) { + %0:2 = "stablehlo.rng_bit_generator"(%arg0) { + // CHECK: rng_algorithm = #vhlo + rng_algorithm = #stablehlo + } : (tensor) -> (tensor, tensor) + func.return %0#0, %0#1 : tensor, tensor +} + +// CHECK-LABEL: "attr_rng_distribution_uniform" +func.func @attr_rng_distribution_uniform(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { + // CHECK: rng_distribution = #vhlo + rng_distribution = #stablehlo + } : (tensor, tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "attr_rng_distribution_normal" +func.func @attr_rng_distribution_normal(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { + // CHECK: rng_distribution = #vhlo + rng_distribution = #stablehlo + } : (tensor, tensor, tensor) -> tensor + func.return %0 : tensor +} + +// ScatterDimensionNumbers aka #stablehlo.scatter is covered below. 
+ +// CHECK-LABEL: "attr_transpose_no_transpose" +func.func @attr_transpose_no_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { + %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { + left_side = true, + lower = true, + unit_diagonal = true, + // transpose_a = #vhlo, + transpose_a = #stablehlo + } : (tensor<16x16xf32>, tensor<16x16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "attr_transpose_transpose" +func.func @attr_transpose_transpose(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { + %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { + left_side = true, + lower = true, + unit_diagonal = true, + // transpose_a = #vhlo, + transpose_a = #stablehlo + } : (tensor<16x16xf32>, tensor<16x16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "attr_transpose_adjoint" +func.func @attr_transpose_adjoint(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { + %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { + left_side = true, + lower = true, + unit_diagonal = true, + // transpose_a = #vhlo, + transpose_a = #stablehlo + } : (tensor<16x16xf32>, tensor<16x16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// TypeExtensionsAttr aka #stablehlo.type_extensions is covered below. + +// CHECK-LABEL: "attr_type_extensions_bounds" +func.func @attr_type_extensions_bounds( + %arg0: tensor>) + -> tensor> { + // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1>) -> () + func.return %arg0 : tensor> +} + + +// ============ DEFAULTS ============ + +// CHECK-LABEL: "default_all_gather" +func.func @default_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { + // CHECK: "vhlo.all_gather_v1"(%arg0) <{ + // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> + %0 = "stablehlo.all_gather"(%arg0) { + all_gather_dim = 1 : i64, + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64> + } : (tensor<16x8xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "default_all_reduce" +func.func @default_all_reduce(%arg0: tensor) -> tensor { + // CHECK: "vhlo.all_reduce_v1"(%arg0) + // CHECK-SAME: <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + + %0 = "stablehlo.all_reduce"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.add"(%arg1, %arg2) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64> + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "default_all_to_all" +func.func @default_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { + // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ + // CHECK-SAME: 
channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, + // CHECK-SAME: split_count = #vhlo.integer_v1<4 : i64> + // CHECK-SAME: split_dimension = #vhlo.integer_v1<1 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x4x!vhlo.f32_v1> + %0 = "stablehlo.all_to_all"(%arg0) { + split_dimension = 1 : i64, + concat_dimension = 0 : i64, + split_count = 4 : i64, + replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64> + } : (tensor<4x16xf32>) -> tensor<16x4xf32> + func.return %0 : tensor<16x4xf32> +} + +// CHECK-LABEL: "default_cholesky" +func.func @default_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { + // CHECK: "vhlo.cholesky_v1"(%arg0) <{ + // CHECK-SAME: lower = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> + %0 = "stablehlo.cholesky"(%arg0) : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> + func.return %0 : tensor<1x16x16xf32> +} + +// CHECK-LABEL: "default_collective_permute" +func.func @default_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { + // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> + %0 = "stablehlo.collective_permute"(%arg0) { + source_target_pairs = dense<[[0, 1], [1, 2], [2, 3]]> : tensor<3x2xi64> + } : (tensor<16x8xf32>) -> tensor<16x8xf32> + func.return %0 : tensor<16x8xf32> +} + +// CHECK-LABEL: "default_collective_broadcast" +func.func @default_collective_broadcast(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { + // CHECK: "vhlo.collective_broadcast_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> + %0 = "stablehlo.collective_broadcast"(%arg0) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64> + } : (tensor<16x8xf32>) -> tensor<16x8xf32> + func.return %0 : tensor<16x8xf32> +} + +// CHECK-LABEL: "default_compare" +func.func @default_compare(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ + // CHECK-SAME: compare_type = #vhlo, + // CHECK-SAME: comparison_direction = #vhlo + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "default_convolution" +func.func @default_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> { + // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ + // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: input_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: input_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: kernel_input_feature_dimension = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: kernel_output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: kernel_spatial_dimensions = #vhlo.tensor_v1 
: tensor<2xi64>>, + // CHECK-SAME: lhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: output_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: output_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<2x2xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: window_reversal = #vhlo.tensor_v1 : tensor<2xi1>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x8x8x207x!vhlo.f32_v1>, !vhlo.tensor_v1<3x3x207x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x6x6x16x!vhlo.f32_v1> + %0 = "stablehlo.convolution"(%arg0, %arg1) { + dimension_numbers = #stablehlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, + feature_group_count = 1 : i64, + batch_group_count = 1 : i64 + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>) -> tensor<1x6x6x16xf32> + func.return %0 : tensor<1x6x6x16xf32> +} + +// CHECK-LABEL: "default_custom_call" +func.func @default_custom_call(%arg0: tensor) -> tensor { + // CHECK: "vhlo.custom_call_v1"(%arg0) <{ + // CHECK-SAME: api_version = #vhlo, + // CHECK-SAME: backend_config = #vhlo.string_v1<"">, + // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, + // CHECK-SAME: called_computations = #vhlo.array_v1<[]>, + // CHECK-SAME: has_side_effect = #vhlo.bool_v1, + // CHECK-SAME: operand_layouts = #vhlo.array_v1<[]>, + // CHECK-SAME: output_operand_aliases = #vhlo.array_v1<[]> + // CHECK-SAME: result_layouts = #vhlo.array_v1<[]> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo" + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "default_dot_general" +func.func @default_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { + // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ + // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: rhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<8x16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x8x!vhlo.f32_v1> + %0 = "stablehlo.dot_general"(%arg0, %arg1) { + dot_dimension_numbers = #stablehlo.dot< + lhs_batching_dimensions = [0], + lhs_contracting_dimensions = [2], + rhs_batching_dimensions = [0], + rhs_contracting_dimensions = [1] + > + } : (tensor<8x8x16xf32>, tensor<8x16x8xf32>) -> tensor<8x8x8xf32> + func.return %0 : tensor<8x8x8xf32> +} + +// CHECK-LABEL: "default_dot" +func.func @default_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> + %0 = "stablehlo.dot"(%arg0, %arg1) : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "default_dynamic_broadcast_in_dim" +func.func @default_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> 
tensor { + // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ + // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>>, + // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<0xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.dynamic_broadcast_in_dim"(%arg0, %arg1) { + broadcast_dimensions = array + } : (tensor, tensor<2xindex>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "default_dynamic_conv" +func.func @default_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { + // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: input_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: input_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: kernel_input_feature_dimension = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: kernel_output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: kernel_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: lhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: output_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: output_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<2x2xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: window_reversal = #vhlo.tensor_v1 : tensor<2xi1>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x8x8x207x!vhlo.f32_v1>, !vhlo.tensor_v1<3x3x207x16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x?x?x16x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_conv"(%arg0, %arg1, %arg2) { + dimension_numbers = #stablehlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, + feature_group_count = 1 : i64, + batch_group_count = 1 : i64 + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + func.return %0 : tensor<1x?x?x16xf32> +} + +// CHECK-LABEL: "default_dynamic_gather" +func.func @default_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { + // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: offset_dims = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: start_index_map = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<2x4x9x!vhlo.f32_v1>, !vhlo.tensor_v1<1x5x2x!vhlo.i32_v1>, !vhlo.tensor_v1<3x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x5x8x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_gather"(%arg0, %arg1, %arg2) { + dimension_numbers = #stablehlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + > + } : (tensor<2x4x9xf32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xf32> + func.return 
%0 : tensor<1x5x8xf32> +} + +func.func @default_func(%arg0: tensor) -> tensor { + // CHECK: "vhlo.func_v1"() <{ + // CHECK-SAME: arg_attrs = #vhlo.array_v1<[]>, + // CHECK-SAME: function_type = #vhlo.type_v1) -> !vhlo.tensor_v1>>, + // CHECK-SAME: res_attrs = #vhlo.array_v1<[]>, + // CHECK-SAME: sym_name = #vhlo.string_v1<"default_func">, + // CHECK-SAME: sym_visibility = #vhlo.string_v1<""> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): + // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : () -> () + func.return %arg0 : tensor +} + +// CHECK-LABEL: "dynamic_gather" +func.func @dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { + // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ + // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: offset_dims = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<3xi64>>, + // CHECK-SAME: start_index_map = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<2x4x9x!vhlo.f32_v1>, !vhlo.tensor_v1<1x5x2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x5x1x!vhlo.f32_v1> + %0 = "stablehlo.gather"(%arg0, %arg1) { + dimension_numbers = #stablehlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + slice_sizes = dense<1> : tensor<3xi64> + } : (tensor<2x4x9xf32>, tensor<1x5x2xi32>) -> tensor<1x5x1xf32> + func.return %0 : tensor<1x5x1xf32> +} + +// CHECK-LABEL: "default_infeed" +func.func @default_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { + // CHECK: "vhlo.infeed_v1"(%arg0) <{ + // CHECK-SAME: infeed_config = #vhlo.string_v1<"">, + // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[]> + // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) + %0:2 = "stablehlo.infeed"(%arg0) : (!stablehlo.token) -> (tensor, !stablehlo.token) + func.return %0#0, %0#1 : tensor, !stablehlo.token +} + +// CHECK-LABEL: "default_outfeed" +func.func @default_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ + // CHECK-SAME: outfeed_config = #vhlo.string_v1<""> + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 + %0 = "stablehlo.outfeed"(%arg0, %arg1) : (tensor, !stablehlo.token) -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "default_recv" +func.func @default_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { + // CHECK: "vhlo.recv_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) + %0:2 = "stablehlo.recv"(%arg0) { + channel_handle = #stablehlo.channel_handle + } : (!stablehlo.token) -> (tensor, !stablehlo.token) + func.return %0#0, %0#1 : tensor, !stablehlo.token +} + +// CHECK-LABEL: "default_send" +func.func @default_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: channel_type = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> 
!vhlo.token_v1 + %0 = "stablehlo.send"(%arg0, %arg1) { + channel_handle = #stablehlo.channel_handle + } : (tensor, !stablehlo.token) -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "default_reduce_scatter" +func.func @default_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.reduce_scatter"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.add"(%arg1, %arg2) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + scatter_dimension = 0 : i64, + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64> + } : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "default_reduce_window" +func.func @default_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x16x30x7xf32> { + // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ + // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, + // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.maximum_v1"(%[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<2x17x31x7x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<2x16x30x7x!vhlo.f32_v1> + %0 = "stablehlo.reduce_window"(%arg0, %arg1) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = "stablehlo.maximum"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64> + } : (tensor<2x17x31x7xf32>, tensor) -> tensor<2x16x30x7xf32> + func.return %0 : tensor<2x16x30x7xf32> +} + +// CHECK-LABEL: "default_scatter" +func.func @default_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { + // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: scatter_dims_to_operand_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: unique_indices = #vhlo.bool_v1, + // CHECK-SAME: update_window_dims = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = 
"vhlo.add_v1"(%[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f32_v1> + %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + > + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> tensor<200x100x300xf32> + func.return %0 : tensor<200x100x300xf32> +} + +// CHECK-LABEL: "default_select_and_scatter" +func.func @default_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<10x23x23x64xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { + // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, + // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG31:arg.*]]: !vhlo.tensor_v1, %[[ARG41:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL11:.*]] = "vhlo.compare_v1"(%[[ARG31]], %[[ARG41]]) <{compare_type = #vhlo, comparison_direction = #vhlo}> + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL11]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }, { + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG32:arg.*]]: !vhlo.tensor_v1, %[[ARG42:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL12:.*]] = "vhlo.add_v1"(%[[ARG32]], %[[ARG42]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL12]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1>, !vhlo.tensor_v1<10x23x23x64x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1> + %0 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.compare"(%arg3, %arg4) {compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64> + } : (tensor<10x24x24x64xf32>, tensor<10x23x23x64xf32>, tensor) -> tensor<10x24x24x64xf32> + func.return %0 : tensor<10x24x24x64xf32> +} + +// CHECK-LABEL: "default_sort" +func.func @default_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.sort_v1"(%arg0) <{ + // CHECK-SAME: dimension = #vhlo.integer_v1<-1 : i64> + // CHECK-SAME: is_stable = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.compare_v1"(%[[ARG1]], %[[ARG2]]) <{compare_type = #vhlo, comparison_direction = #vhlo}> + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.sort"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.compare"(%arg1, %arg2) 
{compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// ============ OPS ============ + +// CHECK-LABEL: "op_abs" +func.func @op_abs(%arg0: tensor) -> tensor { + // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_add" +func.func @op_add(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_after_all" +func.func @op_after_all(%arg0: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.after_all_v1"(%arg0) : (!vhlo.token_v1) -> !vhlo.token_v1 + %0 = "stablehlo.after_all"(%arg0) : (!stablehlo.token) -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "op_all_gather" +func.func @op_all_gather(%arg0: tensor<16x8xf32>) -> tensor<16x16xf32> { + // CHECK: "vhlo.all_gather_v1"(%arg0) <{ + // CHECK-SAME: all_gather_dim = #vhlo.integer_v1<1 : i64> + // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> + %0 = "stablehlo.all_gather"(%arg0) { + all_gather_dim = 1 : i64, + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64>, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids + } : (tensor<16x8xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "op_all_reduce" +func.func @op_all_reduce(%arg0: tensor) -> tensor { + // CHECK: "vhlo.all_reduce_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.all_reduce"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.add"(%arg1, %arg2) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64>, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_all_reduce_with_promotable_types" +func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { + // CHECK: "vhlo.all_reduce_v1"(%arg0) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + "stablehlo.return"(%0) : (tensor) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + 
channel_handle = #stablehlo.channel_handle, + use_global_device_ids + } : (tensor) -> tensor + + func.return %result : tensor +} + +// CHECK-LABEL: "op_all_to_all" +func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { + // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: concat_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<1x4xi64>>, + // CHECK-SAME: split_count = #vhlo.integer_v1<4 : i64> + // CHECK-SAME: split_dimension = #vhlo.integer_v1<1 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x4x!vhlo.f32_v1> + %0 = "stablehlo.all_to_all"(%arg0) { + split_dimension = 1 : i64, + concat_dimension = 0 : i64, + split_count = 4 : i64, + replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor<4x16xf32>) -> tensor<16x4xf32> + func.return %0 : tensor<16x4xf32> +} + +// CHECK-LABEL: "op_and" +func.func @op_and(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_atan2" +func.func @op_atan2(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.atan2_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.atan2"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_batch_norm_grad" +func.func @op_batch_norm_grad(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { + // CHECK: "vhlo.batch_norm_grad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ + // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, + // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) + %0:3 = "stablehlo.batch_norm_grad"(%arg0, %arg1, %arg2, %arg3, %arg4) { + epsilon = 0.001 : f32, + feature_index = 0 : i64 + } : (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16x16x16x16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) + func.return %0#0, %0#1, %0#2 : tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32> +} + +// CHECK-LABEL: "op_batch_norm_inference" +func.func @op_batch_norm_inference(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>, %arg3: tensor<16xf32>, %arg4: tensor<16xf32>) -> tensor<16x16x16x16xf32> { + // CHECK: "vhlo.batch_norm_inference_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) <{ + // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, + // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1> + %0 = "stablehlo.batch_norm_inference"(%arg0, %arg1, %arg2, %arg3, %arg4) { + epsilon = 0.001 : f32, + 
feature_index = 0 : i64 + } : (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>, tensor<16xf32>) -> tensor<16x16x16x16xf32> + func.return %0 : tensor<16x16x16x16xf32> +} + +// CHECK-LABEL: "op_batch_norm_training" +func.func @op_batch_norm_training(%arg0: tensor<16x16x16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) { + // CHECK: "vhlo.batch_norm_training_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: epsilon = #vhlo.float_v1<1.000000e-03 : !vhlo.f32_v1>, + // CHECK-SAME: feature_index = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) -> (!vhlo.tensor_v1<16x16x16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x!vhlo.f32_v1>) + %0:3 = "stablehlo.batch_norm_training"(%arg0, %arg1, %arg2) { + epsilon = 0.001 : f32, + feature_index = 0 : i64 + } : (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) -> (tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32>) + func.return %0#0, %0#1, %0#2 : tensor<16x16x16x16xf32>, tensor<16xf32>, tensor<16xf32> +} + +// CHECK-LABEL: "op_bitcast_convert" +func.func @op_bitcast_convert(%arg0: tensor) -> tensor { + // CHECK: "vhlo.bitcast_convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.bitcast_convert"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_broadcast_in_dim" +func.func @op_broadcast_in_dim(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { + // CHECK: "vhlo.broadcast_in_dim_v1"(%arg0) <{ + // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> + %0 = "stablehlo.broadcast_in_dim"(%arg0) { + broadcast_dimensions = array + } : (tensor<16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "op_broadcast" +func.func @op_broadcast(%arg0: tensor<16xf32>) -> tensor<16x16xf32> { + // CHECK: "vhlo.broadcast_v1"(%arg0) <{ + // CHECK-SAME: broadcast_sizes = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> + %0 = "stablehlo.broadcast"(%arg0) { + broadcast_sizes = array + } : (tensor<16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "op_case" +func.func @op_case(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.case_v1"(%arg0) ({ + // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.case"(%arg0) ({ + "stablehlo.return"(%arg1) : (tensor) -> () + }) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_cbrt" +func.func @op_cbrt(%arg0: tensor) -> tensor { + // CHECK: "vhlo.cbrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.cbrt"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_ceil" +func.func @op_ceil(%arg0: tensor) -> tensor { + // CHECK: "vhlo.ceil_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.ceil"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_cholesky" +func.func @op_cholesky(%arg0: tensor<1x16x16xf32>) -> tensor<1x16x16xf32> { + // CHECK: "vhlo.cholesky_v1"(%arg0) <{ + // CHECK-SAME: lower = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1>) -> 
!vhlo.tensor_v1<1x16x16x!vhlo.f32_v1> + %0 = "stablehlo.cholesky"(%arg0) { + lower = true + } : (tensor<1x16x16xf32>) -> tensor<1x16x16xf32> + func.return %0 : tensor<1x16x16xf32> +} + +// CHECK-LABEL: "op_clamp" +func.func @op_clamp(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: "vhlo.clamp_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.clamp"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_count_leading_zeros" +func.func @op_count_leading_zeros(%arg0: tensor) -> tensor { + // CHECK: "vhlo.count_leading_zeros_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.count_leading_zeros"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_collective_permute" +func.func @op_collective_permute(%arg0: tensor<16x8xf32>) -> tensor<16x8xf32> { + // CHECK: "vhlo.collective_permute_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME{LITERAL}: source_target_pairs = #vhlo.tensor_v1 : tensor<3x2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x8x!vhlo.f32_v1> + %0 = "stablehlo.collective_permute"(%arg0) { + source_target_pairs = dense<[[0, 1], [1, 2], [2, 3]]> : tensor<3x2xi64>, + channel_handle = #stablehlo.channel_handle + } : (tensor<16x8xf32>) -> tensor<16x8xf32> + func.return %0 : tensor<16x8xf32> +} + +// CHECK-LABEL: "op_compare" +func.func @op_compare(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.compare_v1"(%arg0, %arg1) <{ + // CHECK-SAME: compare_type = #vhlo, + // CHECK-SAME: comparison_direction = #vhlo + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.compare"(%arg0, %arg1) { + comparison_direction = #stablehlo, + compare_type = #stablehlo + } : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_complex" +func.func @op_complex(%arg0: tensor, %arg1: tensor) -> tensor> { + // CHECK: "vhlo.complex_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1> + %0 = "stablehlo.complex"(%arg0, %arg1) : (tensor, tensor) -> tensor> + func.return %0 : tensor> +} + +// CHECK-LABEL: "op_compute_reshape_shape" +func.func @op_compute_reshape_shape(%arg0: index, %arg1: tensor<1xindex>) -> tensor<1xindex> { + // CHECK: "vhlo.compute_reshape_shape_v1"(%arg0, %arg1) : (!vhlo.index_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1<1x!vhlo.index_v1> + %0 = "stablehlo.compute_reshape_shape"(%arg0, %arg1) : (index, tensor<1xindex>) -> tensor<1xindex> + func.return %0 : tensor<1xindex> +} + +// CHECK-LABEL: "op_concatenate" +func.func @op_concatenate(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.concatenate_v1"(%arg0, %arg1) <{ + // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1<8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.concatenate"(%arg0, %arg1) { + dimension = 0 : i64 + } : (tensor<8xf32>, tensor<8xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_constant" +func.func @op_constant(%arg0: tensor) -> tensor { + // CHECK: "vhlo.constant_v1"() <{ + // CHECK-SAME: value = #vhlo.tensor_v1 : tensor> + // CHECK-SAME: }> : () -> !vhlo.tensor_v1 + %0 = "stablehlo.constant"() { + value = dense<0.0> : tensor + } : () -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: 
"op_convert" +func.func @op_convert(%arg0: tensor) -> tensor { + // CHECK: "vhlo.convert_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.convert"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_convolution" +func.func @op_convolution(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> { + // CHECK: "vhlo.convolution_v1"(%arg0, %arg1) <{ + // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: input_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: input_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: kernel_input_feature_dimension = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: kernel_output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: kernel_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: lhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: output_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: output_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<2x2xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: window_reversal = #vhlo.tensor_v1 : tensor<2xi1>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x8x8x207x!vhlo.f32_v1>, !vhlo.tensor_v1<3x3x207x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<1x7x7x16x!vhlo.f32_v1> + %0 = "stablehlo.convolution"(%arg0, %arg1) { + window_strides = dense<2> : tensor<2xi64>, + padding = dense<1> : tensor<2x2xi64>, + lhs_dilation = dense<2> : tensor<2xi64>, + rhs_dilation = dense<2> : tensor<2xi64>, + window_reversal = dense : tensor<2xi1>, + dimension_numbers = #stablehlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, + feature_group_count = 1 : i64, + batch_group_count = 1 : i64, + precision_config = [#stablehlo, #stablehlo] + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>) -> tensor<1x7x7x16xf32> + func.return %0 : tensor<1x7x7x16xf32> +} + +// CHECK-LABEL: "op_cosine" +func.func @op_cosine(%arg0: tensor) -> tensor { + // CHECK: "vhlo.cosine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.cosine"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_create_token" +func.func @op_create_token() -> !stablehlo.token { + // CHECK: "vhlo.create_token_v1"() : () -> !vhlo.token_v1 + %0 = "stablehlo.create_token"() : () -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "op_cross_replica_sum" +func.func @op_cross_replica_sum(%arg0: tensor) -> tensor { + // CHECK: "vhlo.cross-replica-sum_v1"(%arg0) <{ + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.cross-replica-sum"(%arg0) { + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64> + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_cstr_reshapable" +func.func @op_cstr_reshapable(%arg0: index, %arg1: tensor<1xindex>) -> !shape.witness { + // CHECK: "vhlo.cstr_reshapable_v1"(%arg0, %arg1) : (!vhlo.index_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.witness_v1 + %0 = 
"stablehlo.cstr_reshapable"(%arg0, %arg1) : (index, tensor<1xindex>) -> !shape.witness + func.return %0 : !shape.witness +} + +// CHECK-LABEL: "op_custom_call" +func.func @op_custom_call(%arg0: tensor) -> tensor { + // CHECK: "vhlo.custom_call_v1"(%arg0) <{ + // CHECK-SAME: api_version = #vhlo, + // CHECK-SAME: backend_config = #vhlo.string_v1<"\08\03\1A\02">, + // CHECK-SAME: call_target_name = #vhlo.string_v1<"foo">, + // CHECK-SAME: called_computations = #vhlo.array_v1<[#vhlo.string_v1<"foo">]>, + // CHECK-SAME: has_side_effect = #vhlo.bool_v1, + // CHECK-SAME: operand_layouts = #vhlo.array_v1<[#vhlo.tensor_v1 : tensor<0xindex>>]>, + // CHECK-SAME: output_operand_aliases = #vhlo.array_v1<[ + // CHECK-SAME: #vhlo.output_operand_alias_v1< + // CHECK-SAME: outputTupleIndices = [], + // CHECK-SAME: operandIndex = 0, + // CHECK-SAME: operandTupleIndices = []>]> + // CHECK-SAME: result_layouts = #vhlo.array_v1<[#vhlo.tensor_v1 : tensor<0xindex>>]> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo", + has_side_effect = true, + backend_config = "\08\03\1A\02", + api_version = 2 : i32, + called_computations = [@foo], + operand_layouts = [dense<> : tensor<0xindex>], + output_operand_aliases = [ + #stablehlo.output_operand_alias], + result_layouts = [dense<> : tensor<0xindex>] + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_divide" +func.func @op_divide(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.divide_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.divide"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_dot_general" +func.func @op_dot_general(%arg0: tensor<8x8x16xf32>, %arg1: tensor<8x16x8xf32>) -> tensor<8x8x8xf32> { + // CHECK: "vhlo.dot_general_v1"(%arg0, %arg1) <{ + // CHECK-SAME: lhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: lhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_batching_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: rhs_contracting_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<8x16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x8x!vhlo.f32_v1> + %0 = "stablehlo.dot_general"(%arg0, %arg1) { + dot_dimension_numbers = #stablehlo.dot< + lhs_batching_dimensions = [0], + lhs_contracting_dimensions = [2], + rhs_batching_dimensions = [0], + rhs_contracting_dimensions = [1] + >, + precision_config = [#stablehlo, #stablehlo] + } : (tensor<8x8x16xf32>, tensor<8x16x8xf32>) -> tensor<8x8x8xf32> + func.return %0 : tensor<8x8x8xf32> +} + +// CHECK-LABEL: "op_dot" +func.func @op_dot(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + // CHECK: "vhlo.dot_v1"(%arg0, %arg1) <{ + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> + %0 = "stablehlo.dot"(%arg0, %arg1) { + precision_config = [#stablehlo, #stablehlo] + } : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "op_dynamic_broadcast_in_dim" +func.func @op_dynamic_broadcast_in_dim(%arg0: tensor, %arg1: tensor<2xindex>) -> tensor { + // CHECK: "vhlo.dynamic_broadcast_in_dim_v1"(%arg0, %arg1) <{ 
+ // CHECK-SAME: broadcast_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: known_expanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: known_nonexpanding_dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1<2x!vhlo.index_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.dynamic_broadcast_in_dim"(%arg0, %arg1) { + broadcast_dimensions = array, + known_expanding_dimensions = array, + known_nonexpanding_dimensions = array + } : (tensor, tensor<2xindex>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_dynamic_conv" +func.func @op_dynamic_conv(%arg0: tensor<1x8x8x207xf32>, %arg1: tensor<3x3x207x16xf32>, %arg2: tensor<4xi32>) -> tensor<1x?x?x16xf32> { + // CHECK: "vhlo.dynamic_conv_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: batch_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: feature_group_count = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: input_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: input_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: input_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: kernel_input_feature_dimension = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: kernel_output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: kernel_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: lhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: output_batch_dimension = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: output_feature_dimension = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: output_spatial_dimensions = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<2x2xi64>>, + // CHECK-SAME: precision_config = #vhlo.array_v1<[#vhlo, #vhlo]>, + // CHECK-SAME: rhs_dilation = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: window_reversal = #vhlo.tensor_v1 : tensor<2xi1>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x8x8x207x!vhlo.f32_v1>, !vhlo.tensor_v1<3x3x207x16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x?x?x16x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_conv"(%arg0, %arg1, %arg2) { + window_strides = dense<2> : tensor<2xi64>, + padding = dense<1> : tensor<2x2xi64>, + lhs_dilation = dense<2> : tensor<2xi64>, + rhs_dilation = dense<2> : tensor<2xi64>, + window_reversal = dense : tensor<2xi1>, + dimension_numbers = #stablehlo.conv<[b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f]>, + feature_group_count = 1 : i64, + batch_group_count = 1 : i64, + precision_config = [#stablehlo, #stablehlo] + } : (tensor<1x8x8x207xf32>, tensor<3x3x207x16xf32>, tensor<4xi32>) -> tensor<1x?x?x16xf32> + func.return %0 : tensor<1x?x?x16xf32> +} + +// CHECK-LABEL: "op_dynamic_gather" +func.func @op_dynamic_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>, %arg2 : tensor<3xi32>) -> tensor<1x5x8xf32> { + // CHECK: "vhlo.dynamic_gather_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: offset_dims = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: start_index_map = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<2x4x9x!vhlo.f32_v1>, !vhlo.tensor_v1<1x5x2x!vhlo.i32_v1>, !vhlo.tensor_v1<3x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x5x8x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_gather"(%arg0, %arg1, %arg2) { 
+ dimension_numbers = #stablehlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + indices_are_sorted = true + } : (tensor<2x4x9xf32>, tensor<1x5x2xi32>, tensor<3xi32>) -> tensor<1x5x8xf32> + func.return %0 : tensor<1x5x8xf32> +} + +// CHECK-LABEL: "op_dynamic_iota" +func.func @op_dynamic_iota(%arg0: tensor<1xindex>) -> tensor { + // CHECK: "vhlo.dynamic_iota_v1"(%arg0) <{ + // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.dynamic_iota"(%arg0) { + iota_dimension = 0 : i64 + } : (tensor<1xindex>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_dynamic_pad" +func.func @op_dynamic_pad(%arg0: tensor, %arg1: tensor, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>, %arg4: tensor<1xindex>) -> tensor { + // CHECK: "vhlo.dynamic_pad_v1"(%arg0, %arg1, %arg2, %arg3, %arg4) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.dynamic_pad"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor, tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_dynamic_reshape" +func.func @op_dynamic_reshape(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { + // CHECK: "vhlo.dynamic_reshape_v1"(%arg0, %arg1) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.dynamic_reshape"(%arg0, %arg1) : (tensor<16xf32>, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_dynamic_slice" +func.func @op_dynamic_slice(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor<4xf32> { + // CHECK: "vhlo.dynamic_slice_v1"(%arg0, %arg1) <{ + // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_slice"(%arg0, %arg1) { + slice_sizes = array + } : (tensor<16xf32>, tensor) -> tensor<4xf32> + func.return %0 : tensor<4xf32> +} + +// CHECK-LABEL: "op_dynamic_update_slice" +func.func @op_dynamic_update_slice(%arg0: tensor<16xf32>, %arg1: tensor<4xf32>, %arg2: tensor) -> tensor<16xf32> { + // CHECK: "vhlo.dynamic_update_slice_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>, !vhlo.tensor_v1<4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.dynamic_update_slice"(%arg0, %arg1, %arg2) : (tensor<16xf32>, tensor<4xf32>, tensor) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_einsum" +func.func @op_einsum(%arg0: tensor<8x16xf32>, %arg1: tensor<16x8xf32>) -> tensor<8x8xf32> { + // CHECK: "vhlo.einsum_v1"(%arg0, %arg1) <{ + // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab,bc->ac"> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x8x!vhlo.f32_v1> + %0 = "stablehlo.einsum"(%arg0, %arg1) { + einsum_config = "ab,bc->ac" + } : (tensor<8x16xf32>, tensor<16x8xf32>) -> tensor<8x8xf32> + func.return %0 : tensor<8x8xf32> +} + +// CHECK-LABEL: "op_exponential_minus_one" +func.func @op_exponential_minus_one(%arg0: tensor) -> tensor { + // CHECK: "vhlo.exponential_minus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.exponential_minus_one"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_exponential" +func.func 
@op_exponential(%arg0: tensor) -> tensor { + // CHECK: "vhlo.exponential_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.exponential"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_fft" +func.func @op_fft(%arg0: tensor<16xcomplex>) -> tensor<16xcomplex> { + // CHECK: "vhlo.fft_v1"(%arg0) <{ + // CHECK-SAME: fft_length = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: fft_type = #vhlo + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.complex_v1>) -> !vhlo.tensor_v1<16x!vhlo.complex_v1> + %0 = "stablehlo.fft"(%arg0) { + fft_type = #stablehlo, + fft_length = array + } : (tensor<16xcomplex>) -> tensor<16xcomplex> + func.return %0 : tensor<16xcomplex> +} + +// CHECK-LABEL: "op_floor" +func.func @op_floor(%arg0: tensor) -> tensor { + // CHECK: "vhlo.floor_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.floor"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +func.func private @op_func(%arg0: tensor {stablehlo.arg = "0"}) -> (tensor {stablehlo.result = "0"}) { + // CHECK: "vhlo.func_v1"() <{ + // CHECK-SAME: arg_attrs = #vhlo.array_v1<[#vhlo.dict_v1<{#vhlo.string_v1<"stablehlo.arg"> = #vhlo.string_v1<"0">}>]>, + // CHECK-SAME: function_type = #vhlo.type_v1) -> !vhlo.tensor_v1>>, + // CHECK-SAME: res_attrs = #vhlo.array_v1<[#vhlo.dict_v1<{#vhlo.string_v1<"stablehlo.result"> = #vhlo.string_v1<"0">}>]>, + // CHECK-SAME: sym_name = #vhlo.string_v1<"op_func">, + // CHECK-SAME: sym_visibility = #vhlo.string_v1<"private"> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%arg0: !vhlo.tensor_v1): + // CHECK-NEXT: "vhlo.return_v1"(%arg0) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : () -> () + + func.return %arg0 : tensor +} + +// CHECK-LABEL: "op_gather" +func.func @op_gather(%arg0 : tensor<2x4x9xf32>, %arg1 : tensor<1x5x2xi32>) -> tensor<1x5x1xf32> { + // CHECK: "vhlo.gather_v1"(%arg0, %arg1) <{ + // CHECK-SAME: collapsed_slice_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: offset_dims = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: slice_sizes = #vhlo.tensor_v1 : tensor<3xi64>>, + // CHECK-SAME: start_index_map = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<2x4x9x!vhlo.f32_v1>, !vhlo.tensor_v1<1x5x2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<1x5x1x!vhlo.f32_v1> + %0 = "stablehlo.gather"(%arg0, %arg1) { + dimension_numbers = #stablehlo.gather< + offset_dims = [2], + collapsed_slice_dims = [0, 1], + start_index_map = [0, 1], + index_vector_dim = 2 + >, + slice_sizes = dense<1> : tensor<3xi64>, + indices_are_sorted = true + } : (tensor<2x4x9xf32>, tensor<1x5x2xi32>) -> tensor<1x5x1xf32> + func.return %0 : tensor<1x5x1xf32> +} + +// CHECK-LABEL: "op_get_dimension_size" +func.func @op_get_dimension_size(%arg0: tensor) -> tensor { + // CHECK: "vhlo.get_dimension_size_v1"(%arg0) <{ + // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.get_dimension_size"(%arg0) { + dimension = 0 : i64 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_get_tuple_element" +func.func @op_get_tuple_element(%arg0: tuple, tensor>) -> tensor { + // CHECK: "vhlo.get_tuple_element_v1"(%arg0) <{ + // CHECK-SAME: index = #vhlo.integer_v1<0 : i32> + // CHECK-SAME: }> : (!vhlo.tuple_v1, !vhlo.tensor_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.get_tuple_element"(%arg0) { + index = 0 : i32 + } : (tuple, 
tensor>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_if" +func.func @op_if(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: "vhlo.if_v1"(%arg0) ({ + // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }, { + // CHECK-NEXT: "vhlo.return_v1"(%arg2) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.if"(%arg0) ({ + "stablehlo.return"(%arg1) : (tensor) -> () + }, { + "stablehlo.return"(%arg2) : (tensor) -> () + }) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_imag" +func.func @op_imag(%arg0: tensor>) -> tensor { + // CHECK: "vhlo.imag_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.imag"(%arg0) : (tensor>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_infeed" +func.func @op_infeed(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { + // CHECK: "vhlo.infeed_v1"(%arg0) <{ + // CHECK-SAME: infeed_config = #vhlo.string_v1<"foo">, + // CHECK-SAME{LITERAL}: layout = #vhlo.array_v1<[#vhlo.array_v1<[]>]> + // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) + %0:2 = "stablehlo.infeed"(%arg0) { + infeed_config = "foo", + layout = [[]] + } : (!stablehlo.token) -> (tensor, !stablehlo.token) + func.return %0#0, %0#1 : tensor, !stablehlo.token +} + +// CHECK-LABEL: "op_iota" +func.func @op_iota() -> tensor<16xf32> { + // CHECK: "vhlo.iota_v1"() <{ + // CHECK-SAME: iota_dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : () -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.iota"() { + iota_dimension = 0 : i64 + } : () -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_is_finite" +func.func @op_is_finite(%arg0: tensor) -> tensor { + // CHECK: "vhlo.is_finite_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.is_finite"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_log" +func.func @op_log(%arg0: tensor) -> tensor { + // CHECK: "vhlo.log_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.log"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_log_plus_one" +func.func @op_log_plus_one(%arg0: tensor) -> tensor { + // CHECK: "vhlo.log_plus_one_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.log_plus_one"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_logistic" +func.func @op_logistic(%arg0: tensor) -> tensor { + // CHECK: "vhlo.logistic_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.logistic"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_map" +func.func @op_map(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.map_v1"(%arg0) <{ + // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.abs_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.map"(%arg0) ({ + ^bb0(%arg1: tensor): + %1 = "stablehlo.abs"(%arg1) : (tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + dimensions = dense<0> : tensor<1xi64> + } : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_maximum" +func.func 
@op_maximum(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.maximum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.maximum"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_minimum" +func.func @op_minimum(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.minimum_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.minimum"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_multiply" +func.func @op_multiply(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.multiply_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.multiply"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_negate" +func.func @op_negate(%arg0: tensor) -> tensor { + // CHECK: "vhlo.negate_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.negate"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_not" +func.func @op_not(%arg0: tensor) -> tensor { + // CHECK: "vhlo.not_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.not"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_optimization_barrier" +func.func @op_optimization_barrier(%arg0: tensor) -> tensor { + // CHECK: "vhlo.optimization_barrier_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.optimization_barrier"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_or" +func.func @op_or(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.or_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.or"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_outfeed" +func.func @op_outfeed(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.outfeed_v1"(%arg0, %arg1) <{ + // CHECK-SAME: outfeed_config = #vhlo.string_v1<"foo"> + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 + %0 = "stablehlo.outfeed"(%arg0, %arg1) { + outfeed_config = "foo" + } : (tensor, !stablehlo.token) -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "op_pad" +func.func @op_pad(%arg0: tensor<8xf32>, %arg1: tensor) -> tensor<16xf32> { + // CHECK: "vhlo.pad_v1"(%arg0, %arg1) <{ + // CHECK-SAME: edge_padding_high = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: edge_padding_low = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: interior_padding = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.pad"(%arg0, %arg1) { + edge_padding_high = array, + edge_padding_low = array, + interior_padding = array + } : (tensor<8xf32>, tensor) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_popcnt" +func.func @op_popcnt(%arg0: tensor) -> tensor { + // CHECK: "vhlo.popcnt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.popcnt"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_power" +func.func @op_power(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.power_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.power"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// 
CHECK-LABEL: "op_real_dynamic_slice" +func.func @op_real_dynamic_slice(%arg0: tensor, %arg1: tensor<1xindex>, %arg2: tensor<1xindex>, %arg3: tensor<1xindex>) -> tensor { + // CHECK: "vhlo.real_dynamic_slice_v1"(%arg0, %arg1, %arg2, %arg3) : (!vhlo.tensor_v1, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>, !vhlo.tensor_v1<1x!vhlo.index_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.real_dynamic_slice"(%arg0, %arg1, %arg2, %arg3) : (tensor, tensor<1xindex>, tensor<1xindex>, tensor<1xindex>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_real" +func.func @op_real(%arg0: tensor>) -> tensor { + // CHECK: "vhlo.real_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.real"(%arg0) : (tensor>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_recv" +func.func @op_recv(%arg0: !stablehlo.token) -> (tensor, !stablehlo.token) { + // CHECK: "vhlo.recv_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: channel_type = #vhlo.integer_v1<3 : i64>, + // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.token_v1) -> (!vhlo.tensor_v1, !vhlo.token_v1) + %0:2 = "stablehlo.recv"(%arg0) { + channel_handle = #stablehlo.channel_handle, + is_host_transfer = true + } : (!stablehlo.token) -> (tensor, !stablehlo.token) + func.return %0#0, %0#1 : tensor, !stablehlo.token +} + +// CHECK-LABEL: "op_reduce" +func.func @op_reduce(%arg0: tensor<16xf32>, %arg1: tensor) -> tensor { + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + dimensions = dense<0> : tensor<1xi64> + } : (tensor<16xf32>, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_reduce_precision" +func.func @op_reduce_precision(%arg0: tensor) -> tensor { + // CHECK: "vhlo.reduce_precision_v1"(%arg0) <{ + // CHECK-SAME: exponent_bits = #vhlo.integer_v1<8 : i32> + // CHECK-SAME: mantissa_bits = #vhlo.integer_v1<10 : i32> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.reduce_precision"(%arg0) { + exponent_bits = 8 : i32, + mantissa_bits = 10 : i32 + } : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK_lABEL: "op_reduce_with_promotable_types" +func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) + -> (tensor<4xf64>) { + // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1> + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xf32>, tensor) -> tensor<4xf64> + + func.return %0: tensor<4xf64> +} + +// CHECK-LABEL: "op_reduce_scatter" +func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME{LITERAL}: replica_groups = #vhlo.tensor_v1 : tensor<2x1xi64>>, + // CHECK-SAME: scatter_dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: use_global_device_ids = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: 
!vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.add_v1"(%[[ARG1]], %[[ARG2]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.reduce_scatter"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.add"(%arg1, %arg2) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + scatter_dimension = 0 : i64, + replica_groups = dense<[[0], [1]]> : tensor<2x1xi64>, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids + } : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_reduce_scatter_with_promotable_types" +func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { + // CHECK: "vhlo.reduce_scatter_v1"(%arg0) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> + %0 = "stablehlo.reduce_scatter"(%data) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = stablehlo.add %arg2, %arg3 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16xf32>) -> tensor<4x4xf64> + func.return %0 : tensor<4x4xf64> +} + + +// CHECK-LABEL: "op_reduce_window" +func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { + // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ + // CHECK-SAME: base_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME{LITERAL}: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, + // CHECK-SAME: window_dilations = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.maximum_v1"(%[[ARG2]], %[[ARG3]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<2x17x31x7x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<2x9x16x7x!vhlo.f32_v1> + %0 = "stablehlo.reduce_window"(%arg0, %arg1) ({ + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = "stablehlo.maximum"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, + window_strides = dense<[1, 4, 4, 1]> : tensor<4xi64>, + base_dilations = dense<[1, 2, 2, 1]> : tensor<4xi64>, + window_dilations = dense<[1, 2, 2, 1]> : tensor<4xi64>, + padding = dense<[[0, 0], [2, 0], [0, 2], [0, 0]]> : tensor<4x2xi64> + } : (tensor<2x17x31x7xf32>, tensor) -> tensor<2x9x16x7xf32> + func.return %0 : tensor<2x9x16x7xf32> +} + +// CHECK-LABEL: "op_reduce_window_with_promotable_types" +func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, + %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) { + // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) + // CHECK: 
^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) + %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ + ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, + %b1: tensor): + %2 = stablehlo.add %a0, %b0 : tensor + %3 = stablehlo.add %a1, %b1 : tensor + "stablehlo.return"(%2,%3) : (tensor, tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> } + : (tensor<4x2xf32>, tensor<4x2xf32>, tensor, tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) + func.return %0#0, %0#1 : tensor<2x2xf64>, tensor<2x2xf32> +} + +// CHECK-LABEL: "op_remainder" +func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.remainder"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_replica_id" +func.func @op_replica_id() -> tensor { + // CHECK: "vhlo.replica_id_v1"() : () -> !vhlo.tensor_v1 + %0 = "stablehlo.replica_id"() : () -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_partition_id" +func.func @op_partition_id() -> tensor { + // CHECK: "vhlo.partition_id_v1"() : () -> !vhlo.tensor_v1 + %0 = "stablehlo.partition_id"() : () -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_reshape" +func.func @op_reshape(%arg0: tensor<16xf32>) -> tensor<4x4xf32> { + // CHECK: "vhlo.reshape_v1"(%arg0) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f32_v1> + %0 = "stablehlo.reshape"(%arg0) : (tensor<16xf32>) -> tensor<4x4xf32> + func.return %0 : tensor<4x4xf32> +} + +// CHECK-LABEL: "op_return" +func.func @op_return(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.case_v1"(%arg0) ({ + // CHECK-NEXT: "vhlo.return_v1"(%arg1) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.case"(%arg0) ({ + "stablehlo.return"(%arg1) : (tensor) -> () + }) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_reverse" +func.func @op_reverse(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.reverse_v1"(%arg0) <{ + // CHECK-SAME: dimensions = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.reverse"(%arg0) { + dimensions = array + } : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_rng_bit_generator" +func.func @op_rng_bit_generator(%arg0: tensor) -> (tensor, tensor) { + // CHECK: "vhlo.rng_bit_generator_v1"(%arg0) <{ + // CHECK-SAME: rng_algorithm = #vhlo + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> (!vhlo.tensor_v1, !vhlo.tensor_v1) + %0:2 = "stablehlo.rng_bit_generator"(%arg0) { + rng_algorithm = #stablehlo + } : (tensor) -> (tensor, tensor) + func.return %0#0, %0#1 : tensor, tensor +} + +// CHECK-LABEL: "op_rng" +func.func @op_rng(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: "vhlo.rng_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: rng_distribution = #vhlo + // CHECK-SAME: 
}> : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.rng"(%arg0, %arg1, %arg2) { + rng_distribution = #stablehlo + } : (tensor, tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_round_nearest_afz" +func.func @op_round_nearest_afz(%arg0: tensor) -> tensor { + // CHECK: "vhlo.round_nearest_afz_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.round_nearest_afz"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_round_nearest_even" +func.func @op_round_nearest_even(%arg0: tensor) -> tensor { + // CHECK: "vhlo.round_nearest_even_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.round_nearest_even"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_rsqrt" +func.func @op_rsqrt(%arg0: tensor) -> tensor { + // CHECK: "vhlo.rsqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.rsqrt"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_scatter" +func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, %arg2: tensor<10x300xf32>) -> tensor<200x100x300xf32> { + // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: index_vector_dim = #vhlo.integer_v1<1 : i64>, + // CHECK-SAME: indices_are_sorted = #vhlo.bool_v1, + // CHECK-SAME: inserted_window_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: scatter_dims_to_operand_dims = #vhlo.tensor_v1 : tensor<2xi64>>, + // CHECK-SAME: unique_indices = #vhlo.bool_v1, + // CHECK-SAME: update_window_dims = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.add_v1"(%[[ARG3]], %[[ARG4]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f32_v1> + %0 = "stablehlo.scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> tensor<200x100x300xf32> + func.return %0 : tensor<200x100x300xf32> +} + +// CHECK-LABEL: "op_scatter_with_promotable_types" +func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> + tensor<200x100x300xf64> { + // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + 
}) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> + tensor<200x100x300xf64> + func.return %0 : tensor<200x100x300xf64> +} + +// CHECK-LABEL: "op_select_and_scatter" +func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { + // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ + // CHECK-SAME: padding = #vhlo.tensor_v1 : tensor<4x2xi64>>, + // CHECK-SAME: window_dimensions = #vhlo.tensor_v1 : tensor<4xi64>>, + // CHECK-SAME: window_strides = #vhlo.tensor_v1 : tensor<4xi64>> + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG31:arg.*]]: !vhlo.tensor_v1, %[[ARG41:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL11:.*]] = "vhlo.compare_v1"(%[[ARG31]], %[[ARG41]]) <{compare_type = #vhlo, comparison_direction = #vhlo}> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL11]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }, { + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG32:arg.*]]: !vhlo.tensor_v1, %[[ARG42:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL12:.*]] = "vhlo.add_v1"(%[[ARG32]], %[[ARG42]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL12]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1>, !vhlo.tensor_v1<12x13x13x66x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1> + %0 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.compare"(%arg3, %arg4) {compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, + window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>, + padding = dense<1> : tensor<4x2xi64> + } : (tensor<10x24x24x64xf32>, tensor<12x13x13x66xf32>, tensor) -> tensor<10x24x24x64xf32> + func.return %0 : tensor<10x24x24x64xf32> +} + +// CHECK-LABEL: "op_select_and_scatter_with_promotable_types" +func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { + // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL12]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1>, !vhlo.tensor_v1<12x13x13x66x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<10x24x24x64x!vhlo.f64_v1> + %0 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.compare"(%arg3, %arg4) {compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, + 
window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>, + padding = dense<1> : tensor<4x2xi64> + } : (tensor<10x24x24x64xf32>, tensor<12x13x13x66xf32>, tensor) -> tensor<10x24x24x64xf64> + func.return %0 : tensor<10x24x24x64xf64> +} + +// CHECK-LABEL: "op_select" +func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.select"(%arg0, %arg1, %arg2) : (tensor, tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_send" +func.func @op_send(%arg0: tensor, %arg1: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.send_v1"(%arg0, %arg1) <{ + // CHECK-SAME: channel_id = #vhlo.integer_v1<0 : i64>, + // CHECK-SAME: channel_type = #vhlo.integer_v1<2 : i64>, + // CHECK-SAME: is_host_transfer = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.token_v1) -> !vhlo.token_v1 + %0 = "stablehlo.send"(%arg0, %arg1) { + channel_handle = #stablehlo.channel_handle, + is_host_transfer = true + } : (tensor, !stablehlo.token) -> !stablehlo.token + func.return %0 : !stablehlo.token +} + +// CHECK-LABEL: "op_set_dimension_size" +func.func @op_set_dimension_size(%arg0: tensor, %arg1: tensor) -> tensor<16xf32> { + // CHECK: "vhlo.set_dimension_size_v1"(%arg0, %arg1) <{ + // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.set_dimension_size"(%arg0, %arg1) { + dimension = 0 : i64 + } : (tensor, tensor) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_shift_left" +func.func @op_shift_left(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.shift_left_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.shift_left"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_shift_right_arithmetic" +func.func @op_shift_right_arithmetic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.shift_right_arithmetic_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.shift_right_arithmetic"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_shift_right_logical" +func.func @op_shift_right_logical(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.shift_right_logical_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.shift_right_logical"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_sign" +func.func @op_sign(%arg0: tensor) -> tensor { + // CHECK: "vhlo.sign_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.sign"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_sine" +func.func @op_sine(%arg0: tensor) -> tensor { + // CHECK: "vhlo.sine_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.sine"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_slice" +func.func @op_slice(%arg0: tensor<16xf32>) -> tensor<4xf32> { + // CHECK: "vhlo.slice_v1"(%arg0) <{ + // CHECK-SAME: limit_indices = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: start_indices = #vhlo.tensor_v1 : tensor<1xi64>>, + // CHECK-SAME: strides = #vhlo.tensor_v1 : tensor<1xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x!vhlo.f32_v1> + %0 = 
"stablehlo.slice"(%arg0) { + start_indices = array, + limit_indices = array, + strides = array + } : (tensor<16xf32>) -> tensor<4xf32> + func.return %0 : tensor<4xf32> +} + +// CHECK-LABEL: "op_sort" +func.func @op_sort(%arg0: tensor<16xf32>) -> tensor<16xf32> { + // CHECK: "vhlo.sort_v1"(%arg0) <{ + // CHECK-SAME: dimension = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: is_stable = #vhlo.bool_v1 + // CHECK-SAME: }> ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: %[[VAL1:.*]] = "vhlo.compare_v1"(%[[ARG1]], %[[ARG2]]) <{compare_type = #vhlo, comparison_direction = #vhlo}> + // CHECK-NEXT: "vhlo.return_v1"(%[[VAL1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1<16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x!vhlo.f32_v1> + %0 = "stablehlo.sort"(%arg0) ({ + ^bb0(%arg1: tensor, %arg2: tensor): + %1 = "stablehlo.compare"(%arg1, %arg2) {compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + dimension = 0 : i64, + is_stable = true + } : (tensor<16xf32>) -> tensor<16xf32> + func.return %0 : tensor<16xf32> +} + +// CHECK-LABEL: "op_sqrt" +func.func @op_sqrt(%arg0: tensor) -> tensor { + // CHECK: "vhlo.sqrt_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.sqrt"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_subtract" +func.func @op_subtract(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.subtract_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.subtract"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_tanh" +func.func @op_tanh(%arg0: tensor) -> tensor { + // CHECK: "vhlo.tanh_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.tanh"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_torch_index_select" +func.func @op_torch_index_select(%arg0: tensor<5x1x5xf32>, %arg1: tensor<2xi32>) -> tensor<2x1x5xf32> { + // CHECK: "vhlo.torch_index_select_v1"(%arg0, %arg1) <{ + // CHECK-SAME: batch_dims = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: dim = #vhlo.integer_v1<0 : i64> + // CHECK-SAME: }> : (!vhlo.tensor_v1<5x1x5x!vhlo.f32_v1>, !vhlo.tensor_v1<2x!vhlo.i32_v1>) -> !vhlo.tensor_v1<2x1x5x!vhlo.f32_v1> + %0 = "stablehlo.torch_index_select"(%arg0, %arg1) { + dim = 0 : i64, + batch_dims = 0 : i64 + } : (tensor<5x1x5xf32>, tensor<2xi32>) -> tensor<2x1x5xf32> + func.return %0 : tensor<2x1x5xf32> +} + +// CHECK-LABEL: "op_trace" +func.func @op_trace(%arg0: tensor) { + // CHECK: "vhlo.trace_v1"(%arg0) <{ + // CHECK-SAME: tag = #vhlo.string_v1<"foo"> + // CHECK-SAME: }> : (!vhlo.tensor_v1) -> () + "stablehlo.trace"(%arg0) { + tag = "foo" + } : (tensor) -> () + func.return +} + +// CHECK-LABEL: "op_transpose" +func.func @op_transpose(%arg0: tensor<16x8xf32>) -> tensor<8x16xf32> { + // CHECK: "vhlo.transpose_v1"(%arg0) <{ + // CHECK-SAME: permutation = #vhlo.tensor_v1 : tensor<2xi64>> + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x8x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x16x!vhlo.f32_v1> + %0 = "stablehlo.transpose"(%arg0) { + permutation = array + } : (tensor<16x8xf32>) -> tensor<8x16xf32> + func.return %0 : tensor<8x16xf32> +} + +// CHECK-LABEL: "op_triangular_solve" +func.func @op_triangular_solve(%arg0: tensor<16x16xf32>, %arg1: tensor<16x16xf32>) -> tensor<16x16xf32> { + // CHECK: "vhlo.triangular_solve_v1"(%arg0, %arg1) <{ + // CHECK-SAME: left_side = 
#vhlo.bool_v1, + // CHECK-SAME: lower = #vhlo.bool_v1, + // CHECK-SAME: transpose_a = #vhlo, + // CHECK-SAME: unit_diagonal = #vhlo.bool_v1 + // CHECK-SAME: }> : (!vhlo.tensor_v1<16x16x!vhlo.f32_v1>, !vhlo.tensor_v1<16x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<16x16x!vhlo.f32_v1> + %0 = "stablehlo.triangular_solve"(%arg0, %arg1) { + left_side = true, + lower = true, + unit_diagonal = true, + transpose_a = #stablehlo + } : (tensor<16x16xf32>, tensor<16x16xf32>) -> tensor<16x16xf32> + func.return %0 : tensor<16x16xf32> +} + +// CHECK-LABEL: "op_tuple" +func.func @op_tuple(%arg0: tensor) -> tuple> { + // CHECK: "vhlo.tuple_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tuple_v1> + %0 = "stablehlo.tuple"(%arg0) : (tensor) -> tuple> + func.return %0 : tuple> +} + +// CHECK-LABEL: "op_unary_einsum" +func.func @op_unary_einsum(%arg0: tensor<8x16xf32>) -> tensor<8xf32> { + // CHECK: "vhlo.unary_einsum_v1"(%arg0) <{ + // CHECK-SAME: einsum_config = #vhlo.string_v1<"ab->a"> + // CHECK-SAME: }> : (!vhlo.tensor_v1<8x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<8x!vhlo.f32_v1> + %0 = "stablehlo.unary_einsum"(%arg0) { + einsum_config = "ab->a" + } : (tensor<8x16xf32>) -> tensor<8xf32> + func.return %0 : tensor<8xf32> +} + +// CHECK-LABEL: "op_uniform_dequantize" +func.func @op_uniform_dequantize(%arg0: tensor>) -> tensor { + // CHECK: "vhlo.uniform_dequantize_v1"(%arg0) : (!vhlo.tensor_v1>) -> !vhlo.tensor_v1 + %0 = "stablehlo.uniform_dequantize"(%arg0) : (tensor>) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "op_uniform_quantize" +func.func @op_uniform_quantize(%arg0: tensor) -> tensor> { + // CHECK: "vhlo.uniform_quantize_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1> + %0 = "stablehlo.uniform_quantize"(%arg0) : (tensor) -> tensor> + func.return %0 : tensor> +} + +// CHECK-LABEL: "op_while" +func.func @op_while(%arg0: tensor) -> tensor { + // CHECK: "vhlo.while_v1"(%arg0) ({ + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1): + // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }, { + // CHECK-NEXT: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1) + // CHECK-NEXT: "vhlo.return_v1"(%[[ARG1]]) : (!vhlo.tensor_v1) -> () + // CHECK-NEXT: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.while"(%arg0) ({ + ^bb0(%arg1: tensor): + "stablehlo.return"(%arg1) : (tensor) -> () + }, { + ^bb0(%arg1: tensor): + "stablehlo.return"(%arg1) : (tensor) -> () + }) : (tensor) -> tensor + func.return %0: tensor +} + +// CHECK-LABEL: "op_xor" +func.func @op_xor(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.xor_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.xor"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// ============ TYPES ============ + +// CHECK-LABEL: "type_i1" +func.func @type_i1(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.and_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.and"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_i4" +func.func @type_i4(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_i8" +func.func @type_i8(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = 
"stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_i16" +func.func @type_i16(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_i32" +func.func @type_i32(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_i64" +func.func @type_i64(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_ui4" +func.func @type_ui4(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_ui8" +func.func @type_ui8(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_ui16" +func.func @type_ui16(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_ui32" +func.func @type_ui32(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_ui64" +func.func @type_ui64(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f8E4M3FN" +func.func @type_f8E4M3FN(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f8E5M2" +func.func @type_f8E5M2(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f8E4M3FNUZ" +func.func @type_f8E4M3FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f8E4M3B11FNUZ" +func.func @type_f8E4M3B11FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + 
+// CHECK-LABEL: "type_f8E5M2FNUZ" +func.func @type_f8E5M2FNUZ(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_bf16" +func.func @type_bf16(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f16" +func.func @type_f16(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f32" +func.func @type_f32(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_f64" +func.func @type_f64(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_complex_f32" +func.func @type_complex_f32(%arg0: tensor>, %arg1: tensor>) -> tensor> { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> + func.return %0 : tensor> +} + +// CHECK-LABEL: "type_complex_f64" +func.func @type_complex_f64(%arg0: tensor>, %arg1: tensor>) -> tensor> { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1>) -> !vhlo.tensor_v1> + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor>) -> tensor> + func.return %0 : tensor> +} + +// CHECK-LABEL: "type_dynamism_ranked" +func.func @type_dynamism_ranked(%arg0: tensor) -> tensor { + // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.abs"(%arg0) : (tensor) -> tensor + func.return %0 : tensor +} + +// CHECK-LABEL: "type_dynamism_unranked" +func.func @type_dynamism_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: "vhlo.abs_v1"(%arg0) : (!vhlo.unranked_tensor_v1) -> !vhlo.unranked_tensor_v1 + %0 = "stablehlo.abs"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + func.return %0 : tensor<*xf32> +} + +// CHECK-LABEL: "type_quantization" +func.func @type_quantization(%arg0: tensor>, %arg1: tensor) -> tensor { + // CHECK: "vhlo.add_v1"(%arg0, %arg1) : (!vhlo.tensor_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1 + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor>, tensor) -> tensor + func.return %0 : tensor +} + +// CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> +// CHECK-LABEL: "type_token_callee" +func.func @type_token_callee(%arg0: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.return_v1"(%arg0) : (!vhlo.token_v1) -> () + return %arg0 : !stablehlo.token +} + +// CHECK: function_type = #vhlo.type_v1 !vhlo.token_v1>> +// CHECK-LABEL: "type_token_caller" +func.func @type_token_caller(%arg0: !stablehlo.token) -> !stablehlo.token { + // CHECK: "vhlo.call_v1"(%arg0) <{callee = #vhlo.string_v1<"type_token_callee">} + // CHECK-SAME: (!vhlo.token_v1) -> !vhlo.token_v1 + %0 = func.call @type_token_callee(%arg0) : 
(!stablehlo.token) -> !stablehlo.token + return %0 : !stablehlo.token +} + +// CHECK-LABEL: "type_tuple" +func.func @type_tuple(%arg0: tuple>) -> tuple { + %0 = "stablehlo.custom_call"(%arg0) { + call_target_name = "foo" + // CHECK: (!vhlo.tuple_v1>) -> !vhlo.tuple_v1 + } : (tuple>) -> tuple + return %0 : tuple +} diff --git a/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir.bc b/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir.bc new file mode 100644 index 00000000000..162bc8b661b Binary files /dev/null and b/stablehlo/tests/stablehlo_legalize_to_vhlo.0_17_0.mlir.bc differ diff --git a/stablehlo/tests/stablehlo_legalize_to_vhlo.mlir b/stablehlo/tests/stablehlo_legalize_to_vhlo.mlir index 85e9c6b94de..eb29829be6b 100644 --- a/stablehlo/tests/stablehlo_legalize_to_vhlo.mlir +++ b/stablehlo/tests/stablehlo_legalize_to_vhlo.mlir @@ -834,6 +834,25 @@ func.func @op_all_reduce(%arg0: tensor) -> tensor { func.return %0 : tensor } +// CHECK-LABEL: "op_all_reduce_with_promotable_types" +func.func @op_all_reduce_with_promotable_types(%operand: tensor) -> tensor { + // CHECK: "vhlo.all_reduce_v1"(%arg0) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1) -> !vhlo.tensor_v1 + %result = "stablehlo.all_reduce"(%operand) ({ + ^bb0(%arg0: tensor, %arg1: tensor): + %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + "stablehlo.return"(%0) : (tensor) -> () + }) { + replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids + } : (tensor) -> tensor + + func.return %result : tensor +} + // CHECK-LABEL: "op_all_to_all" func.func @op_all_to_all(%arg0: tensor<4x16xf32>) -> tensor<16x4xf32> { // CHECK: "vhlo.all_to_all_v1"(%arg0) <{ @@ -1646,6 +1665,23 @@ func.func @op_reduce_precision(%arg0: tensor) -> tensor { func.return %0 : tensor } +// CHECK-LABEL: "op_reduce_with_promotable_types" +func.func @op_reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) + -> (tensor<4xf64>) { + // CHECK: "vhlo.reduce_v1"(%arg0, %arg1) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x4x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<4x!vhlo.f64_v1> + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xf32>, tensor) -> tensor<4xf64> + + func.return %0: tensor<4xf64> +} + // CHECK-LABEL: "op_reduce_scatter" func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { // CHECK: "vhlo.reduce_scatter_v1"(%arg0) <{ @@ -1671,6 +1707,24 @@ func.func @op_reduce_scatter(%arg0: tensor<16xf32>) -> tensor<16xf32> { func.return %0 : tensor<16xf32> } +// CHECK-LABEL: "op_reduce_scatter_with_promotable_types" +func.func @op_reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> { + // CHECK: "vhlo.reduce_scatter_v1"(%arg0) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x16x!vhlo.f32_v1>) -> !vhlo.tensor_v1<4x4x!vhlo.f64_v1> + %0 = "stablehlo.reduce_scatter"(%data) ({ + 
^bb0(%arg2: tensor, %arg3: tensor): + %1 = stablehlo.add %arg2, %arg3 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>, + scatter_dimension = 1 : i64, + channel_handle = #stablehlo.channel_handle, + use_global_device_ids} : (tensor<4x16xf32>) -> tensor<4x4xf64> + func.return %0 : tensor<4x4xf64> +} + + // CHECK-LABEL: "op_reduce_window" func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> tensor<2x9x16x7xf32> { // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1) <{ @@ -1698,6 +1752,29 @@ func.func @op_reduce_window(%arg0: tensor<2x17x31x7xf32>, %arg1: tensor) -> func.return %0 : tensor<2x9x16x7xf32> } +// CHECK-LABEL: "op_reduce_window_with_promotable_types" +func.func @op_reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, + %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) { + // CHECK: "vhlo.reduce_window_v1"(%arg0, %arg1, %arg2, %arg3) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1, %[[ARG3:arg.*]]: !vhlo.tensor_v1, %[[ARG4:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]], %[[VAL2:.*]]) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1<4x2x!vhlo.f32_v1>, !vhlo.tensor_v1, !vhlo.tensor_v1) -> (!vhlo.tensor_v1<2x2x!vhlo.f64_v1>, !vhlo.tensor_v1<2x2x!vhlo.f32_v1>) + %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ + ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, + %b1: tensor): + %2 = stablehlo.add %a0, %b0 : tensor + %3 = stablehlo.add %a1, %b1 : tensor + "stablehlo.return"(%2,%3) : (tensor, tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> } + : (tensor<4x2xf32>, tensor<4x2xf32>, tensor, tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) + func.return %0#0, %0#1 : tensor<2x2xf64>, tensor<2x2xf32> +} + // CHECK-LABEL: "op_remainder" func.func @op_remainder(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: "vhlo.remainder_v1"(%arg0, %arg1) : (!vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 @@ -1822,6 +1899,32 @@ func.func @op_scatter(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>, % func.return %0 : tensor<200x100x300xf32> } +// CHECK-LABEL: "op_scatter_with_promotable_types" +func.func @op_scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> + tensor<200x100x300xf64> { + // CHECK: "vhlo.scatter_v1"(%arg0, %arg1, %arg2) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL1:.*]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<200x100x300x!vhlo.f32_v1>, !vhlo.tensor_v1<10x2x!vhlo.i32_v1>, !vhlo.tensor_v1<10x300x!vhlo.f32_v1>) -> !vhlo.tensor_v1<200x100x300x!vhlo.f64_v1> + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> + tensor<200x100x300xf64> + 
func.return %0 : tensor<200x100x300xf64> +} + // CHECK-LABEL: "op_select_and_scatter" func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf32> { // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) <{ @@ -1853,6 +1956,28 @@ func.func @op_select_and_scatter(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<1 func.return %0 : tensor<10x24x24x64xf32> } +// CHECK-LABEL: "op_select_and_scatter_with_promotable_types" +func.func @op_select_and_scatter_with_promotable_types(%arg0: tensor<10x24x24x64xf32>, %arg1: tensor<12x13x13x66xf32>, %arg2: tensor) -> tensor<10x24x24x64xf64> { + // CHECK: "vhlo.select_and_scatter_v1"(%arg0, %arg1, %arg2) + // CHECK: ^[[BB:bb.*]](%[[ARG1:arg.*]]: !vhlo.tensor_v1, %[[ARG2:arg.*]]: !vhlo.tensor_v1): + // CHECK: "vhlo.return_v1"(%[[VAL12]]) : (!vhlo.tensor_v1) -> () + // CHECK: }) : (!vhlo.tensor_v1<10x24x24x64x!vhlo.f32_v1>, !vhlo.tensor_v1<12x13x13x66x!vhlo.f32_v1>, !vhlo.tensor_v1) -> !vhlo.tensor_v1<10x24x24x64x!vhlo.f64_v1> + %0 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({ + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.compare"(%arg3, %arg4) {compare_type = #stablehlo, comparison_direction = #stablehlo} : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg3: tensor, %arg4: tensor): + %1 = "stablehlo.add"(%arg3, %arg4) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + }) { + window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, + window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>, + padding = dense<1> : tensor<4x2xi64> + } : (tensor<10x24x24x64xf32>, tensor<12x13x13x66xf32>, tensor) -> tensor<10x24x24x64xf64> + func.return %0 : tensor<10x24x24x64xf64> +} + // CHECK-LABEL: "op_select" func.func @op_select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: "vhlo.select_v1"(%arg0, %arg1, %arg2) : (!vhlo.tensor_v1, !vhlo.tensor_v1, !vhlo.tensor_v1) -> !vhlo.tensor_v1 diff --git a/stablehlo/tests/verify_reduce.mlir b/stablehlo/tests/verify_reduce.mlir index 8915f4941aa..f48a2db01f7 100644 --- a/stablehlo/tests/verify_reduce.mlir +++ b/stablehlo/tests/verify_reduce.mlir @@ -96,6 +96,35 @@ func.func @reduce_mix_rank_and_unranked(%arg0: tensor<4x4xf32>, %arg1: tensor<*x // ----- +// CHECK-LABEL: func @reduce_with_promotable_types +func.func @reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor) + -> (tensor<4xf64>) { + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xf32>, tensor) -> tensor<4xf64> + + func.return %0: tensor<4xf64> +} + +// ----- + +// CHECK-LABEL: func @reduce_with_promotable_quantized_types +func.func @reduce_with_promotable_quantized_types(%arg0: tensor<4x4x!quant.uniform>, + %arg1: tensor>) -> tensor<4x!quant.uniform> { + %0 = stablehlo.reduce(%arg0 init: %arg1) across dimensions = [0] : (tensor<4x4x!quant.uniform>, tensor>) -> tensor<4x!quant.uniform> + reducer(%arg2: tensor>, %arg3: tensor>) { + %1 = stablehlo.add %arg2, %arg3 : tensor> + stablehlo.return %1 : tensor> + } + return %0 : tensor<4x!quant.uniform> +} + +// ----- + func.func @reduce_c1(%arg0: tensor<2x3xf32>, %arg1: tensor<3x2xf32>, %arg2: tensor, %arg3: tensor) -> (tensor<2xf32>, tensor<2xf32>) { @@ -307,7 +336,7 @@ func.func @reduce_c6(%arg0: tensor, %arg1: tensor, func.func 
@reduce_c6(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> (tensor, tensor) { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} %0:2 = "stablehlo.reduce"(%arg0, %arg1, %arg2, %arg3) ({ ^bb0(%arg4: tensor, %arg5: tensor, %arg6: tensor, %arg7: tensor): @@ -325,7 +354,7 @@ func.func @reduce_c6(%arg0: tensor, %arg1: tensor, func.func @reduce_c6(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> (tensor, tensor) { - // expected-error@+1 {{The type of reduction-region's result type at index 1 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 1 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} %0:2 = "stablehlo.reduce"(%arg0, %arg1, %arg2, %arg3) ({ ^bb0(%arg4: tensor, %arg5: tensor, %arg6: tensor, %arg7: tensor): @@ -343,7 +372,7 @@ func.func @reduce_c6(%arg0: tensor, %arg1: tensor, func.func @reduce_c6(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> (tensor, tensor) { - // expected-error@+1 {{The element-type of reduction-region's argument at index 3 is expected to be 'i32', but got 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's argument at index 3 is expected to be promotable from 'i32', but got 'f32'}} %0:2 = "stablehlo.reduce"(%arg0, %arg1, %arg2, %arg3) ({ ^bb0(%arg4: tensor, %arg5: tensor, %arg6: tensor, %arg7: tensor): @@ -392,6 +421,73 @@ func.func @reduce_c6(%arg0: tensor<8x5xf32>, %arg1 : tensor<4xf32>) // ----- + +func.func @reduce_c6(%arg0: tensor<4x4xi32>, %arg1 : tensor) + -> (tensor<4xi8>) { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xi32>, tensor) -> tensor<4xi8> + + func.return %0: tensor<4xi8> +} + +// ----- + +func.func @reduce_c6(%arg0: tensor<4x4xi32>, %arg1 : tensor) + -> (tensor<4xi8>) { + + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'i32', but got 'i8'}} + %0 = "stablehlo.reduce"(%arg0, %arg1) ({ + + ^bb0(%arg2: tensor, %arg3: tensor ): + %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "stablehlo.return"(%1) : (tensor) -> () + + }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xi32>, tensor) -> tensor<4xi8> + + func.return %0: tensor<4xi8> +} + +// ----- + +func.func @reduce_c6(%arg0: tensor<4x4x!quant.uniform>, + %arg1: tensor>) -> tensor<4x!quant.uniform> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor>' vs 'tensor>'}} + %0 = stablehlo.reduce(%arg0 init: %arg1) across dimensions = [0] : (tensor<4x4x!quant.uniform>, + tensor>) -> tensor<4x!quant.uniform> + + reducer(%arg2: tensor>, %arg3: tensor>) { + %1 = stablehlo.add 
%arg2, %arg3 : tensor> + stablehlo.return %1 : tensor> + } + return %0 : tensor<4x!quant.uniform> +} + +// ----- + +func.func @reduce_c6(%arg0: tensor<4x4x!quant.uniform>, + %arg1: tensor>) -> tensor<4x!quant.uniform> { + + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from '!quant.uniform', but got '!quant.uniform'}} + %0 = stablehlo.reduce(%arg0 init: %arg1) across dimensions = [0] : (tensor<4x4x!quant.uniform>, + tensor>) -> tensor<4x!quant.uniform> + + reducer(%arg2: tensor>, %arg3: tensor>) { + %1 = stablehlo.add %arg2, %arg3 : tensor> + stablehlo.return %1 : tensor> + } + return %0 : tensor<4x!quant.uniform> +} + +// ----- + func.func @reduce_i3(%input: tensor<1x6xi64>, %init_value: tensor) -> tensor<1xi64> { // expected-error@+1 {{dimensions must be rank 1}} %0 = "stablehlo.reduce"(%input, %init_value) ({ diff --git a/stablehlo/tests/verify_reduce_window.mlir b/stablehlo/tests/verify_reduce_window.mlir index f410243b1b9..62d9b4d80e0 100644 --- a/stablehlo/tests/verify_reduce_window.mlir +++ b/stablehlo/tests/verify_reduce_window.mlir @@ -82,6 +82,46 @@ func.func @reduce_window_with_unranked_dynamic_dims(%arg0: tensor<*xf32>, // ----- +// CHECK-LABEL: func @reduce_window_with_promotable_types +func.func @reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>, + %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) { + %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ + ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, + %b1: tensor): + %2 = stablehlo.add %a0, %b0 : tensor + %3 = stablehlo.add %a1, %b1 : tensor + "stablehlo.return"(%2,%3) : (tensor, tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> } + : (tensor<4x2xf32>, tensor<4x2xf32>, tensor, tensor) -> + (tensor<2x2xf64>, tensor<2x2xf32>) + func.return %0#0, %0#1 : tensor<2x2xf64>, tensor<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @reduce_window_with_promotable_quantized_types +func.func @reduce_window_with_promotable_quantized_types(%arg0: tensor<4x2x!quant.uniform>, + %init0: tensor>) -> (tensor<2x2x!quant.uniform>) { + + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor>, %b0: tensor>): + %1 = stablehlo.add %a0, %b0 : tensor> + "stablehlo.return"(%1) : (tensor>) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> + } + : (tensor<4x2x!quant.uniform>, tensor>) -> (tensor<2x2x!quant.uniform>) + func.return %0 : tensor<2x2x!quant.uniform> +} + +// ----- + func.func @reduce_window_c1(%arg0: tensor<4x2xf32>, %arg1: tensor<4x2xi32>, %init0: tensor, %init1: tensor) -> (tensor<2x2xf32>, tensor<2x2xi32>) { @@ -123,7 +163,7 @@ func.func @reduce_window_c2(%arg0: tensor<4x2xf32>, func.func @reduce_window_c3(%arg0: tensor<4x2xf32>, %arg1: tensor<4x2xi32>, %init0: tensor, %init1: tensor) -> (tensor<2x2xf32>, tensor<2x2xf32>) { - // expected-error@+1 {{The element-type of reduction-region's argument at index 3 is expected to be 'i32', but got 'tensor' as its type.}} + // expected-error@+1 {{The element-type of reduction-region's argument at index 3 is expected to be promotable from 'i32', but got 'f32'}} %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, %b1: tensor): @@ -490,7 +530,7 @@ func.func 
@reduce_window_c13(%arg0: tensor<4x2xf32>, func.func @reduce_window_c13(%arg0: tensor<4x2xf32>, %arg1: tensor<4x2xi32>, %init0: tensor, %init1: tensor) -> (tensor<2x2xf32>, tensor<2x2xi32>) { - // expected-error@+1 {{The type of reduction-region's result type at index 1 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 1 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, %b1: tensor): @@ -544,10 +584,86 @@ func.func @reduce_window_c13(%arg0: tensor<4x2xf32>, %init0: tensor<4x2xf32>) // ----- +func.func @reduce_window_c13(%arg0: tensor<4x2xi32>, %init0: tensor) -> + (tensor<2x2xi8>) { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor, %b0: tensor): + %1 = stablehlo.add %a0, %b0 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> + } + : (tensor<4x2xi32>, tensor) -> (tensor<2x2xi8>) + func.return %0 : tensor<2x2xi8> +} + +// ----- + +func.func @reduce_window_c13(%arg0: tensor<4x2xi32>, %init0: tensor) -> + (tensor<2x2xi8>) { + + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'i32', but got 'i8'}} + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor, %b0: tensor): + %1 = stablehlo.add %a0, %b0 : tensor + "stablehlo.return"(%1) : (tensor) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> + } + : (tensor<4x2xi32>, tensor) -> (tensor<2x2xi8>) + func.return %0 : tensor<2x2xi8> +} + +// ----- + +func.func @reduce_window_c13(%arg0: tensor<4x2x!quant.uniform>, + %init0: tensor>) -> (tensor<2x2x!quant.uniform>) { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor>' vs 'tensor>'}} + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor>, %b0: tensor>): + %1 = stablehlo.add %a0, %b0 : tensor> + "stablehlo.return"(%1) : (tensor>) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 1]> : tensor<2xi64> + } + : (tensor<4x2x!quant.uniform>, tensor>) -> (tensor<2x2x!quant.uniform>) + func.return %0 : tensor<2x2x!quant.uniform> +} + +// ----- + +func.func @reduce_window_c13(%arg0: tensor<4x2x!quant.uniform>, + %init0: tensor>) -> (tensor<2x2x!quant.uniform>) { + + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from '!quant.uniform', but got '!quant.uniform'}} + %0 = "stablehlo.reduce_window"(%arg0, %init0) ({ + ^bb0(%a0: tensor>, %b0: tensor>): + %1 = stablehlo.add %a0, %b0 : tensor> + "stablehlo.return"(%1) : (tensor>) -> () + }) + { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>, + window_dimensions = dense<[5, 1]> : tensor<2xi64>, + window_strides = dense<[3, 
1]> : tensor<2xi64> + } + : (tensor<4x2x!quant.uniform>, tensor>) -> (tensor<2x2x!quant.uniform>) + func.return %0 : tensor<2x2x!quant.uniform> +} + +// ----- + func.func @reduce_window_i2(%arg0: tensor<4x2xf32>, %arg1: tensor<4x2xi32>, %init0: tensor<1xf32>, %init1: tensor<1xi32>) -> (tensor<2x2xf32>, tensor<2x2xi32>) { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor<1xf32>'}} + // expected-error@+1 {{The shape of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor<1xf32>'}} %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({ ^bb0(%a0: tensor, %a1: tensor, %b0: tensor, %b1: tensor): diff --git a/stablehlo/tests/verify_scatter.mlir b/stablehlo/tests/verify_scatter.mlir index 1b7ec22fdcd..cc7284a6892 100644 --- a/stablehlo/tests/verify_scatter.mlir +++ b/stablehlo/tests/verify_scatter.mlir @@ -71,6 +71,55 @@ func.func @valid_scatter_dimensions_with_dynamic_index_vector_dim( // ----- +// CHECK: func @scatter_with_promotable_types +func.func @scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> + tensor<200x100x300xf64> { + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) -> + tensor<200x100x300xf64> + func.return %0 : tensor<200x100x300xf64> +} + +// ----- + +// CHECK: func @scatter_with_promotable_quantized_types +func.func @scatter_with_promotable_quantized_types(%input_tensor: tensor<200x100x300x!quant.uniform>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300x!quant.uniform>) -> + tensor<200x100x300x!quant.uniform> { + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor>, %rhs: tensor>): + %add = stablehlo.add %lhs, %rhs : tensor> + "stablehlo.return"(%add) : (tensor>) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300x!quant.uniform>, tensor<10x2xi32>, + tensor<10x300x!quant.uniform>) -> + tensor<200x100x300x!quant.uniform> + func.return %0 : tensor<200x100x300x!quant.uniform> +} + +// ----- + func.func @scatter_c1(%arg0: tensor<3xi32>, %arg1: tensor<1x1xi32>, %arg2: tensor<1xi32>) -> tensor<3xi32> { // expected-error @+1 {{Not all inputs have compatible shapes.}} @@ -233,7 +282,7 @@ func.func @scatter_c4(%input_tensor: tensor<200x100x300xf32>, func.func @scatter_c6_c15(%input_tensor: tensor<200x100x300xf32>, %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xi32>) -> tensor<200x100x300xf32> { - // expected-error@+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}} + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value 
element-type: 'tensor' vs 'tensor'}} %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ ^bb0(%lhs: tensor, %rhs: tensor): %add = stablehlo.add %lhs, %rhs : tensor @@ -257,7 +306,7 @@ func.func @scatter_c6_c15(%input_tensor: tensor<200x100x300xf32>, func.func @scatter_c6_c15_c16(%input_tensor: tensor<200x100x300xi32>, %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) -> tensor<200x100x300xf32> { - // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be 'i32', but got 'tensor' as its type.}} + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'i32', but got 'f32'}} %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ ^bb0(%lhs: tensor, %rhs: tensor): %add = stablehlo.add %lhs, %rhs : tensor @@ -794,3 +843,103 @@ func.func @scatter_c15(%input_tensor: tensor<200x100x300xf32>, tensor<200x100x300xf32> func.return %0 : tensor<200x100x300xf32> } + +// ----- + +func.func @scatter_c15(%input_tensor: tensor<200x100x300xi32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xi32>) -> + tensor<200x100x300xi8> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}} + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xi32>, tensor<10x2xi32>, tensor<10x300xi32>) -> + tensor<200x100x300xi8> + func.return %0 : tensor<200x100x300xi8> +} + +// ----- + +func.func @scatter_c15(%input_tensor: tensor<200x100x300xi32>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xi8>) -> + tensor<200x100x300xi8> { + + // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'i32', but got 'i8'}} + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor, %rhs: tensor): + %add = stablehlo.add %lhs, %rhs : tensor + "stablehlo.return"(%add) : (tensor) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], + inserted_window_dims = [0, 1], + scatter_dims_to_operand_dims = [0, 1], + index_vector_dim = 1 + >, + indices_are_sorted = true, + unique_indices = true + } : (tensor<200x100x300xi32>, tensor<10x2xi32>, tensor<10x300xi8>) -> + tensor<200x100x300xi8> + func.return %0 : tensor<200x100x300xi8> +} + +// ----- + +func.func @scatter_c15(%input_tensor: tensor<200x100x300x!quant.uniform>, + %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300x!quant.uniform>) -> + tensor<200x100x300x!quant.uniform> { + + // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor>' vs 'tensor>'}} + %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({ + ^bb0(%lhs: tensor>, %rhs: tensor>): + %add = stablehlo.add %lhs, %rhs : tensor> + "stablehlo.return"(%add) : (tensor>) -> () + }) { + scatter_dimension_numbers = #stablehlo.scatter< + update_window_dims = [1], 
+      inserted_window_dims = [0, 1],
+      scatter_dims_to_operand_dims = [0, 1],
+      index_vector_dim = 1
+    >,
+    indices_are_sorted = true,
+    unique_indices = true
+  } : (tensor<200x100x300x!quant.uniform>, tensor<10x2xi32>, tensor<10x300x!quant.uniform>) ->
+      tensor<200x100x300x!quant.uniform>
+  func.return %0 : tensor<200x100x300x!quant.uniform>
+}
+
+// -----
+
+func.func @scatter_c15(%input_tensor: tensor<200x100x300x!quant.uniform>,
+    %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300x!quant.uniform>) ->
+      tensor<200x100x300x!quant.uniform> {
+
+  // expected-error@+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from '!quant.uniform', but got '!quant.uniform'}}
+  %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({
+    ^bb0(%lhs: tensor>, %rhs: tensor>):
+    %add = stablehlo.add %lhs, %rhs : tensor>
+    "stablehlo.return"(%add) : (tensor>) -> ()
+  }) {
+    scatter_dimension_numbers = #stablehlo.scatter<
+      update_window_dims = [1],
+      inserted_window_dims = [0, 1],
+      scatter_dims_to_operand_dims = [0, 1],
+      index_vector_dim = 1
+    >,
+    indices_are_sorted = true,
+    unique_indices = true
+  } : (tensor<200x100x300x!quant.uniform>, tensor<10x2xi32>, tensor<10x300x!quant.uniform>) ->
+      tensor<200x100x300x!quant.uniform>
+  func.return %0 : tensor<200x100x300x!quant.uniform>
+}
diff --git a/stablehlo/tests/verify_select_and_scatter.mlir b/stablehlo/tests/verify_select_and_scatter.mlir
index bede69572aa..5b98b6c6afc 100644
--- a/stablehlo/tests/verify_select_and_scatter.mlir
+++ b/stablehlo/tests/verify_select_and_scatter.mlir
@@ -26,6 +26,62 @@ func.func @select_and_scatter(
 
 // -----
 
+// CHECK: func @select_and_scatter_with_promotable_types
+func.func @select_and_scatter_with_promotable_types(
+    %arg0: tensor<10x24x24x64xf32>,
+    %arg1: tensor<10x12x12x64xf32>) -> () {
+  %0 = stablehlo.constant dense<0.000000e+00> : tensor
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        comparison_direction = #stablehlo
+      } : (tensor, tensor) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = stablehlo.add %arg3, %arg4 : tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    padding = dense<0> : tensor<4x2xi64>
+  } : (tensor<10x24x24x64xf32>, tensor<10x12x12x64xf32>, tensor) ->
+      tensor<10x24x24x64xf64>
+  func.return
+}
+
+// -----
+
+// CHECK: func @select_and_scatter_with_promotable_quantized_types
+func.func @select_and_scatter_with_promotable_quantized_types(
+    %arg0: tensor<10x24x24x64x!quant.uniform>,
+    %arg1: tensor<10x12x12x64x!quant.uniform>,
+    %arg2 : tensor>) ->
+    tensor<10x24x24x64x!quant.uniform> {
+
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({
+    ^bb0(%arg3: tensor>, %arg4: tensor>):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        compare_type = #stablehlo,
+        comparison_direction = #stablehlo
+      } : (tensor>, tensor>) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor>, %arg4: tensor>):
+      %2 = stablehlo.add %arg3, %arg4 : tensor>
+      "stablehlo.return"(%2) : (tensor>) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>
+  } : (tensor<10x24x24x64x!quant.uniform>,
+      tensor<10x12x12x64x!quant.uniform>,
+      tensor>) ->
+      tensor<10x24x24x64x!quant.uniform>
+  func.return %1 : tensor<10x24x24x64x!quant.uniform>
+}
+
+// -----
+
 // CHECK: func @select_and_scatter_with_unranked_dims
 func.func @select_and_scatter_with_unranked_dims(
     %arg0: tensor<4x5x1x1xbf16>,
@@ -607,7 +663,7 @@ func.func @select_and_scatter_c10(
     %arg0: tensor<10x24x24x64xf32>,
     %arg1: tensor<10x12x12x64xf32>) -> () {
   %0 = stablehlo.constant dense<0.000000e+00> : tensor
-  // expected-error @+1 {{The type of reduction-region's result type at index 0 differs from the op's corresponding init-value type: 'tensor' vs 'tensor'}}
+  // expected-error @+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}}
   %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
     ^bb0(%arg3: tensor, %arg4: tensor):
       %2 = "stablehlo.compare"(%arg3, %arg4) {
@@ -633,7 +689,7 @@ func.func @select_and_scatter_c10(
     %arg0: tensor<10x24x24x64xf32>,
     %arg1: tensor<10x12x12x64xf32>) -> () {
   %0 = stablehlo.constant dense<0> : tensor
-  // expected-error @+1 {{The element-type of reduction-region's argument at index 1 is expected to be 'f32', but got 'tensor' as its type.}}
+  // expected-error @+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'f32', but got 'i32'}}
   %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
     ^bb0(%arg3: tensor, %arg4: tensor):
       %2 = "stablehlo.compare"(%arg3, %arg4) {
@@ -678,3 +734,86 @@ func.func @select_and_scatter_c10(
       tensor<10x24x24x64xf32>
   func.return
 }
+
+// -----
+
+func.func @select_and_scatter_c10(
+    %arg0: tensor<10x24x24x64xi32>,
+    %arg1: tensor<10x12x12x64xi32>) -> tensor<10x24x24x64xi8> {
+  %0 = stablehlo.constant dense<0> : tensor
+
+  // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor' vs 'tensor'}}
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        compare_type = #stablehlo,
+        comparison_direction = #stablehlo
+      } : (tensor, tensor) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = stablehlo.add %arg3, %arg4 : tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>
+  } : (tensor<10x24x24x64xi32>, tensor<10x12x12x64xi32>, tensor) ->
+      tensor<10x24x24x64xi8>
+  func.return %1 : tensor<10x24x24x64xi8>
+}
+
+// -----
+
+func.func @select_and_scatter_c10(
+    %arg0: tensor<10x24x24x64xf32>,
+    %arg1: tensor<10x12x12x64xf32>) -> () {
+  %0 = stablehlo.constant dense<0> : tensor
+  // expected-error @+1 {{The element-type of reduction-region's argument at index 1 is expected to be promotable from 'f32', but got 'i8'}}
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        comparison_direction = #stablehlo
+      } : (tensor, tensor) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = stablehlo.add %arg3, %arg4 : tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    padding = dense<0> : tensor<4x2xi64>
+  } : (tensor<10x24x24x64xf32>, tensor<10x12x12x64xf32>, tensor) ->
+      tensor<10x24x24x64xf32>
+  func.return
+}
+
+// -----
+
+func.func @select_and_scatter_c10(
+    %arg0: tensor<10x24x24x64x!quant.uniform>,
+    %arg1: tensor<10x12x12x64x!quant.uniform>,
+    %arg2 : tensor>) ->
+    tensor<10x24x24x64x!quant.uniform> {
+
+  // expected-error@+1 {{The element-type of reduction-region's result type at index 0 is expected to be promotable from the op's corresponding init-value element-type: 'tensor>' vs 'tensor>'}}
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %arg2) ({
+    ^bb0(%arg3: tensor>, %arg4: tensor>):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        compare_type = #stablehlo,
+        comparison_direction = #stablehlo
+      } : (tensor>, tensor>) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor>, %arg4: tensor>):
+      %2 = stablehlo.add %arg3, %arg4 : tensor>
+      "stablehlo.return"(%2) : (tensor>) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>
+  } : (tensor<10x24x24x64x!quant.uniform>,
+      tensor<10x12x12x64x!quant.uniform>,
+      tensor>) ->
+      tensor<10x24x24x64x!quant.uniform>
+  func.return %1 : tensor<10x24x24x64x!quant.uniform>
+}
diff --git a/stablehlo/tests/vhlo_to_version_downgrade_invalid.0_16_0.mlir b/stablehlo/tests/vhlo_to_version_downgrade_invalid.0_16_0.mlir
new file mode 100644
index 00000000000..3befa48e1f4
--- /dev/null
+++ b/stablehlo/tests/vhlo_to_version_downgrade_invalid.0_16_0.mlir
@@ -0,0 +1,123 @@
+// RUN: stablehlo-opt --stablehlo-legalize-to-vhlo --vhlo-to-version='target=0.16.0' --verify-diagnostics --split-input-file %s
+
+func.func @reduce_with_promotable_types(%arg0: tensor<4x4xf32>, %arg1 : tensor)
+    -> (tensor<4xf64>) {
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.reduce_v1' that was explicitly marked illegal}}
+  %0 = "stablehlo.reduce"(%arg0, %arg1) ({
+
+  ^bb0(%arg2: tensor, %arg3: tensor ):
+    %1 = "stablehlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor
+    "stablehlo.return"(%1) : (tensor) -> ()
+
+  }) {dimensions = dense<[0]> : tensor<1xi64>} : (tensor<4x4xf32>, tensor) -> tensor<4xf64>
+
+  func.return %0: tensor<4xf64>
+}
+
+// -----
+
+func.func @all_reduce_with_promotable_types(%operand: tensor) -> tensor {
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.all_reduce_v1' that was explicitly marked illegal}}
+  %result = "stablehlo.all_reduce"(%operand) ({
+    ^bb0(%arg0: tensor, %arg1: tensor):
+      %0 = "stablehlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor
+      "stablehlo.return"(%0) : (tensor) -> ()
+  }) {
+    replica_groups = dense<[[0, 1]]> : tensor<1x2xi64>,
+    channel_handle = #stablehlo.channel_handle
+  } : (tensor) -> tensor
+
+  func.return %result : tensor
+}
+
+// -----
+
+func.func @reduce_scatter_with_promotable_types(%data: tensor<4x16xf32>) -> tensor<4x4xf64> {
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.reduce_scatter_v1' that was explicitly marked illegal}}
+  %0 = "stablehlo.reduce_scatter"(%data) ({
+    ^bb0(%arg2: tensor, %arg3: tensor):
+      %1 = stablehlo.add %arg2, %arg3 : tensor
+      "stablehlo.return"(%1) : (tensor) -> ()
+  }) {replica_groups = dense<[[0, 1, 2, 3]]> : tensor<1x4xi64>,
+      scatter_dimension = 1 : i64,
+      channel_handle = #stablehlo.channel_handle,
+      use_global_device_ids} : (tensor<4x16xf32>) -> tensor<4x4xf64>
+  func.return %0 : tensor<4x4xf64>
+}
+
+// -----
+
+func.func @reduce_window_with_promotable_types(%arg0: tensor<4x2xf32>,
+    %arg1: tensor<4x2xf32>, %init0: tensor, %init1: tensor) ->
+    (tensor<2x2xf64>, tensor<2x2xf32>) {
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.reduce_window_v1' that was explicitly marked illegal}}
+  %0:2 = "stablehlo.reduce_window"(%arg0, %arg1, %init0, %init1) ({
+    ^bb0(%a0: tensor, %a1: tensor, %b0: tensor,
+        %b1: tensor):
+      %2 = stablehlo.add %a0, %b0 : tensor
+      %3 = stablehlo.add %a1, %b1 : tensor
+      "stablehlo.return"(%2,%3) : (tensor, tensor) -> ()
+  })
+  { padding = dense<[[2, 2], [0, 0]]> : tensor<2x2xi64>,
+    window_dimensions = dense<[5, 1]> : tensor<2xi64>,
+    window_strides = dense<[3, 1]> : tensor<2xi64> }
+  : (tensor<4x2xf32>, tensor<4x2xf32>, tensor, tensor) ->
+    (tensor<2x2xf64>, tensor<2x2xf32>)
+  func.return %0#0, %0#1 : tensor<2x2xf64>, tensor<2x2xf32>
+}
+
+// -----
+
+func.func @scatter_with_promotable_types(%input_tensor: tensor<200x100x300xf32>,
+    %scatter_indices: tensor<10x2xi32>, %updates: tensor<10x300xf32>) ->
+      tensor<200x100x300xf64> {
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.scatter_v1' that was explicitly marked illegal}}
+  %0 = "stablehlo.scatter" (%input_tensor, %scatter_indices, %updates) ({
+    ^bb0(%lhs: tensor, %rhs: tensor):
+    %add = stablehlo.add %lhs, %rhs : tensor
+    "stablehlo.return"(%add) : (tensor) -> ()
+  }) {
+    scatter_dimension_numbers = #stablehlo.scatter<
+      update_window_dims = [1],
+      inserted_window_dims = [0, 1],
+      scatter_dims_to_operand_dims = [0, 1],
+      index_vector_dim = 1
+    >,
+    indices_are_sorted = true,
+    unique_indices = true
+  } : (tensor<200x100x300xf32>, tensor<10x2xi32>, tensor<10x300xf32>) ->
+      tensor<200x100x300xf64>
+  func.return %0 : tensor<200x100x300xf64>
+}
+
+// -----
+
+func.func @select_and_scatter_with_promotable_types(
+    %arg0: tensor<10x24x24x64xf32>,
+    %arg1: tensor<10x12x12x64xf32>) -> () {
+  %0 = stablehlo.constant dense<0.000000e+00> : tensor
+
+  // expected-error @+1 {{failed to legalize operation 'vhlo.select_and_scatter_v1' that was explicitly marked illegal}}
+  %1 = "stablehlo.select_and_scatter"(%arg0, %arg1, %0) ({
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = "stablehlo.compare"(%arg3, %arg4) {
+        comparison_direction = #stablehlo
+      } : (tensor, tensor) -> tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }, {
+    ^bb0(%arg3: tensor, %arg4: tensor):
+      %2 = stablehlo.add %arg3, %arg4 : tensor
+      "stablehlo.return"(%2) : (tensor) -> ()
+  }) {
+    window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>,
+    padding = dense<0> : tensor<4x2xi64>
+  } : (tensor<10x24x24x64xf32>, tensor<10x12x12x64xf32>, tensor) ->
+      tensor<10x24x24x64xf64>
+  func.return
+}
diff --git a/stablehlo/transforms/VhloToVersion.cpp b/stablehlo/transforms/VhloToVersion.cpp
index 6534ebe7899..a8cf57d1aa6 100644
--- a/stablehlo/transforms/VhloToVersion.cpp
+++ b/stablehlo/transforms/VhloToVersion.cpp
@@ -181,7 +181,17 @@ bool isLegalOperation(Operation* op, const Version& targetVersion) {
   auto opInterface = dyn_cast(op);
   if (!opInterface) return false;
   if (!isLegalVersion(opInterface, targetVersion)) return false;
-  LLVM_DEBUG(llvm::dbgs() << "Legal version for target. " << op << '\n');
+  LLVM_DEBUG(llvm::dbgs() << "Legal op version for target. " << op << '\n');
+
+  // Validate op constraints
+  auto constraintInterface = dyn_cast(op);
+  if (constraintInterface &&
+      failed(constraintInterface.validateConstraint(op, targetVersion))) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "Op failed to satisfy versioned constraints. " << op << '\n');
+    return false;
+  }
+  LLVM_DEBUG(llvm::dbgs() << "Legal constraints for target. " << op << '\n');
 
   // Validate attributes
   auto isLegalAttrFn = [&](const NamedAttribute& attr) {