From 777c1f4e2307747d33efd755629405e5a5acd4cd Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Mon, 24 Apr 2023 10:55:48 -0700 Subject: [PATCH] Assert for non-empty nulls (#13071) Authors: - Raza Jafri (https://github.com/razajafri) Approvers: - Nghia Truong (https://github.com/ttnghia) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/13071 --- java/pom.xml | 28 +++- .../java/ai/rapids/cudf/AssertEmptyNulls.java | 36 +++++ .../main/java/ai/rapids/cudf/ColumnView.java | 2 + .../java/ai/rapids/cudf/ColumnVectorTest.java | 100 +------------ .../cudf/ColumnViewNonEmptyNullsTest.java | 134 ++++++++++++++++++ 5 files changed, 195 insertions(+), 105 deletions(-) create mode 100644 java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java create mode 100644 java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java diff --git a/java/pom.xml b/java/pom.xml index f644bed67fa..985a56d9e82 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -192,6 +192,7 @@ **/CudaFatalTest.java + **/ColumnViewNonEmptyNullsTest.java @@ -201,15 +202,22 @@ test + + non-empty-null-test + + test + + + -da:ai.rapids.cudf.AssertEmptyNulls + */ColumnViewNonEmptyNullsTest.java + + fatal-cuda-test test - - **/CudaFatalTest.java - false */CudaFatalTest.java @@ -233,6 +241,7 @@ maven-surefire-plugin + **/ColumnViewNonEmptyNullsTest.java **/CuFileTest.java **/CudaFatalTest.java @@ -250,13 +259,20 @@ test - - **/CudaFatalTest.java - false */CudaFatalTest.java + + non-empty-null-test + + test + + + -da:ai.rapids.cudf.AssertEmptyNulls + */ColumnViewNonEmptyNullsTest.java + + diff --git a/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java b/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java new file mode 100644 index 00000000000..cc1bb67ee5c --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java @@ -0,0 +1,36 @@ +/* + * + * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +/** + * Helper class that asserts a ColumnView contains no non-empty nulls. + * + * The check is applied only to nested types and types with offsets. It lives in its own class so + * that it can be switched on/off independently of other asserts by passing + * "-da:ai.rapids.cudf.AssertEmptyNulls" to the JVM. That is needed because some tests deliberately + * build ColumnViews with non-empty nulls and, more importantly, because an external system may not + * require nulls to be empty; to work with such systems the assert can be turned off in the field.
+ */ +public class AssertEmptyNulls { + public static void assertNullsAreEmpty(ColumnView cv) { + if (cv.type.isNestedType() || cv.type.hasOffsets()) { + assert !cv.hasNonEmptyNulls() : "Column has non-empty nulls"; + } + } +} diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 7d93438d72e..67ad9166fe0 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -52,6 +52,7 @@ public class ColumnView implements AutoCloseable, BinaryOperable { this.rows = ColumnView.getNativeRowCount(viewHandle); this.nullCount = ColumnView.getNativeNullCount(viewHandle); this.offHeap = null; + AssertEmptyNulls.assertNullsAreEmpty(this); } @@ -67,6 +68,7 @@ protected ColumnView(ColumnVector.OffHeapState state) { type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle)); rows = ColumnView.getNativeRowCount(viewHandle); nullCount = ColumnView.getNativeNullCount(viewHandle); + AssertEmptyNulls.assertNullsAreEmpty(this); } /** diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 8e19c543ee5..7cdb4538e32 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -740,43 +740,6 @@ void testSpark32BitMurmur3HashListsAndNestedLists() { } } - @Test - void testAndNullReconfigureNulls() { - try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); - ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); - ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); - ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); - ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1); - ColumnVector stringExpected =
ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null); - ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) { - assertColumnsAreEqual(v0, intResult); - assertColumnsAreEqual(stringExpected, stringResult); - assertColumnsAreEqual(v2, noMaskResult); - } - } - - @Test - void testOrNullReconfigureNulls() { - try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); - ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); - ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); - ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); - ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); - ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); - ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); - ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); - ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); - ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); - ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { - assertColumnsAreEqual(v0, intResultV0); - assertColumnsAreEqual(v1, intResultV0V1); - assertColumnsAreEqual(v1, intResultMulti); - assertColumnsAreEqual(v2, intResultv0v1v2); - assertColumnsAreEqual(stringExpected, stringResult); - assertColumnsAreEqual(v3, noMaskResult); - } - } - @Test void isNotNullTestEmptyColumn() { try (ColumnVector v = ColumnVector.fromBoxedInts(); @@ -4635,7 +4598,7 @@ void testDropListDuplicatesWithKeysValuesNullable() { } @SafeVarargs - private static ColumnVector makeListsColumn(DType childDType, List... rows) { + public static ColumnVector makeListsColumn(DType childDType, List... 
rows) { HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType); HostColumnVector.DataType listType = new HostColumnVector.ListType(true, childType); return ColumnVector.fromLists(listType, rows); @@ -6713,65 +6676,4 @@ void testApplyBooleanMaskFromListOfStructure() { assertColumnsAreEqual(expectedCv, actualCv); } } - - /** - * The caller needs to make sure to close the returned ColumnView - */ - private ColumnView[] getColumnViewWithNonEmptyNulls() { - List list0 = Arrays.asList(1, 2, 3); - List list1 = Arrays.asList(4, 5, null); - List list2 = Arrays.asList(7, 8, 9); - List list3 = null; - ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); - // Modify the validity buffer - BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); - try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { - newValidity.copyFromDeviceBuffer(dmb); - BitVectorHelper.setNullAt(newValidity, 1); - dmb.copyFromHostBuffer(newValidity); - } - try (HostColumnVector hostColumnVector = input.copyToHost()) { - assert (hostColumnVector.isNull(1)); - assert (hostColumnVector.isNull(3)); - } - try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { - ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); - assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); - } - ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, - input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); - assertEquals(2, colWithNonEmptyNulls.nullCount); - return new ColumnView[]{input, colWithNonEmptyNulls}; - } - - @Test - void testPurgeNonEmptyNullsList() { - ColumnView[] values = getColumnViewWithNonEmptyNulls(); - try (ColumnView colWithNonEmptyNulls = values[1]; - ColumnView input = values[0]; - // purge non-empty nulls - ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); - ColumnVector 
expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); - ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) { - assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); - assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); - assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); - } - } - - @Test - void testPurgeNonEmptyNullsStruct() { - ColumnView[] values = getColumnViewWithNonEmptyNulls(); - try (ColumnView listCol = values[1]; - ColumnView input = values[0]; - ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); - ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); - ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); - ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); - ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); - ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) { - assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); - assertFalse(newListChild.hasNonEmptyNulls()); - } - } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java b/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java new file mode 100644 index 00000000000..070f94395c5 --- /dev/null +++ b/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java @@ -0,0 +1,134 @@ +/* + * + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package ai.rapids.cudf; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests that need to explicitly set non-empty nulls (run with -da:ai.rapids.cudf.AssertEmptyNulls) + */ +public class ColumnViewNonEmptyNullsTest extends CudfTestBase { + + @Test + void testAndNullReconfigureNulls() { + try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); + ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); + ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); + ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1); + ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null); + ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) { + assertColumnsAreEqual(v0, intResult); + assertColumnsAreEqual(stringExpected, stringResult); + assertColumnsAreEqual(v2, noMaskResult); + } + } + + @Test + void testOrNullReconfigureNulls() { + try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); + ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); + ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); + ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector intResultMulti
= v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); + ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); + ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); + ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); + ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { + assertColumnsAreEqual(v0, intResultV0); + assertColumnsAreEqual(v1, intResultV0V1); + assertColumnsAreEqual(v1, intResultMulti); + assertColumnsAreEqual(v2, intResultv0v1v2); + assertColumnsAreEqual(stringExpected, stringResult); + assertColumnsAreEqual(v3, noMaskResult); + } + } + + /** + * Returns {input, viewWithNonEmptyNulls}; the caller needs to make sure to close both ColumnViews + */ + private ColumnView[] getColumnViewWithNonEmptyNulls() { + List<Integer> list0 = Arrays.asList(1, 2, 3); + List<Integer> list1 = Arrays.asList(4, 5, null); + List<Integer> list2 = Arrays.asList(7, 8, 9); + List<Integer> list3 = null; + ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3); + // Mark row 1 as null in the validity buffer while its offsets (3, 6) stay non-empty + BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); + try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { + newValidity.copyFromDeviceBuffer(dmb); + BitVectorHelper.setNullAt(newValidity, 1); + dmb.copyFromHostBuffer(newValidity); + } + try (HostColumnVector hostColumnVector = input.copyToHost()) { + assertTrue(hostColumnVector.isNull(1)); + assertTrue(hostColumnVector.isNull(3)); + } + try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9); + ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) { + assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); + } + ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); + assertEquals(2, colWithNonEmptyNulls.nullCount); + return new ColumnView[]{input, colWithNonEmptyNulls}; + } + + @Test + void testPurgeNonEmptyNullsList() { + ColumnView[] values = getColumnViewWithNonEmptyNulls(); + try (ColumnView colWithNonEmptyNulls = values[1]; + ColumnView input = values[0]; + // purge non-empty nulls + ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) { + assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); + } + } + + @Test + void testPurgeNonEmptyNullsStruct() { + ColumnView[] values = getColumnViewWithNonEmptyNulls(); + try (ColumnView listCol = values[1]; + ColumnView input = values[0]; + ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); + ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); + ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); + ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); + ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); + ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) { + assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); + assertFalse(newListChild.hasNonEmptyNulls()); + } + } +}