diff --git a/java/pom.xml b/java/pom.xml
index f644bed67fa..985a56d9e82 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -192,6 +192,7 @@
**/CudaFatalTest.java
+ **/ColumnViewNonEmptyNullsTest.java
@@ -201,15 +202,22 @@
test
+
+ non-empty-null-test
+
+ test
+
+
+ -da:ai.rapids.cudf.AssertEmptyNulls
+ */ColumnViewNonEmptyNullsTest.java
+
+
fatal-cuda-test
test
-
- **/CudaFatalTest.java
-
false
*/CudaFatalTest.java
@@ -233,6 +241,7 @@
maven-surefire-plugin
+ **/ColumnViewNonEmptyNullsTest.java
**/CuFileTest.java
**/CudaFatalTest.java
@@ -250,13 +259,20 @@
test
-
- **/CudaFatalTest.java
-
false
*/CudaFatalTest.java
+
+ non-empty-null-test
+
+ test
+
+
+ -da:ai.rapids.cudf.AssertEmptyNulls
+ */ColumnViewNonEmptyNullsTest.java
+
+
diff --git a/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java b/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java
new file mode 100644
index 00000000000..cc1bb67ee5c
--- /dev/null
+++ b/java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java
@@ -0,0 +1,36 @@
+/*
+ *
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package ai.rapids.cudf;
+
+/**
+ * Helper that asserts a ColumnView contains no non-empty nulls.
+ * The assert lives in its own class so that it can be turned on/off independently by passing
+ * "-da:ai.rapids.cudf.AssertEmptyNulls". We need that because we have tests that explicitly use
+ * ColumnViews containing non-empty nulls and, more importantly, because an external system may
+ * not require nulls to be empty, so the assert can be turned off in the field to work with it.
+ */
+public class AssertEmptyNulls {
+  /** Asserts that cv has no non-empty nulls; checked only for nested types or types with offsets. */
+  public static void assertNullsAreEmpty(ColumnView cv) {
+    if (!cv.type.isNestedType() && !cv.type.hasOffsets()) {
+      return;
+    }
+    assert !cv.hasNonEmptyNulls() : "Column has non-empty nulls";
+  }
+}
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 7d93438d72e..67ad9166fe0 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -52,6 +52,7 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
this.rows = ColumnView.getNativeRowCount(viewHandle);
this.nullCount = ColumnView.getNativeNullCount(viewHandle);
this.offHeap = null;
+ AssertEmptyNulls.assertNullsAreEmpty(this);
}
@@ -67,6 +68,7 @@ protected ColumnView(ColumnVector.OffHeapState state) {
type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle));
rows = ColumnView.getNativeRowCount(viewHandle);
nullCount = ColumnView.getNativeNullCount(viewHandle);
+ AssertEmptyNulls.assertNullsAreEmpty(this);
}
/**
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 8e19c543ee5..7cdb4538e32 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -740,43 +740,6 @@ void testSpark32BitMurmur3HashListsAndNestedLists() {
}
}
- @Test
- void testAndNullReconfigureNulls() {
- try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
- ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
- ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
- ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
- ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1);
- ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
- ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) {
- assertColumnsAreEqual(v0, intResult);
- assertColumnsAreEqual(stringExpected, stringResult);
- assertColumnsAreEqual(v2, noMaskResult);
- }
- }
-
- @Test
- void testOrNullReconfigureNulls() {
- try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
- ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
- ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
- ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
- ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
- ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
- ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2);
- ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
- ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
- ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
- ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
- assertColumnsAreEqual(v0, intResultV0);
- assertColumnsAreEqual(v1, intResultV0V1);
- assertColumnsAreEqual(v1, intResultMulti);
- assertColumnsAreEqual(v2, intResultv0v1v2);
- assertColumnsAreEqual(stringExpected, stringResult);
- assertColumnsAreEqual(v3, noMaskResult);
- }
- }
-
@Test
void isNotNullTestEmptyColumn() {
try (ColumnVector v = ColumnVector.fromBoxedInts();
@@ -4635,7 +4598,7 @@ void testDropListDuplicatesWithKeysValuesNullable() {
}
@SafeVarargs
- private static ColumnVector makeListsColumn(DType childDType, List... rows) {
+ public static ColumnVector makeListsColumn(DType childDType, List... rows) {
HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType);
HostColumnVector.DataType listType = new HostColumnVector.ListType(true, childType);
return ColumnVector.fromLists(listType, rows);
@@ -6713,65 +6676,4 @@ void testApplyBooleanMaskFromListOfStructure() {
assertColumnsAreEqual(expectedCv, actualCv);
}
}
-
- /**
- * The caller needs to make sure to close the returned ColumnView
- */
- private ColumnView[] getColumnViewWithNonEmptyNulls() {
- List list0 = Arrays.asList(1, 2, 3);
- List list1 = Arrays.asList(4, 5, null);
- List list2 = Arrays.asList(7, 8, 9);
- List list3 = null;
- ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3);
- // Modify the validity buffer
- BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
- try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
- newValidity.copyFromDeviceBuffer(dmb);
- BitVectorHelper.setNullAt(newValidity, 1);
- dmb.copyFromHostBuffer(newValidity);
- }
- try (HostColumnVector hostColumnVector = input.copyToHost()) {
- assert (hostColumnVector.isNull(1));
- assert (hostColumnVector.isNull(3));
- }
- try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) {
- ColumnView offsetsCvBeforePurge = input.getListOffsetsView();
- assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
- }
- ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
- input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
- assertEquals(2, colWithNonEmptyNulls.nullCount);
- return new ColumnView[]{input, colWithNonEmptyNulls};
- }
-
- @Test
- void testPurgeNonEmptyNullsList() {
- ColumnView[] values = getColumnViewWithNonEmptyNulls();
- try (ColumnView colWithNonEmptyNulls = values[1];
- ColumnView input = values[0];
- // purge non-empty nulls
- ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls();
- ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
- ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) {
- assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls());
- assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
- assertFalse(colWithEmptyNulls.hasNonEmptyNulls());
- }
- }
-
- @Test
- void testPurgeNonEmptyNullsStruct() {
- ColumnView[] values = getColumnViewWithNonEmptyNulls();
- try (ColumnView listCol = values[1];
- ColumnView input = values[0];
- ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings");
- ColumnView structView = ColumnView.makeStructView(stringsCol, listCol);
- ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls();
- ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1);
- ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
- ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) {
- assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
- assertFalse(newListChild.hasNonEmptyNulls());
- }
- }
}
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java b/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java
new file mode 100644
index 00000000000..070f94395c5
--- /dev/null
+++ b/java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java
@@ -0,0 +1,134 @@
+/*
+ *
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package ai.rapids.cudf;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * This class will house only tests that need to explicitly set non-empty nulls
+ */
+public class ColumnViewNonEmptyNullsTest extends CudfTestBase {
+
+ @Test
+ void testAndNullReconfigureNulls() {
+ try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
+ ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
+ ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
+ ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
+ ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1);
+ ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
+ ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) {
+ assertColumnsAreEqual(v0, intResult);
+ assertColumnsAreEqual(stringExpected, stringResult);
+ assertColumnsAreEqual(v2, noMaskResult);
+ }
+ }
+
+ @Test
+ void testOrNullReconfigureNulls() {
+ try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
+ ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
+ ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
+ ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
+ ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
+ ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
+ ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2);
+ ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
+ ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
+ ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
+ ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
+ assertColumnsAreEqual(v0, intResultV0);
+ assertColumnsAreEqual(v1, intResultV0V1);
+ assertColumnsAreEqual(v1, intResultMulti);
+ assertColumnsAreEqual(v2, intResultv0v1v2);
+ assertColumnsAreEqual(stringExpected, stringResult);
+ assertColumnsAreEqual(v3, noMaskResult);
+ }
+ }
+
+ /**
+ * The caller needs to make sure to close the returned ColumnView
+ */
+ private ColumnView[] getColumnViewWithNonEmptyNulls() {
+ List list0 = Arrays.asList(1, 2, 3);
+ List list1 = Arrays.asList(4, 5, null);
+ List list2 = Arrays.asList(7, 8, 9);
+ List list3 = null;
+ ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3);
+ // Modify the validity buffer
+ BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
+ try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
+ newValidity.copyFromDeviceBuffer(dmb);
+ BitVectorHelper.setNullAt(newValidity, 1);
+ dmb.copyFromHostBuffer(newValidity);
+ }
+ try (HostColumnVector hostColumnVector = input.copyToHost()) {
+ assert (hostColumnVector.isNull(1));
+ assert (hostColumnVector.isNull(3));
+ }
+ try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) {
+ ColumnView offsetsCvBeforePurge = input.getListOffsetsView();
+ assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
+ }
+ ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
+ input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
+ assertEquals(2, colWithNonEmptyNulls.nullCount);
+ return new ColumnView[]{input, colWithNonEmptyNulls};
+ }
+
+ @Test
+ void testPurgeNonEmptyNullsList() {
+ ColumnView[] values = getColumnViewWithNonEmptyNulls();
+ try (ColumnView colWithNonEmptyNulls = values[1];
+ ColumnView input = values[0];
+ // purge non-empty nulls
+ ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls();
+ ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
+ ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) {
+ assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls());
+ assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
+ assertFalse(colWithEmptyNulls.hasNonEmptyNulls());
+ }
+ }
+
+ @Test
+ void testPurgeNonEmptyNullsStruct() {
+ ColumnView[] values = getColumnViewWithNonEmptyNulls();
+ try (ColumnView listCol = values[1];
+ ColumnView input = values[0];
+ ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings");
+ ColumnView structView = ColumnView.makeStructView(stringsCol, listCol);
+ ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls();
+ ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1);
+ ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
+ ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) {
+ assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
+ assertFalse(newListChild.hasNonEmptyNulls());
+ }
+ }
+}