Skip to content

Commit

Permalink
Assert for non-empty nulls (rapidsai#13071)
Browse files Browse the repository at this point in the history
Authors:
  - Raza Jafri (https://github.com/razajafri)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: rapidsai#13071
  • Loading branch information
razajafri authored Apr 24, 2023
1 parent 310fe9f commit 777c1f4
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 105 deletions.
28 changes: 22 additions & 6 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
<configuration>
<excludes>
<exclude>**/CudaFatalTest.java</exclude>
<exclude>**/ColumnViewNonEmptyNullsTest.java</exclude>
</excludes>
</configuration>
<executions>
Expand All @@ -201,15 +202,22 @@
<goal>test</goal>
</goals>
</execution>
<execution>
<id>non-empty-null-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<argLine>-da:ai.rapids.cudf.AssertEmptyNulls</argLine>
<test>*/ColumnViewNonEmptyNullsTest.java</test>
</configuration>
</execution>
<execution>
<id>fatal-cuda-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<includes>
<include>**/CudaFatalTest.java</include>
</includes>
<reuseForks>false</reuseForks>
<test>*/CudaFatalTest.java</test>
</configuration>
Expand All @@ -233,6 +241,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/ColumnViewNonEmptyNullsTest.java</exclude>
<exclude>**/CuFileTest.java</exclude>
<exclude>**/CudaFatalTest.java</exclude>
</excludes>
Expand All @@ -250,13 +259,20 @@
<goal>test</goal>
</goals>
<configuration>
<includes>
<include>**/CudaFatalTest.java</include>
</includes>
<reuseForks>false</reuseForks>
<test>*/CudaFatalTest.java</test>
</configuration>
</execution>
<execution>
<id>non-empty-null-test</id>
<goals>
<goal>test</goal>
</goals>
<configuration>
<argLine>-da:ai.rapids.cudf.AssertEmptyNulls</argLine>
<test>*/ColumnViewNonEmptyNullsTest.java</test>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
Expand Down
36 changes: 36 additions & 0 deletions java/src/main/java/ai/rapids/cudf/AssertEmptyNulls.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

/**
 * Guard that verifies a {@link ColumnView} carries no non-empty nulls.
 *
 * The check lives in its own class so that it can be switched on or off independently of every
 * other assertion, by passing "-da:ai.rapids.cudf.AssertEmptyNulls" to the JVM. That switch is
 * needed by tests that deliberately build ColumnViews containing non-empty nulls and, more
 * importantly, when working with an external system that does not require nulls to be empty,
 * in which case the check can be turned off in the field.
 */
public class AssertEmptyNulls {
  /**
   * Asserts that {@code cv} holds no non-empty nulls. Only nested types and types that have
   * offsets are inspected; every other type is accepted without any check.
   *
   * @param cv the column view to inspect
   * @throws AssertionError if assertions are enabled for this class and {@code cv} reports
   *                        non-empty nulls
   */
  public static void assertNullsAreEmpty(ColumnView cv) {
    boolean needsCheck = cv.type.isNestedType() || cv.type.hasOffsets();
    if (!needsCheck) {
      return;
    }
    // The assert statement must stay inside this class: "-da:<class>" only affects asserts that
    // are declared in that class.
    assert !cv.hasNonEmptyNulls() : "Column has non-empty nulls";
  }
}
2 changes: 2 additions & 0 deletions java/src/main/java/ai/rapids/cudf/ColumnView.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
this.rows = ColumnView.getNativeRowCount(viewHandle);
this.nullCount = ColumnView.getNativeNullCount(viewHandle);
this.offHeap = null;
AssertEmptyNulls.assertNullsAreEmpty(this);
}


Expand All @@ -67,6 +68,7 @@ protected ColumnView(ColumnVector.OffHeapState state) {
type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle));
rows = ColumnView.getNativeRowCount(viewHandle);
nullCount = ColumnView.getNativeNullCount(viewHandle);
AssertEmptyNulls.assertNullsAreEmpty(this);
}

/**
Expand Down
100 changes: 1 addition & 99 deletions java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -740,43 +740,6 @@ void testSpark32BitMurmur3HashListsAndNestedLists() {
}
}

/**
 * Exercises mergeAndSetValidity with BITWISE_AND. The expectations below encode that a row of
 * the result is null whenever it is null in any of the supplied mask columns, and that the
 * result equals the input when no mask column is supplied.
 */
@Test
void testAndNullReconfigureNulls() {
  try (ColumnVector intsNullAt235 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
       ColumnVector intsNullAt5 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
       ColumnVector mergedInts = intsNullAt5.mergeAndSetValidity(BinaryOp.BITWISE_AND, intsNullAt235);
       ColumnVector allValidStrings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
       ColumnVector mergedStrings = allValidStrings.mergeAndSetValidity(BinaryOp.BITWISE_AND, intsNullAt235, intsNullAt5);
       ColumnVector expectedStrings = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
       ColumnVector mergedWithoutMasks = allValidStrings.mergeAndSetValidity(BinaryOp.BITWISE_AND)) {
    // Data of intsNullAt5 combined with the validity of intsNullAt235.
    assertColumnsAreEqual(intsNullAt235, mergedInts);
    // Rows 2, 3 and 5 are null in at least one mask column.
    assertColumnsAreEqual(expectedStrings, mergedStrings);
    // No mask columns: the column is expected to come back unchanged.
    assertColumnsAreEqual(allValidStrings, mergedWithoutMasks);
  }
}

/**
 * Exercises mergeAndSetValidity with BITWISE_OR. The expectations below encode that a row of
 * the result is null only when it is null in every supplied mask column, and that the result
 * equals the input when no mask column is supplied.
 */
@Test
void testOrNullReconfigureNulls() {
  try (ColumnVector intsNullAt235 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
       ColumnVector intsNullAt5 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
       ColumnVector intsNoNulls = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
       ColumnVector mergedSingleMask = intsNullAt5.mergeAndSetValidity(BinaryOp.BITWISE_OR, intsNullAt235);
       ColumnVector mergedTwoMasks = intsNullAt5.mergeAndSetValidity(BinaryOp.BITWISE_OR, intsNullAt235, intsNullAt5);
       ColumnVector mergedManyMasks = intsNullAt5.mergeAndSetValidity(BinaryOp.BITWISE_OR,
           intsNullAt235, intsNullAt235, intsNullAt5, intsNullAt5, intsNullAt235, intsNullAt5, intsNullAt235);
       ColumnVector mergedThreeMasks = intsNoNulls.mergeAndSetValidity(BinaryOp.BITWISE_OR, intsNullAt235, intsNullAt5, intsNoNulls);
       ColumnVector allValidStrings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
       ColumnVector mergedStrings = allValidStrings.mergeAndSetValidity(BinaryOp.BITWISE_OR, intsNullAt235, intsNullAt5);
       ColumnVector expectedStrings = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
       ColumnVector mergedWithoutMasks = allValidStrings.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
    // A single mask column: its validity is taken as is.
    assertColumnsAreEqual(intsNullAt235, mergedSingleMask);
    // Only row 5 is null in both mask columns.
    assertColumnsAreEqual(intsNullAt5, mergedTwoMasks);
    // Repeating the same mask columns does not change the outcome.
    assertColumnsAreEqual(intsNullAt5, mergedManyMasks);
    // One mask column has no nulls, so no row ends up null.
    assertColumnsAreEqual(intsNoNulls, mergedThreeMasks);
    assertColumnsAreEqual(expectedStrings, mergedStrings);
    // No mask columns: the column is expected to come back unchanged.
    assertColumnsAreEqual(allValidStrings, mergedWithoutMasks);
  }
}

@Test
void isNotNullTestEmptyColumn() {
try (ColumnVector v = ColumnVector.fromBoxedInts();
Expand Down Expand Up @@ -4635,7 +4598,7 @@ void testDropListDuplicatesWithKeysValuesNullable() {
}

@SafeVarargs
private static <T> ColumnVector makeListsColumn(DType childDType, List<T>... rows) {
public static <T> ColumnVector makeListsColumn(DType childDType, List<T>... rows) {
HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType);
HostColumnVector.DataType listType = new HostColumnVector.ListType(true, childType);
return ColumnVector.fromLists(listType, rows);
Expand Down Expand Up @@ -6713,65 +6676,4 @@ void testApplyBooleanMaskFromListOfStructure() {
assertColumnsAreEqual(expectedCv, actualCv);
}
}

/**
 * Builds a LIST column view that contains a non-empty null.
 *
 * A four-row list column is created, then row 1 is additionally marked null directly in the
 * device validity buffer while the offsets are left untouched (0, 3, 6, 9, 9), so row 1 is
 * null but still spans three child elements.
 *
 * @return a two-element array: index 0 is the backing ColumnVector that owns the buffers,
 *         index 1 is the ColumnView with the non-empty null built on top of those buffers.
 *         The caller needs to make sure to close both of them.
 */
private ColumnView[] getColumnViewWithNonEmptyNulls() {
  List<Integer> list0 = Arrays.asList(1, 2, 3);
  List<Integer> list1 = Arrays.asList(4, 5, null);
  List<Integer> list2 = Arrays.asList(7, 8, 9);
  List<Integer> list3 = null;
  ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3);
  // Modify the validity buffer
  BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
  try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
    newValidity.copyFromDeviceBuffer(dmb);
    BitVectorHelper.setNullAt(newValidity, 1);
    dmb.copyFromHostBuffer(newValidity);
  }
  try (HostColumnVector hostColumnVector = input.copyToHost()) {
    // JUnit assertions instead of the assert keyword, so the sanity checks also run when the
    // JVM was started without -ea.
    assertTrue(hostColumnVector.isNull(1));
    assertTrue(hostColumnVector.isNull(3));
  }
  // The offsets view is a closeable resource of its own; it was previously leaked.
  try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9);
       ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) {
    assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
  }
  // NOTE(review): the views returned by getChildColumnViews() are never closed here - confirm
  // whether the new ColumnView takes ownership of them or whether they need closing as well.
  ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
      input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
  assertEquals(2, colWithNonEmptyNulls.nullCount);
  return new ColumnView[]{input, colWithNonEmptyNulls};
}

/**
 * Verifies that purgeNonEmptyNulls() on a LIST column with a non-empty null yields a column
 * whose offsets no longer span any elements for the null rows.
 */
@Test
void testPurgeNonEmptyNullsList() {
  // Index 0 is the backing vector, index 1 the view that has the non-empty null.
  ColumnView[] fixture = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirtyList = fixture[1];
       ColumnView backingVector = fixture[0];
       // purge non-empty nulls
       ColumnView purgedList = dirtyList.purgeNonEmptyNulls();
       ColumnVector offsetsWanted = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView offsetsFound = purgedList.getListOffsetsView()) {
    assertTrue(dirtyList.hasNonEmptyNulls());
    assertColumnsAreEqual(offsetsWanted, offsetsFound);
    assertFalse(purgedList.hasNonEmptyNulls());
  }
}

/**
 * Verifies that purgeNonEmptyNulls() on a STRUCT view also purges a LIST child that has a
 * non-empty null.
 */
@Test
void testPurgeNonEmptyNullsStruct() {
  // Index 0 is the backing vector, index 1 the view that has the non-empty null.
  ColumnView[] fixture = getColumnViewWithNonEmptyNulls();
  try (ColumnView dirtyList = fixture[1];
       ColumnView backingVector = fixture[0];
       ColumnView strings = ColumnVector.fromStrings("A", "col", "of", "Strings");
       ColumnView structOfStringsAndList = ColumnView.makeStructView(strings, dirtyList);
       ColumnView purgedStruct = structOfStringsAndList.purgeNonEmptyNulls();
       // Child 1 is the list column that was passed in second.
       ColumnView purgedListChild = purgedStruct.getChildColumnView(1);
       ColumnVector offsetsWanted = ColumnVector.fromInts(0, 3, 3, 6, 6);
       ColumnView offsetsFound = purgedListChild.getListOffsetsView()) {
    assertColumnsAreEqual(offsetsWanted, offsetsFound);
    assertFalse(purgedListChild.hasNonEmptyNulls());
  }
}
}
134 changes: 134 additions & 0 deletions java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package ai.rapids.cudf;

import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * This class will house only tests that need to explicitly set non-empty nulls.
 *
 * NOTE(review): these tests presumably have to run with the AssertEmptyNulls assertion disabled
 * ("-da:ai.rapids.cudf.AssertEmptyNulls"), since they build column views that violate it on
 * purpose - confirm against the build configuration.
 */
public class ColumnViewNonEmptyNullsTest extends CudfTestBase {

  /**
   * Exercises mergeAndSetValidity with BITWISE_AND. The expectations encode that a row of the
   * result is null whenever it is null in any of the supplied mask columns, and that the result
   * equals the input when no mask column is supplied.
   */
  @Test
  void testAndNullReconfigureNulls() {
    try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
         ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
         ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0);
         ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
         ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1);
         ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null);
         ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) {
      assertColumnsAreEqual(v0, intResult);
      assertColumnsAreEqual(stringExpected, stringResult);
      assertColumnsAreEqual(v2, noMaskResult);
    }
  }

  /**
   * Exercises mergeAndSetValidity with BITWISE_OR. The expectations encode that a row of the
   * result is null only when it is null in every supplied mask column, and that the result
   * equals the input when no mask column is supplied.
   */
  @Test
  void testOrNullReconfigureNulls() {
    try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null);
         ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null);
         ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE);
         ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0);
         ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
         ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
         ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2);
         ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
         ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
         ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
         ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
      assertColumnsAreEqual(v0, intResultV0);
      assertColumnsAreEqual(v1, intResultV0V1);
      assertColumnsAreEqual(v1, intResultMulti);
      assertColumnsAreEqual(v2, intResultv0v1v2);
      assertColumnsAreEqual(stringExpected, stringResult);
      assertColumnsAreEqual(v3, noMaskResult);
    }
  }

  /**
   * Builds a LIST column view that contains a non-empty null.
   *
   * A four-row list column is created, then row 1 is additionally marked null directly in the
   * device validity buffer while the offsets are left untouched (0, 3, 6, 9, 9), so row 1 is
   * null but still spans three child elements.
   *
   * @return a two-element array: index 0 is the backing ColumnVector that owns the buffers,
   *         index 1 is the ColumnView with the non-empty null built on top of those buffers.
   *         The caller needs to make sure to close both of them.
   */
  private ColumnView[] getColumnViewWithNonEmptyNulls() {
    List<Integer> list0 = Arrays.asList(1, 2, 3);
    List<Integer> list1 = Arrays.asList(4, 5, null);
    List<Integer> list2 = Arrays.asList(7, 8, 9);
    List<Integer> list3 = null;
    ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3);
    // Modify the validity buffer
    BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY);
    try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) {
      newValidity.copyFromDeviceBuffer(dmb);
      BitVectorHelper.setNullAt(newValidity, 1);
      dmb.copyFromHostBuffer(newValidity);
    }
    try (HostColumnVector hostColumnVector = input.copyToHost()) {
      // JUnit assertions instead of the assert keyword, so the sanity checks also run when the
      // JVM was started without -ea.
      assertTrue(hostColumnVector.isNull(1));
      assertTrue(hostColumnVector.isNull(3));
    }
    // The offsets view is a closeable resource of its own; it was previously leaked.
    try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9);
         ColumnView offsetsCvBeforePurge = input.getListOffsetsView()) {
      assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge);
    }
    // NOTE(review): the views returned by getChildColumnViews() are never closed here - confirm
    // whether the new ColumnView takes ownership of them or whether they need closing as well.
    ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb,
        input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews());
    assertEquals(2, colWithNonEmptyNulls.nullCount);
    return new ColumnView[]{input, colWithNonEmptyNulls};
  }

  /**
   * Verifies that purgeNonEmptyNulls() on a LIST column with a non-empty null yields a column
   * whose offsets no longer span any elements for the null rows.
   */
  @Test
  void testPurgeNonEmptyNullsList() {
    // Index 0 is the backing vector, index 1 the view that has the non-empty null.
    ColumnView[] values = getColumnViewWithNonEmptyNulls();
    try (ColumnView colWithNonEmptyNulls = values[1];
         ColumnView input = values[0];
         // purge non-empty nulls
         ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls();
         ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
         ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) {
      assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls());
      assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
      assertFalse(colWithEmptyNulls.hasNonEmptyNulls());
    }
  }

  /**
   * Verifies that purgeNonEmptyNulls() on a STRUCT view also purges a LIST child that has a
   * non-empty null.
   */
  @Test
  void testPurgeNonEmptyNullsStruct() {
    // Index 0 is the backing vector, index 1 the view that has the non-empty null.
    ColumnView[] values = getColumnViewWithNonEmptyNulls();
    try (ColumnView listCol = values[1];
         ColumnView input = values[0];
         ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings");
         ColumnView structView = ColumnView.makeStructView(stringsCol, listCol);
         ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls();
         // Child 1 is the list column that was passed in second.
         ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1);
         ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6);
         ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) {
      assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge);
      assertFalse(newListChild.hasNonEmptyNulls());
    }
  }
}

0 comments on commit 777c1f4

Please sign in to comment.