forked from rapidsai/cudf
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Assert for non-empty nulls (rapidsai#13071)
Authors: - Raza Jafri (https://github.com/razajafri) Approvers: - Nghia Truong (https://github.com/ttnghia) - Robert (Bobby) Evans (https://github.com/revans2) URL: rapidsai#13071
- Loading branch information
Showing
5 changed files
with
195 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
package ai.rapids.cudf; | ||
|
||
/** | ||
* This class is a Helper class to assert there are no non-empty nulls in a ColumnView | ||
* | ||
* The reason for the existence of this class is so that we can turn the asserts on/off when needed | ||
* by passing "-da:ai.rapids.cudf.AssertEmptyNulls". We need that behavior because we have tests | ||
* that explicitly test with ColumnViews that contain non-empty nulls but more importantly, there | ||
* could be cases where an external system may not have a requirement of nulls being empty, so for | ||
* us to work with those systems, we can turn off this assert in the field. | ||
*/ | ||
public class AssertEmptyNulls { | ||
public static void assertNullsAreEmpty(ColumnView cv) { | ||
if (cv.type.isNestedType() || cv.type.hasOffsets()) { | ||
assert !cv.hasNonEmptyNulls() : "Column has non-empty nulls"; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
134 changes: 134 additions & 0 deletions
134
java/src/test/java/ai/rapids/cudf/ColumnViewNonEmptyNullsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* | ||
* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
*/ | ||
|
||
package ai.rapids.cudf; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Optional; | ||
|
||
import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; | ||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
/** | ||
* This class will house only tests that need to explicitly set non-empty nulls | ||
*/ | ||
public class ColumnViewNonEmptyNullsTest extends CudfTestBase { | ||
|
||
@Test | ||
void testAndNullReconfigureNulls() { | ||
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); | ||
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); | ||
ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); | ||
ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); | ||
ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0, v1); | ||
ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", null, null, "MIN_VALUE", null); | ||
ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_AND)) { | ||
assertColumnsAreEqual(v0, intResult); | ||
assertColumnsAreEqual(stringExpected, stringResult); | ||
assertColumnsAreEqual(v2, noMaskResult); | ||
} | ||
} | ||
|
||
@Test | ||
void testOrNullReconfigureNulls() { | ||
try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); | ||
ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); | ||
ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); | ||
ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); | ||
ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); | ||
ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); | ||
ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); | ||
ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); | ||
ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); | ||
ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); | ||
ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { | ||
assertColumnsAreEqual(v0, intResultV0); | ||
assertColumnsAreEqual(v1, intResultV0V1); | ||
assertColumnsAreEqual(v1, intResultMulti); | ||
assertColumnsAreEqual(v2, intResultv0v1v2); | ||
assertColumnsAreEqual(stringExpected, stringResult); | ||
assertColumnsAreEqual(v3, noMaskResult); | ||
} | ||
} | ||
|
||
/** | ||
* The caller needs to make sure to close the returned ColumnView | ||
*/ | ||
private ColumnView[] getColumnViewWithNonEmptyNulls() { | ||
List<Integer> list0 = Arrays.asList(1, 2, 3); | ||
List<Integer> list1 = Arrays.asList(4, 5, null); | ||
List<Integer> list2 = Arrays.asList(7, 8, 9); | ||
List<Integer> list3 = null; | ||
ColumnVector input = ColumnVectorTest.makeListsColumn(DType.INT32, list0, list1, list2, list3); | ||
// Modify the validity buffer | ||
BaseDeviceMemoryBuffer dmb = input.getDeviceBufferFor(BufferType.VALIDITY); | ||
try (HostMemoryBuffer newValidity = HostMemoryBuffer.allocate(64)) { | ||
newValidity.copyFromDeviceBuffer(dmb); | ||
BitVectorHelper.setNullAt(newValidity, 1); | ||
dmb.copyFromHostBuffer(newValidity); | ||
} | ||
try (HostColumnVector hostColumnVector = input.copyToHost()) { | ||
assert (hostColumnVector.isNull(1)); | ||
assert (hostColumnVector.isNull(3)); | ||
} | ||
try (ColumnVector expectedOffsetsBeforePurge = ColumnVector.fromInts(0, 3, 6, 9, 9)) { | ||
ColumnView offsetsCvBeforePurge = input.getListOffsetsView(); | ||
assertColumnsAreEqual(expectedOffsetsBeforePurge, offsetsCvBeforePurge); | ||
} | ||
ColumnView colWithNonEmptyNulls = new ColumnView(input.type, input.rows, Optional.of(2L), dmb, | ||
input.getDeviceBufferFor(BufferType.OFFSET), input.getChildColumnViews()); | ||
assertEquals(2, colWithNonEmptyNulls.nullCount); | ||
return new ColumnView[]{input, colWithNonEmptyNulls}; | ||
} | ||
|
||
@Test | ||
void testPurgeNonEmptyNullsList() { | ||
ColumnView[] values = getColumnViewWithNonEmptyNulls(); | ||
try (ColumnView colWithNonEmptyNulls = values[1]; | ||
ColumnView input = values[0]; | ||
// purge non-empty nulls | ||
ColumnView colWithEmptyNulls = colWithNonEmptyNulls.purgeNonEmptyNulls(); | ||
ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); | ||
ColumnView offsetsCvAfterPurge = colWithEmptyNulls.getListOffsetsView()) { | ||
assertTrue(colWithNonEmptyNulls.hasNonEmptyNulls()); | ||
assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); | ||
assertFalse(colWithEmptyNulls.hasNonEmptyNulls()); | ||
} | ||
} | ||
|
||
@Test | ||
void testPurgeNonEmptyNullsStruct() { | ||
ColumnView[] values = getColumnViewWithNonEmptyNulls(); | ||
try (ColumnView listCol = values[1]; | ||
ColumnView input = values[0]; | ||
ColumnView stringsCol = ColumnVector.fromStrings("A", "col", "of", "Strings"); | ||
ColumnView structView = ColumnView.makeStructView(stringsCol, listCol); | ||
ColumnView structWithEmptyNulls = structView.purgeNonEmptyNulls(); | ||
ColumnView newListChild = structWithEmptyNulls.getChildColumnView(1); | ||
ColumnVector expectedOffsetsAfterPurge = ColumnVector.fromInts(0, 3, 3, 6, 6); | ||
ColumnView offsetsCvAfterPurge = newListChild.getListOffsetsView()) { | ||
assertColumnsAreEqual(expectedOffsetsAfterPurge, offsetsCvAfterPurge); | ||
assertFalse(newListChild.hasNonEmptyNulls()); | ||
} | ||
} | ||
} |