From f4924a9c382af8f2cb898dab82cb4ee8834a31d5 Mon Sep 17 00:00:00 2001 From: MithunR Date: Mon, 15 Apr 2024 12:56:58 -0700 Subject: [PATCH 01/24] strings::contains() for multiple search targets This commit adds a new `strings::contains()` overload that allows for the search of multiple scalar search targets in the same call. The trick here is that a new kernel has been introduced, to extend the "string-per-warp" approach to search for multiple search keys in the same kernel. This approach allows CUDF to potentially reduce the number of kernels launched for `strings::contains()` by a factor of `N`, if all the search keys can be specified in the same call. This helps reduce the kernel-launch overheads for processes that do large numbers of calls to `strings::contains()`. Signed-off-by: MithunR Changed iteration order, for better cache performance. More optimizations: 1. Removed calls to `thrust::fill()`. The bool values are now explicitly written in the kernel. 2. Switched host-device copy to use async. Revert "More optimizations:" This reverts commit c0e355c7be116ebd79bcdc6aa77c36af182e5ec6. This commit was wrong: The thrust::fill() checks for empty target strings. If removed, we'll need to check for empty target strings for every input string row. This was better done the old way. More improvements: 1. Removed thrust::fill call. Setting values explicitly in the kernel. 2. Switched from using io::hostdevice_vector to rmm::device_uvector. The string_view allocation is tiny. This has helped reduce the time spent in strings::contains(). For small strings, delegate to thread-per-string algo. 
--- cpp/include/cudf/strings/find.hpp | 25 +++ cpp/src/strings/search/find.cu | 159 +++++++++++++++++- cpp/tests/strings/find_tests.cpp | 40 ++++- .../main/java/ai/rapids/cudf/ColumnView.java | 24 +++ java/src/main/native/src/ColumnViewJni.cpp | 33 +++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 27 +++ 6 files changed, 298 insertions(+), 10 deletions(-) diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index efba6da9454..e8d7f608aea 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -140,6 +140,31 @@ std::unique_ptr contains( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a table of columns of boolean values for each string where true indicates + * the target string was found within that string in the provided column. + * + * Each column in the result table corresponds to the result for the target string at the same + * ordinal. i.e. 0th column is the boolean-column result for the 0th target string, 1th for 1th, + * etc. + * + * If the target is not found for a string, false is returned for that entry in the output column. + * If the target is an empty string, true is returned for all non-null entries in the output column. + * + * Any null string entries return corresponding null entries in the output columns. 
+ * + * @param input Strings instance for this operation + * @param targets UTF-8 encoded strings to search for in each string in `input` + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New BOOL8 column + */ +std::unique_ptr contains( + strings_column_view const& input, + std::vector> const& targets, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Returns a column of boolean values for each string where true indicates * the corresponding target string was found within that string in the provided column. diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 45eba39f413..7133ee34aec 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include #include #include #include #include #include +#include #include #include #include @@ -381,6 +381,53 @@ CUDF_KERNEL void contains_warp_parallel_fn(column_device_view const d_strings, if (lane_idx == 0) { d_results[str_idx] = result; } } +CUDF_KERNEL void multi_contains_warp_parallel_fn(column_device_view const d_strings, + cudf::device_span d_targets, + cudf::device_span d_results) +{ + auto const num_targets = d_targets.size(); + auto const num_rows = d_strings.size(); + + auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); + using warp_reduce = cub::WarpReduce; + __shared__ typename warp_reduce::TempStorage temp_storage; + + if (idx >= (num_rows * cudf::detail::warp_size * num_targets)) { return; } + + auto const lane_idx = idx % cudf::detail::warp_size; + auto const str_idx = (idx / cudf::detail::warp_size) / num_targets; + auto const target_idx = (idx / cudf::detail::warp_size) % num_targets; + + if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. + + // Identify the target. + auto const d_target = d_targets[target_idx]; + + if (d_target.size_bytes() == 0) { + d_results[target_idx][str_idx] = true; // Empty string is always found. + return; + } + + // get the string for this warp + auto const d_str = d_strings.element(str_idx); + + if (d_target.size_bytes() > d_str.size_bytes()) { + d_results[target_idx][str_idx] = false; // Target can't possibly fit in the input string. 
+ return; + } + + // each thread of the warp will check just part of the string + auto found = false; + for (auto i = static_cast(idx % cudf::detail::warp_size); + !found && ((i + d_target.size_bytes()) <= d_str.size_bytes()); + i += cudf::detail::warp_size) { + // check the target matches this part of the d_str data + if (d_target.compare(d_str.data() + i, d_target.size_bytes()) == 0) { found = true; } + } + auto const result = warp_reduce(temp_storage).Reduce(found, cub::Max()); + if (lane_idx == 0) { d_results[target_idx][str_idx] = result; } +} + std::unique_ptr contains_warp_parallel(strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream, @@ -414,6 +461,69 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, return results; } +std::vector> multi_contains_warp_parallel( + strings_column_view const& input, + std::vector> const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_targets = static_cast(targets.size()); + CUDF_EXPECTS(not targets.empty(), "Must specify at least one target string."); + CUDF_EXPECTS(std::all_of(targets.begin(), + targets.end(), + [&](auto const& target) { return target.get().is_valid(stream); }), + "Target search strings must be valid."); + + // Convert targets into string-views for querying. Copy to device. + auto device_targets = [&] { + auto const host_target_iter = + thrust::make_transform_iterator(targets.begin(), [](auto const& ref) { + return string_view{ref.get().data(), ref.get().size()}; + }); + auto const host_targets = + std::vector(host_target_iter, host_target_iter + targets.size()); + return cudf::detail::make_device_uvector_async( + host_targets, stream, rmm::mr::get_current_device_resource()); + }(); + + // Create output columns. 
+ auto const results_iter = + thrust::make_transform_iterator(targets.begin(), [&](auto const& target) { + return make_numeric_column(data_type{type_id::BOOL8}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + }); + auto results_list = + std::vector>(results_iter, results_iter + targets.size()); + + auto device_results_list = [&] { + auto host_results_pointer_iter = + thrust::make_transform_iterator(results_list.begin(), [](auto const& results_column) { + return results_column->mutable_view().template data(); + }); + auto host_results_pointers = std::vector( + host_results_pointer_iter, host_results_pointer_iter + results_list.size()); + return cudf::detail::make_device_uvector_async(host_results_pointers, stream, mr); + }(); + + // Populate all output vectors, + + constexpr int block_size = 256; + // launch warp per string + auto const d_strings = column_device_view::create(input.parent(), stream); + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size * num_targets, block_size}; + multi_contains_warp_parallel_fn<<>>( + *d_strings, device_targets, device_results_list); + + return results_list; +} + /** * @brief Utility to return a bool column indicating the presence of * a given target string in a strings column. 
@@ -534,6 +644,16 @@ std::unique_ptr contains_fn(strings_column_view const& strings, return results; } +std::unique_ptr contains_small_strings_impl(strings_column_view const& input, + string_scalar const& target, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto pfn = [] __device__(string_view d_string, string_view d_target) { + return d_string.find(d_target) != string_view::npos; + }; + return contains_fn(input, target, pfn, stream, mr); +} } // namespace std::unique_ptr contains(strings_column_view const& input, @@ -548,10 +668,30 @@ std::unique_ptr contains(strings_column_view const& input, } // benchmark measurements showed this to be faster for smaller strings - auto pfn = [] __device__(string_view d_string, string_view d_target) { - return d_string.find(d_target) != string_view::npos; - }; - return contains_fn(input, target, pfn, stream, mr); + return contains_small_strings_impl(input, target, stream, mr); +} + +std::unique_ptr
contains(strings_column_view const& input, + std::vector> const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto result_columns = [&] { + if ((input.null_count() < input.size()) && + ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { + // Large strings. + // use warp parallel when the average string width is greater than the threshold + return multi_contains_warp_parallel(input, targets, stream, mr); + } else { + // Small strings. Searching for one string at a time seems to work fastest. + auto contains_iter = + thrust::make_transform_iterator(targets.begin(), [&](auto const& target) { + return contains_small_strings_impl(input, target.get(), stream, mr); + }); + return std::vector>(contains_iter, contains_iter + targets.size()); + } + }(); + return std::make_unique
(std::move(result_columns)); } std::unique_ptr contains(strings_column_view const& strings, @@ -632,6 +772,15 @@ std::unique_ptr contains(strings_column_view const& strings, return detail::contains(strings, target, stream, mr); } +std::unique_ptr
contains(strings_column_view const& strings, + std::vector> const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::contains(strings, targets, stream, mr); +} + std::unique_ptr contains(strings_column_view const& strings, strings_column_view const& targets, rmm::cuda_stream_view stream, diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index 2da95ba5c27..c766d602a28 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -17,16 +17,14 @@ #include #include #include +#include -#include #include #include #include #include #include -#include - #include struct StringsFindTest : public cudf::test::BaseFixture {}; @@ -198,6 +196,42 @@ TEST_F(StringsFindTest, ContainsLongStrings) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } +TEST_F(StringsFindTest, MultiContains) +{ + using cudf::test::iterators::null_at; + auto const strings = cudf::test::strings_column_wrapper{ + {"Héllo, there world and goodbye", + "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving", + "the following code snippet demonstrates how to use search for values in an ordered range", + "it returns the last position where value could be inserted without violating the ordering", + "algorithms execution is parallelized as determined by an execution policy. 
t", + "he this is a continuation of previous row to make sure string boundaries are honored", + "abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ !@#$%^&*()~", + "", + ""}, + null_at(8)}; + auto strings_view = cudf::strings_column_view(strings); + + auto search_key_0 = cudf::string_scalar{" the "}; + auto search_key_1 = cudf::string_scalar{"a"}; + auto search_key_2 = cudf::string_scalar{""}; + auto search_keys = std::vector>{}; + search_keys.emplace_back(search_key_0); + search_keys.emplace_back(search_key_1); + search_keys.emplace_back(search_key_2); + + auto results = cudf::strings::contains(strings_view, search_keys); + auto expected_0 = + cudf::test::fixed_width_column_wrapper({0, 1, 0, 1, 0, 0, 0, 0, 0}, null_at(8)); + auto expected_1 = + cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 0, 0}, null_at(8)); + auto expected_2 = + cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 1, 0}, null_at(8)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), expected_0); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), expected_1); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), expected_2); +} + TEST_F(StringsFindTest, StartsWith) { cudf::test::strings_column_wrapper strings({"Héllo", "thesé", "", "lease", "tést strings", ""}, diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 8ff2f0f0a73..5f0b7ce172d 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -20,6 +20,7 @@ import java.util.*; import java.util.stream.IntStream; +import java.util.stream.Stream; import static ai.rapids.cudf.HostColumnVector.OFFSET_SIZE; @@ -3351,6 +3352,24 @@ public final ColumnVector stringContains(Scalar compString) { return new ColumnVector(stringContains(getNativeView(), compString.getScalarHandle())); } + private static long[] toPrimitive(Long[] longs) { + long[] ret = new 
long[longs.length]; + for (int i = 0; i < longs.length; ++i) { + ret[i] = longs[i]; + } + return ret; + } + + public final ColumnVector[] stringContains(Scalar[] compStrings) { + assert type.equals(DType.STRING) : "column type must be a String"; + assert Arrays.stream(compStrings).allMatch(Objects::nonNull) : "compString scalars may not be null"; + assert Arrays.stream(compStrings).allMatch(str -> str.getType().equals(DType.STRING)) : "compString scalars must be string scalars"; + Long[] scalarHandles = Arrays.stream(compStrings).map(Scalar::getScalarHandle).toArray(Long[]::new); + + long[] resultPointers = stringContainsMulti(getNativeView(), toPrimitive(scalarHandles)); + return Arrays.stream(resultPointers).mapToObj(ColumnVector::new).toArray(ColumnVector[]::new); + } + /** * Replaces values less than `lo` in `input` with `lo`, * and values greater than `hi` with `hi`. @@ -4456,6 +4475,11 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat */ private static native long stringContains(long cudfViewHandle, long compString) throws CudfException; + /** + * Check multiple target strings against the same input column. + */ + private static native long[] stringContainsMulti(long cudfViewHandle, long[] compStrings) throws CudfException; + /** * Native method for extracting results from a regex program pattern. Returns a table handle. 
* diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4551325ebb1..162a375d97c 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1522,8 +1522,37 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringContains(JNIEnv* en CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_matchesRe(JNIEnv* env, - jobject j_object, +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringContainsMulti( + JNIEnv *env, jobject j_object, jlong j_view_handle, jlongArray j_comp_strings) { + JNI_NULL_CHECK(env, j_view_handle, "column is null", 0); + JNI_NULL_CHECK(env, j_comp_strings, "array of comparison scalars is null", 0); + + try { + cudf::jni::auto_set_device(env); + auto *column_view = reinterpret_cast(j_view_handle); + auto const strings_column = cudf::strings_column_view(*column_view); + auto comp_string_pointers = + cudf::jni::native_jpointerArray{env, j_comp_strings}; + auto comp_strings = std::vector{}; + std::transform(comp_string_pointers.begin(), comp_string_pointers.end(), + std::back_inserter(comp_strings), [](auto i) { return *i; }); + auto comp_strings_refs = std::vector>( + comp_strings.begin(), comp_strings.end()); + auto contains_results = cudf::strings::contains(strings_column, comp_strings_refs); + // auto comp_strings_iter = thrust::make_transform_iterator(comp_string_pointers.begin(), + // [](auto i) -> cudf::string_scalar const& { return *i; }); auto comp_strings = + // std::vector>(comp_strings_iter, + // comp_strings_iter + // + + // comp_string_pointers.size()); + // auto contains_results = cudf::strings::contains(strings_column, comp_strings); + return cudf::jni::convert_table_for_return(env, std::move(contains_results)); + // return 0; + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_matchesRe(JNIEnv *env, jobject j_object, jlong j_view_handle, jstring pattern_obj, 
jint regex_flags, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7136b162c13..811077fb7c1 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3827,6 +3827,33 @@ void testStringOpsEmpty() { } } + @Test + void testStringContainsMulti() { + ColumnVector[] results = null; + try (ColumnVector haystack = ColumnVector.fromStrings("All the leaves are brown", + "And the sky is grey", + "I've been for a walk", + "On a winter's day", + null, + ""); + Scalar needle0 = Scalar.fromString("the"); + Scalar needle1 = Scalar.fromString("a"); + ColumnVector expected0 = ColumnVector.fromBoxedBooleans(true, true, false, false, null, false); + ColumnVector expected1 = ColumnVector.fromBoxedBooleans(true, false, true, true, null, false)) { + + results = haystack.stringContains(new Scalar[]{needle0, needle1}); + assertColumnsAreEqual(results[0], expected0); + assertColumnsAreEqual(results[1], expected1); + + } finally { + if (results != null) { + for (ColumnVector c : results) { + c.close(); + } + } + } + } + @Test void testStringFindOperations() { try (ColumnVector testStrings = ColumnVector.fromStrings("", null, "abCD", "1a\"\u0100B1", "a\"\u0100B1", "1a\"\u0100B", From 1022c835807e02b931991bc68bc8292df5e35410 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 22 Aug 2024 09:30:55 +0800 Subject: [PATCH 02/24] string contains optimization Signed-off-by: Chong Gao --- cpp/src/strings/search/find.cu | 159 +++++++++++------- java/src/main/native/src/ColumnViewJni.cpp | 18 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 24 +-- 3 files changed, 120 insertions(+), 81 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 7133ee34aec..637fd4a6bf5 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -381,53 +381,6 @@ CUDF_KERNEL void 
contains_warp_parallel_fn(column_device_view const d_strings, if (lane_idx == 0) { d_results[str_idx] = result; } } -CUDF_KERNEL void multi_contains_warp_parallel_fn(column_device_view const d_strings, - cudf::device_span d_targets, - cudf::device_span d_results) -{ - auto const num_targets = d_targets.size(); - auto const num_rows = d_strings.size(); - - auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); - using warp_reduce = cub::WarpReduce; - __shared__ typename warp_reduce::TempStorage temp_storage; - - if (idx >= (num_rows * cudf::detail::warp_size * num_targets)) { return; } - - auto const lane_idx = idx % cudf::detail::warp_size; - auto const str_idx = (idx / cudf::detail::warp_size) / num_targets; - auto const target_idx = (idx / cudf::detail::warp_size) % num_targets; - - if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. - - // Identify the target. - auto const d_target = d_targets[target_idx]; - - if (d_target.size_bytes() == 0) { - d_results[target_idx][str_idx] = true; // Empty string is always found. - return; - } - - // get the string for this warp - auto const d_str = d_strings.element(str_idx); - - if (d_target.size_bytes() > d_str.size_bytes()) { - d_results[target_idx][str_idx] = false; // Target can't possibly fit in the input string. 
- return; - } - - // each thread of the warp will check just part of the string - auto found = false; - for (auto i = static_cast(idx % cudf::detail::warp_size); - !found && ((i + d_target.size_bytes()) <= d_str.size_bytes()); - i += cudf::detail::warp_size) { - // check the target matches this part of the d_str data - if (d_target.compare(d_str.data() + i, d_target.size_bytes()) == 0) { found = true; } - } - auto const result = warp_reduce(temp_storage).Reduce(found, cub::Max()); - if (lane_idx == 0) { d_results[target_idx][str_idx] = result; } -} - std::unique_ptr contains_warp_parallel(strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream, @@ -461,9 +414,89 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, return results; } -std::vector> multi_contains_warp_parallel( +CUDF_KERNEL void multi_contains_fn(column_device_view const d_strings, + cudf::device_span d_targets, + cudf::device_span d_results) +{ + auto const str_idx = static_cast(cudf::detail::grid_1d::global_thread_id()); + auto const num_targets = d_targets.size(); + auto const num_rows = d_strings.size(); + if (str_idx >= num_rows) { return; } + if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. + auto const d_str = d_strings.element(str_idx); + + // check empty target + for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { + auto const d_target = d_targets[target_idx]; + if (d_target.size_bytes() == 0) { + d_results[target_idx][str_idx] = true; // Empty string is always found. 
+ } else { + d_results[target_idx][str_idx] = false; + } + } + + for (auto str_byte_idx = 0; str_byte_idx < d_str.size_bytes(); + ++str_byte_idx) { // iterate the start index in the string + for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { // iterate targets + if (!d_results[target_idx][str_idx]) { // not found before + auto const d_target = d_targets[target_idx]; + if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes() && + (d_target.compare(d_str.data() + str_byte_idx, d_target.size_bytes()) == 0)) { + // found + d_results[target_idx][str_idx] = true; + } + } + } + } +} + +CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( + column_device_view const d_strings, + cudf::device_span d_targets, + cudf::device_span d_results) +{ + auto const num_targets = d_targets.size(); + auto const num_rows = d_strings.size(); + + auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); + using warp_reduce = cub::WarpReduce; + __shared__ typename warp_reduce::TempStorage temp_storage; + + if (idx >= (num_rows * cudf::detail::warp_size)) { return; } + + auto const lane_idx = idx % cudf::detail::warp_size; + auto const str_idx = idx / cudf::detail::warp_size; + if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. + + // get the string for this warp + auto const d_str = d_strings.element(str_idx); + + for (size_t target_idx = 0; target_idx < num_targets; target_idx++) { + // Identify the target. 
+ auto const d_target = d_targets[target_idx]; + + // each thread of the warp will check just part of the string + auto found = false; + if (d_target.empty()) { + found = true; + } else { + for (auto i = static_cast(idx % cudf::detail::warp_size); + !found && ((i + d_target.size_bytes()) <= d_str.size_bytes()); + i += cudf::detail::warp_size) { + // check the target matches this part of the d_str data + if (d_target.compare(d_str.data() + i, d_target.size_bytes()) == 0) { found = true; } + } + } + __syncwarp(); + auto const result = warp_reduce(temp_storage).Reduce(found, cub::Max()); + if (lane_idx == 0) { d_results[target_idx][str_idx] = result; } + } +} + +std::vector> multi_contains( strings_column_view const& input, std::vector> const& targets, + bool warp_parallel, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -514,13 +547,19 @@ std::vector> multi_contains_warp_parallel( constexpr int block_size = 256; // launch warp per string auto const d_strings = column_device_view::create(input.parent(), stream); - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size * num_targets, block_size}; - multi_contains_warp_parallel_fn<<>>( - *d_strings, device_targets, device_results_list); + if (warp_parallel) { + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size * num_targets, block_size}; + multi_contains_warp_parallel_multi_scalars_fn<<>>( + *d_strings, device_targets, device_results_list); + } else { + cudf::detail::grid_1d grid{input.size(), block_size}; + multi_contains_fn<<>>( + *d_strings, device_targets, device_results_list); + } return results_list; } @@ -647,7 +686,7 @@ std::unique_ptr contains_fn(strings_column_view const& strings, std::unique_ptr contains_small_strings_impl(strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto pfn = [] __device__(string_view d_string, string_view d_target) 
{ return d_string.find(d_target) != string_view::npos; @@ -681,14 +720,10 @@ std::unique_ptr
contains(strings_column_view const& input, ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { // Large strings. // use warp parallel when the average string width is greater than the threshold - return multi_contains_warp_parallel(input, targets, stream, mr); + return multi_contains(input, targets, /*warp_parallel=*/true, stream, mr); } else { - // Small strings. Searching for one string at a time seems to work fastest. - auto contains_iter = - thrust::make_transform_iterator(targets.begin(), [&](auto const& target) { - return contains_small_strings_impl(input, target.get(), stream, mr); - }); - return std::vector>(contains_iter, contains_iter + targets.size()); + // Small strings. Searching for multiple targets in one thread seems to work fastest. + return multi_contains(input, targets, /*warp_parallel=*/false, stream, mr); } }(); return std::make_unique
(std::move(result_columns)); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 162a375d97c..6d1f656a2ad 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1523,21 +1523,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringContains(JNIEnv* en } JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringContainsMulti( - JNIEnv *env, jobject j_object, jlong j_view_handle, jlongArray j_comp_strings) { + JNIEnv* env, jobject j_object, jlong j_view_handle, jlongArray j_comp_strings) +{ JNI_NULL_CHECK(env, j_view_handle, "column is null", 0); JNI_NULL_CHECK(env, j_comp_strings, "array of comparison scalars is null", 0); try { cudf::jni::auto_set_device(env); - auto *column_view = reinterpret_cast(j_view_handle); + auto* column_view = reinterpret_cast(j_view_handle); auto const strings_column = cudf::strings_column_view(*column_view); auto comp_string_pointers = - cudf::jni::native_jpointerArray{env, j_comp_strings}; + cudf::jni::native_jpointerArray{env, j_comp_strings}; auto comp_strings = std::vector{}; - std::transform(comp_string_pointers.begin(), comp_string_pointers.end(), - std::back_inserter(comp_strings), [](auto i) { return *i; }); + std::transform(comp_string_pointers.begin(), + comp_string_pointers.end(), + std::back_inserter(comp_strings), + [](auto i) { return *i; }); auto comp_strings_refs = std::vector>( - comp_strings.begin(), comp_strings.end()); + comp_strings.begin(), comp_strings.end()); auto contains_results = cudf::strings::contains(strings_column, comp_strings_refs); // auto comp_strings_iter = thrust::make_transform_iterator(comp_string_pointers.begin(), // [](auto i) -> cudf::string_scalar const& { return *i; }); auto comp_strings = @@ -1552,7 +1555,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringContainsMulti( CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL 
Java_ai_rapids_cudf_ColumnView_matchesRe(JNIEnv *env, jobject j_object, +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_matchesRe(JNIEnv* env, + jobject j_object, jlong j_view_handle, jstring pattern_obj, jint regex_flags, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 811077fb7c1..bfa0db940a1 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3831,26 +3831,26 @@ void testStringOpsEmpty() { void testStringContainsMulti() { ColumnVector[] results = null; try (ColumnVector haystack = ColumnVector.fromStrings("All the leaves are brown", - "And the sky is grey", - "I've been for a walk", - "On a winter's day", - null, - ""); + "And the sky is grey", + "I've been for a walk", + "On a winter's day", + null, + ""); Scalar needle0 = Scalar.fromString("the"); Scalar needle1 = Scalar.fromString("a"); ColumnVector expected0 = ColumnVector.fromBoxedBooleans(true, true, false, false, null, false); ColumnVector expected1 = ColumnVector.fromBoxedBooleans(true, false, true, true, null, false)) { - results = haystack.stringContains(new Scalar[]{needle0, needle1}); - assertColumnsAreEqual(results[0], expected0); - assertColumnsAreEqual(results[1], expected1); + results = haystack.stringContains(new Scalar[]{needle0, needle1}); + assertColumnsAreEqual(results[0], expected0); + assertColumnsAreEqual(results[1], expected1); } finally { - if (results != null) { - for (ColumnVector c : results) { - c.close(); - } + if (results != null) { + for (ColumnVector c : results) { + c.close(); } + } } } From 45170e95b3100d52d4edace910e969736df12fce Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 22 Aug 2024 19:53:19 +0800 Subject: [PATCH 03/24] Add benchmark test --- cpp/benchmarks/string/find.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp 
b/cpp/benchmarks/string/find.cpp index a9c620e4bf0..a729563ae40 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -71,8 +71,26 @@ static void bench_find_string(nvbench::state& state) cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); }); } else if (api == "contains") { - state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { cudf::strings::contains(input, target); }); + constexpr int iters = 20; + std::vector match_targets({"123", "abc", "4567890", "DEFGHI", "5W43"}); + auto scalar_targets = std::vector{}; + for (int i = 0; i < iters; i++) { + scalar_targets.emplace_back(cudf::string_scalar(match_targets[i % match_targets.size()])); + } + auto targets = std::vector>(scalar_targets.begin(), + scalar_targets.end()); + + constexpr bool combine = false; + if constexpr (not combine) { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + for (size_t i = 0; i < scalar_targets.size(); i++) { + cudf::strings::contains(input, scalar_targets[i]); + } + }); + } else { // combine + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::contains(input, targets); }); + } } else if (api == "starts_with") { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); }); From 32e1329693c488c4b833f23c6949baba51cb8395 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 27 Aug 2024 17:20:02 +0800 Subject: [PATCH 04/24] Fix comments Signed-off-by: Chong Gao --- cpp/benchmarks/string/find.cpp | 13 ++- cpp/include/cudf/strings/find.hpp | 11 ++- cpp/src/strings/search/find.cu | 81 +++++++------------ cpp/tests/strings/find_tests.cpp | 15 ++-- .../main/java/ai/rapids/cudf/ColumnView.java | 12 ++- java/src/main/native/src/ColumnViewJni.cpp | 29 ++----- .../java/ai/rapids/cudf/ColumnVectorTest.java | 7 +- 7 files changed, 64 insertions(+), 104 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp 
b/cpp/benchmarks/string/find.cpp index a729563ae40..c7bdbb0fa40 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -73,23 +73,22 @@ static void bench_find_string(nvbench::state& state) } else if (api == "contains") { constexpr int iters = 20; std::vector match_targets({"123", "abc", "4567890", "DEFGHI", "5W43"}); - auto scalar_targets = std::vector{}; + auto multi_targets = std::vector{}; for (int i = 0; i < iters; i++) { - scalar_targets.emplace_back(cudf::string_scalar(match_targets[i % match_targets.size()])); + multi_targets.emplace_back(match_targets[i % match_targets.size()]); } - auto targets = std::vector>(scalar_targets.begin(), - scalar_targets.end()); + cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), multi_targets.end()); constexpr bool combine = false; if constexpr (not combine) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - for (size_t i = 0; i < scalar_targets.size(); i++) { - cudf::strings::contains(input, scalar_targets[i]); + for (size_t i = 0; i < multi_targets.size(); i++) { + cudf::strings::contains(input, cudf::string_scalar(multi_targets[i])); } }); } else { // combine state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { cudf::strings::contains(input, targets); }); + [&](nvbench::launch& launch) { cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); }); } } else if (api == "starts_with") { state.exec(nvbench::exec_tag::sync, diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index e8d7f608aea..f86203cb46a 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -152,16 +152,21 @@ std::unique_ptr contains( * If the target is an empty string, true is returned for all non-null entries in the output column. * * Any null string entries return corresponding null entries in the output columns. 
- * + * e.g.: + * input: "a", "b", "c" + * targets: "a", "c" + * output is a table with two boolean columns: + * column_0: true, false, false + * column_1: false, false, true * @param input Strings instance for this operation * @param targets UTF-8 encoded strings to search for in each string in `input` * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New BOOL8 column */ -std::unique_ptr
contains( +std::unique_ptr
multi_contains( strings_column_view const& input, - std::vector> const& targets, + strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 637fd4a6bf5..b0acd057ee2 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -415,7 +415,7 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, } CUDF_KERNEL void multi_contains_fn(column_device_view const d_strings, - cudf::device_span d_targets, + column_device_view const d_targets, cudf::device_span d_results) { auto const str_idx = static_cast(cudf::detail::grid_1d::global_thread_id()); @@ -427,19 +427,15 @@ CUDF_KERNEL void multi_contains_fn(column_device_view const d_strings, // check empty target for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { - auto const d_target = d_targets[target_idx]; - if (d_target.size_bytes() == 0) { - d_results[target_idx][str_idx] = true; // Empty string is always found. 
- } else { - d_results[target_idx][str_idx] = false; - } + auto const d_target = d_targets.element(target_idx); + d_results[target_idx][str_idx] = d_target.size_bytes() == 0; } for (auto str_byte_idx = 0; str_byte_idx < d_str.size_bytes(); ++str_byte_idx) { // iterate the start index in the string for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { // iterate targets if (!d_results[target_idx][str_idx]) { // not found before - auto const d_target = d_targets[target_idx]; + auto const d_target = d_targets.element(target_idx); if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes() && (d_target.compare(d_str.data() + str_byte_idx, d_target.size_bytes()) == 0)) { // found @@ -450,10 +446,9 @@ CUDF_KERNEL void multi_contains_fn(column_device_view const d_strings, } } -CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( - column_device_view const d_strings, - cudf::device_span d_targets, - cudf::device_span d_results) +CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn(column_device_view const d_strings, + column_device_view const d_targets, + cudf::device_span d_results) { auto const num_targets = d_targets.size(); auto const num_rows = d_strings.size(); @@ -473,14 +468,14 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( for (size_t target_idx = 0; target_idx < num_targets; target_idx++) { // Identify the target. 
- auto const d_target = d_targets[target_idx]; + auto const d_target = d_targets.element(target_idx); // each thread of the warp will check just part of the string auto found = false; if (d_target.empty()) { found = true; } else { - for (auto i = static_cast(idx % cudf::detail::warp_size); + for (auto i = static_cast(lane_idx); !found && ((i + d_target.size_bytes()) <= d_str.size_bytes()); i += cudf::detail::warp_size) { // check the target matches this part of the d_str data @@ -493,35 +488,18 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( } } -std::vector> multi_contains( - strings_column_view const& input, - std::vector> const& targets, - bool warp_parallel, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::vector> multi_contains(strings_column_view const& input, + strings_column_view const& targets, + bool warp_parallel, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_targets = static_cast(targets.size()); - CUDF_EXPECTS(not targets.empty(), "Must specify at least one target string."); - CUDF_EXPECTS(std::all_of(targets.begin(), - targets.end(), - [&](auto const& target) { return target.get().is_valid(stream); }), - "Target search strings must be valid."); - - // Convert targets into string-views for querying. Copy to device. - auto device_targets = [&] { - auto const host_target_iter = - thrust::make_transform_iterator(targets.begin(), [](auto const& ref) { - return string_view{ref.get().data(), ref.get().size()}; - }); - auto const host_targets = - std::vector(host_target_iter, host_target_iter + targets.size()); - return cudf::detail::make_device_uvector_async( - host_targets, stream, rmm::mr::get_current_device_resource()); - }(); + CUDF_EXPECTS(not targets.is_empty(), "Must specify at least one target string."); // Create output columns. 
auto const results_iter = - thrust::make_transform_iterator(targets.begin(), [&](auto const& target) { + thrust::make_transform_iterator(thrust::counting_iterator(0), [&](int i) { return make_numeric_column(data_type{type_id::BOOL8}, input.size(), cudf::detail::copy_bitmask(input.parent(), stream, mr), @@ -531,7 +509,6 @@ std::vector> multi_contains( }); auto results_list = std::vector>(results_iter, results_iter + targets.size()); - auto device_results_list = [&] { auto host_results_pointer_iter = thrust::make_transform_iterator(results_list.begin(), [](auto const& results_column) { @@ -547,18 +524,20 @@ std::vector> multi_contains( constexpr int block_size = 256; // launch warp per string auto const d_strings = column_device_view::create(input.parent(), stream); + auto const d_targets = column_device_view::create(targets.parent(), stream); if (warp_parallel) { - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size * num_targets, block_size}; + // one warp handles multi-targets for a string + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; multi_contains_warp_parallel_multi_scalars_fn<<>>( - *d_strings, device_targets, device_results_list); + *d_strings, *d_targets, device_results_list); } else { cudf::detail::grid_1d grid{input.size(), block_size}; multi_contains_fn<<>>( - *d_strings, device_targets, device_results_list); + *d_strings, *d_targets, device_results_list); } return results_list; } @@ -710,10 +689,10 @@ std::unique_ptr contains(strings_column_view const& input, return contains_small_strings_impl(input, target, stream, mr); } -std::unique_ptr
contains(strings_column_view const& input, - std::vector> const& targets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
multi_contains(strings_column_view const& input, + strings_column_view const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto result_columns = [&] { if ((input.null_count() < input.size()) && @@ -807,13 +786,13 @@ std::unique_ptr contains(strings_column_view const& strings, return detail::contains(strings, target, stream, mr); } -std::unique_ptr
contains(strings_column_view const& strings, - std::vector> const& targets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr
multi_contains(strings_column_view const& strings, + strings_column_view const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::contains(strings, targets, stream, mr); + return detail::multi_contains(strings, targets, stream, mr); } std::unique_ptr contains(strings_column_view const& strings, diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index c766d602a28..ad600ad206b 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -211,16 +211,11 @@ TEST_F(StringsFindTest, MultiContains) ""}, null_at(8)}; auto strings_view = cudf::strings_column_view(strings); - - auto search_key_0 = cudf::string_scalar{" the "}; - auto search_key_1 = cudf::string_scalar{"a"}; - auto search_key_2 = cudf::string_scalar{""}; - auto search_keys = std::vector>{}; - search_keys.emplace_back(search_key_0); - search_keys.emplace_back(search_key_1); - search_keys.emplace_back(search_key_2); - - auto results = cudf::strings::contains(strings_view, search_keys); + std::vector match_targets({" the ", "a", ""}); + cudf::test::strings_column_wrapper multi_targets_column(match_targets.begin(), + match_targets.end()); + auto results = + cudf::strings::multi_contains(strings_view, cudf::strings_column_view(multi_targets_column)); auto expected_0 = cudf::test::fixed_width_column_wrapper({0, 1, 0, 1, 0, 0, 0, 0, 0}, null_at(8)); auto expected_1 = diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 5f0b7ce172d..6ec423c0e66 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -3360,13 +3360,11 @@ private static long[] toPrimitive(Long[] longs) { return ret; } - public final ColumnVector[] stringContains(Scalar[] compStrings) { + public final ColumnVector[] stringContains(ColumnView targets) { assert type.equals(DType.STRING) : "column type 
must be a String"; - assert Arrays.stream(compStrings).allMatch(Objects::nonNull) : "compString scalars may not be null"; - assert Arrays.stream(compStrings).allMatch(str -> str.getType().equals(DType.STRING)) : "compString scalars must be string scalars"; - Long[] scalarHandles = Arrays.stream(compStrings).map(Scalar::getScalarHandle).toArray(Long[]::new); - - long[] resultPointers = stringContainsMulti(getNativeView(), toPrimitive(scalarHandles)); + assert targets.getType().equals(DType.STRING) : "targets type must be a string"; + assert targets.getNullCount() > 0 : "targets must not be null"; + long[] resultPointers = stringContainsMulti(getNativeView(), targets.getNativeView()); return Arrays.stream(resultPointers).mapToObj(ColumnVector::new).toArray(ColumnVector[]::new); } @@ -4478,7 +4476,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat /** * Check multiple target strings against the same input column. */ - private static native long[] stringContainsMulti(long cudfViewHandle, long[] compStrings) throws CudfException; + private static native long[] stringContainsMulti(long cudfViewHandle, long targets) throws CudfException; /** * Native method for extracting results from a regex program pattern. Returns a table handle. 
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 6d1f656a2ad..1465267581f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1522,35 +1522,22 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringContains(JNIEnv* en CATCH_STD(env, 0); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringContainsMulti( - JNIEnv* env, jobject j_object, jlong j_view_handle, jlongArray j_comp_strings) +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_stringContainsMulti(JNIEnv* env, + jobject j_object, + jlong j_view_handle, + jlong comp_strings) { JNI_NULL_CHECK(env, j_view_handle, "column is null", 0); - JNI_NULL_CHECK(env, j_comp_strings, "array of comparison scalars is null", 0); + JNI_NULL_CHECK(env, comp_strings, "targets is null", 0); try { cudf::jni::auto_set_device(env); auto* column_view = reinterpret_cast(j_view_handle); + auto* targets_view = reinterpret_cast(comp_strings); auto const strings_column = cudf::strings_column_view(*column_view); - auto comp_string_pointers = - cudf::jni::native_jpointerArray{env, j_comp_strings}; - auto comp_strings = std::vector{}; - std::transform(comp_string_pointers.begin(), - comp_string_pointers.end(), - std::back_inserter(comp_strings), - [](auto i) { return *i; }); - auto comp_strings_refs = std::vector>( - comp_strings.begin(), comp_strings.end()); - auto contains_results = cudf::strings::contains(strings_column, comp_strings_refs); - // auto comp_strings_iter = thrust::make_transform_iterator(comp_string_pointers.begin(), - // [](auto i) -> cudf::string_scalar const& { return *i; }); auto comp_strings = - // std::vector>(comp_strings_iter, - // comp_strings_iter - // + - // comp_string_pointers.size()); - // auto contains_results = cudf::strings::contains(strings_column, comp_strings); + auto const targets_column = cudf::strings_column_view(*targets_view); + auto contains_results = 
cudf::strings::multi_contains(strings_column, targets_column); return cudf::jni::convert_table_for_return(env, std::move(contains_results)); - // return 0; } CATCH_STD(env, 0); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index bfa0db940a1..573c4b1488c 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3836,15 +3836,12 @@ void testStringContainsMulti() { "On a winter's day", null, ""); - Scalar needle0 = Scalar.fromString("the"); - Scalar needle1 = Scalar.fromString("a"); + ColumnVector targets = ColumnVector.fromStrings("the", "a"); ColumnVector expected0 = ColumnVector.fromBoxedBooleans(true, true, false, false, null, false); ColumnVector expected1 = ColumnVector.fromBoxedBooleans(true, false, true, true, null, false)) { - - results = haystack.stringContains(new Scalar[]{needle0, needle1}); + results = haystack.stringContains(targets); assertColumnsAreEqual(results[0], expected0); assertColumnsAreEqual(results[1], expected1); - } finally { if (results != null) { for (ColumnVector c : results) { From be6985b8ae02b3f41560d38a157260571342abce Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 29 Aug 2024 16:35:48 +0800 Subject: [PATCH 05/24] Use new approach to improve perf: index the first chars in the targets --- cpp/benchmarks/string/find.cpp | 31 +++- cpp/src/strings/search/find.cu | 267 ++++++++++++++++++++++++++------- 2 files changed, 233 insertions(+), 65 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index c7bdbb0fa40..e7287189f6d 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -71,15 +71,27 @@ static void bench_find_string(nvbench::state& state) cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); }); } else if (api == "contains") { - constexpr int iters = 20; - std::vector 
match_targets({"123", "abc", "4567890", "DEFGHI", "5W43"}); + constexpr bool combine = true; // test true/false + bool has_same_target_first_char = true; // test true/false + constexpr int iters = 4; // test 4/10 + + std::vector match_targets({" abc", + "W43", + "0987 5W43", + "123 abc", + "23 abc", + "3 abc", + "5W", + "7 5W43", + "87 5W43", + "987 5W43"}); auto multi_targets = std::vector{}; for (int i = 0; i < iters; i++) { - multi_targets.emplace_back(match_targets[i % match_targets.size()]); + // if has same first chars in targets, use duplicated targets. + int idx = has_same_target_first_char ? i / 2 : i; + multi_targets.emplace_back(match_targets[idx]); } - cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), multi_targets.end()); - constexpr bool combine = false; if constexpr (not combine) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { for (size_t i = 0; i < multi_targets.size(); i++) { @@ -87,8 +99,11 @@ static void bench_find_string(nvbench::state& state) } }); } else { // combine - state.exec(nvbench::exec_tag::sync, - [&](nvbench::launch& launch) { cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); }); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), + multi_targets.end()); + cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); + }); } } else if (api == "starts_with") { state.exec(nvbench::exec_tag::sync, @@ -101,7 +116,7 @@ static void bench_find_string(nvbench::state& state) NVBENCH_BENCH(bench_find_string) .set_name("find_string") - .add_string_axis("api", {"find", "find_multi", "contains", "starts_with", "ends_with"}) + .add_string_axis("api", {"contains"}) .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) .add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216}) .add_int64_axis("hit_rate", {20, 80}); // percentage diff 
--git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index b0acd057ee2..314abb44c84 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,12 @@ #include #include #include +#include #include +#include #include #include +#include #include namespace cudf { @@ -414,38 +418,6 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, return results; } -CUDF_KERNEL void multi_contains_fn(column_device_view const d_strings, - column_device_view const d_targets, - cudf::device_span d_results) -{ - auto const str_idx = static_cast(cudf::detail::grid_1d::global_thread_id()); - auto const num_targets = d_targets.size(); - auto const num_rows = d_strings.size(); - if (str_idx >= num_rows) { return; } - if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. - auto const d_str = d_strings.element(str_idx); - - // check empty target - for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { - auto const d_target = d_targets.element(target_idx); - d_results[target_idx][str_idx] = d_target.size_bytes() == 0; - } - - for (auto str_byte_idx = 0; str_byte_idx < d_str.size_bytes(); - ++str_byte_idx) { // iterate the start index in the string - for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { // iterate targets - if (!d_results[target_idx][str_idx]) { // not found before - auto const d_target = d_targets.element(target_idx); - if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes() && - (d_target.compare(d_str.data() + str_byte_idx, d_target.size_bytes()) == 0)) { - // found - d_results[target_idx][str_idx] = true; - } - } - } - } -} - CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn(column_device_view const d_strings, column_device_view const d_targets, cudf::device_span d_results) @@ -488,16 +460,157 @@ CUDF_KERNEL void 
multi_contains_warp_parallel_multi_scalars_fn(column_device_vie } } -std::vector> multi_contains(strings_column_view const& input, - strings_column_view const& targets, - bool warp_parallel, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +CUDF_KERNEL void multi_contains_using_indexes_fn( + column_device_view const d_strings, + column_device_view const d_targets, + cudf::device_span const d_target_first_bytes, + column_device_view const d_target_indexes_for_first_bytes, + cudf::device_span d_results) +{ + auto const str_idx = static_cast(cudf::detail::grid_1d::global_thread_id()); + auto const num_targets = d_targets.size(); + auto const num_rows = d_strings.size(); + if (str_idx >= num_rows) { return; } + if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. + auto const d_str = d_strings.element(str_idx); + + // check empty target, the result of searching empty target is true. + for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { + auto const d_target = d_targets.element(target_idx); + d_results[target_idx][str_idx] = d_target.size_bytes() == 0; + } + + for (auto str_byte_idx = 0; str_byte_idx < d_str.size_bytes(); + ++str_byte_idx) { // iterate the start index in the string + + // binary search in the target first char set. + char c = *(d_str.data() + str_byte_idx); + auto first_byte_ptr = + thrust::lower_bound(thrust::seq, d_target_first_bytes.begin(), d_target_first_bytes.end(), c); + + if (not(first_byte_ptr != d_target_first_bytes.end() && *first_byte_ptr == c)) { + // For non-empty targets: no need to search for `str_byte_idx` position, because first char is + // unmatched. For empty targets: already set result as found. 
+ continue; + } + + int first_char_index_in_list = first_byte_ptr - d_target_first_bytes.begin(); + // get possible targets + auto const possible_targets_list = + cudf::list_device_view{d_target_indexes_for_first_bytes, first_char_index_in_list}; + + for (auto i = 0; i < possible_targets_list.size(); ++i) { // iterate possible targets + auto target_idx = possible_targets_list.element(i); + if (!d_results[target_idx][str_idx]) { // not found before + auto const d_target = d_targets.element(target_idx); + if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes() && + (d_target.compare(d_str.data() + str_byte_idx, d_target.size_bytes()) == 0)) { + // found + d_results[target_idx][str_idx] = true; + } + } + } + } +} + +/** + * Execute multi contains for short strings + * First index the first char for all targets. + * Index the first char: + * collect first char for all targets and do uniq and sort, + * then index the targets for the first char. + * e.g.: + * targets: xa xb ac ad af + * first char set is: (a, x) + * index result is: + * { + * a: [2, 3, 4], // indexes for: ac ad af + * x: [0, 1] // indexes for: xa xb + * } + * when do searching: + * find (binary search) from `first char set` for a char in string: + * if char in string is not in ['a', 'x'], fast skip + * if char in string is 'x', then only need to try ["xa", "xb"] targets. + * if char in string is 'a', then only need to try ["ac", "ad", "af"] targets. + * + */ +std::vector> multi_contains_using_indexes( + strings_column_view const& input, + strings_column_view const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_targets = static_cast(targets.size()); CUDF_EXPECTS(not targets.is_empty(), "Must specify at least one target string."); - // Create output columns. + // 1. 
copy targets from device to host + auto const h_targets_child = cudf::detail::make_std_vector_sync( + cudf::device_span(targets.chars_begin(stream), targets.chars_size(stream)), stream); + auto const targets_offsets = targets.offsets(); + auto const h_targets_offsets = cudf::detail::make_std_vector_sync( + cudf::device_span{targets_offsets.data(), + static_cast(targets_offsets.size())}, + stream); + + // 2. index the first characters in targets + // 2.1 collect first characters in targets + thrust::host_vector h_first_bytes = {}; + for (auto i = 0; i < targets.size(); i++) { + auto target_begin_offset = h_targets_offsets[i]; + auto target_end_offset = h_targets_offsets[i + 1]; + if (target_end_offset - target_begin_offset > 0) { + char first_char = h_targets_child[target_begin_offset]; + auto no_exist = + thrust::find(h_first_bytes.begin(), h_first_bytes.end(), first_char) == h_first_bytes.end(); + if (no_exist) { h_first_bytes.push_back(first_char); } + } + } + + // 2.2 sort the first characters + thrust::sort(h_first_bytes.begin(), h_first_bytes.end()); + + // 2.3 generate indexes: map from `first char in target` to `target indexes` + thrust::host_vector h_offsets = {0}; + thrust::host_vector h_elements = {}; + for (size_t i = 0; i < h_first_bytes.size(); i++) { + auto expected_first_byte = h_first_bytes[i]; + for (auto target_idx = 0; target_idx < targets.size(); target_idx++) { + auto target_begin_offset = h_targets_offsets[target_idx]; + auto target_end_offset = h_targets_offsets[target_idx + 1]; + if (target_end_offset - target_begin_offset > 0) { + char curr_first_byte = h_targets_child[target_begin_offset]; + if (expected_first_byte == curr_first_byte) { h_elements.push_back(target_idx); } + } + } + h_offsets.push_back(h_elements.size()); + } + + // 2.4 copy first char set and first char indexes to device + auto d_first_bytes = cudf::detail::make_device_uvector_async(h_first_bytes, stream, mr); + auto d_offsets = 
cudf::detail::make_device_uvector_async(h_offsets, stream, mr); + auto d_elements = cudf::detail::make_device_uvector_async(h_elements, stream, mr); + auto offsets_column = std::make_unique(cudf::data_type{cudf::type_id::INT32}, + h_offsets.size(), + d_offsets.release(), + rmm::device_buffer{}, // null mask + 0 // null size + ); + auto element_column = std::make_unique(cudf::data_type{cudf::type_id::INT32}, + h_elements.size(), + d_elements.release(), + rmm::device_buffer{}, // null mask + 0 // null size + ); + auto list_column = cudf::make_lists_column(h_first_bytes.size(), + std::move(offsets_column), + std::move(element_column), + 0, // null count + rmm::device_buffer{}, // null mask + stream, + mr); + auto d_list_column = column_device_view::create(list_column->view(), stream); + + // 3. Create output columns. auto const results_iter = thrust::make_transform_iterator(thrust::counting_iterator(0), [&](int i) { return make_numeric_column(data_type{type_id::BOOL8}, @@ -519,26 +632,66 @@ std::vector> multi_contains(strings_column_view const& i return cudf::detail::make_device_uvector_async(host_results_pointers, stream, mr); }(); - // Populate all output vectors, - - constexpr int block_size = 256; - // launch warp per string auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_targets = column_device_view::create(targets.parent(), stream); - if (warp_parallel) { - // one warp handles multi-targets for a string - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; - multi_contains_warp_parallel_multi_scalars_fn<<>>( - *d_strings, *d_targets, device_results_list); - } else { - cudf::detail::grid_1d grid{input.size(), block_size}; - multi_contains_fn<<>>( - *d_strings, *d_targets, device_results_list); - } + constexpr int block_size = 256; + cudf::detail::grid_1d grid{input.size(), block_size}; + + multi_contains_using_indexes_fn<<>>( + *d_strings, *d_targets, d_first_bytes, *d_list_column, 
device_results_list); + return results_list; +} + +/** + * Execute multi contains for long strings + */ +std::vector> multi_contains_using_warp_parallel( + strings_column_view const& input, + strings_column_view const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_targets = static_cast(targets.size()); + CUDF_EXPECTS(not targets.is_empty(), "Must specify at least one target string."); + + // Create output columns. + auto const results_iter = + thrust::make_transform_iterator(thrust::counting_iterator(0), [&](int i) { + return make_numeric_column(data_type{type_id::BOOL8}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + }); + auto results_list = + std::vector>(results_iter, results_iter + targets.size()); + auto device_results_list = [&] { + auto host_results_pointer_iter = + thrust::make_transform_iterator(results_list.begin(), [](auto const& results_column) { + return results_column->mutable_view().template data(); + }); + auto host_results_pointers = std::vector( + host_results_pointer_iter, host_results_pointer_iter + results_list.size()); + return cudf::detail::make_device_uvector_async(host_results_pointers, stream, mr); + }(); + + constexpr int block_size = 256; + auto const d_strings = column_device_view::create(input.parent(), stream); + auto const d_targets = column_device_view::create(targets.parent(), stream); + + // launch warp per string; one warp handles multi-targets for the same string. + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + multi_contains_warp_parallel_multi_scalars_fn<<>>( + *d_strings, *d_targets, device_results_list); + return results_list; } @@ -699,10 +852,10 @@ std::unique_ptr
multi_contains(strings_column_view const& input, ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { // Large strings. // use warp parallel when the average string width is greater than the threshold - return multi_contains(input, targets, /*warp_parallel=*/true, stream, mr); + return multi_contains_using_warp_parallel(input, targets, stream, mr); } else { // Small strings. Searching for multiple targets in one thread seems to work fastest. - return multi_contains(input, targets, /*warp_parallel=*/false, stream, mr); + return multi_contains_using_indexes(input, targets, stream, mr); } }(); return std::make_unique
(std::move(result_columns)); From be7a1e2f39b61aabd01407911f54a8b5b9db77c1 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 29 Aug 2024 17:15:51 +0800 Subject: [PATCH 06/24] Fix comments; Restore a test change --- cpp/benchmarks/string/find.cpp | 2 +- cpp/src/strings/search/find.cu | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index e7287189f6d..fb8dc24b78b 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -116,7 +116,7 @@ static void bench_find_string(nvbench::state& state) NVBENCH_BENCH(bench_find_string) .set_name("find_string") - .add_string_axis("api", {"contains"}) + .add_string_axis("api", {"find", "find_multi", "contains", "starts_with", "ends_with"}) .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) .add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216}) .add_int64_axis("hit_rate", {20, 80}); // percentage diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 314abb44c84..e23e7cc9b3e 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -847,6 +847,7 @@ std::unique_ptr
multi_contains(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(not targets.has_nulls(), "Target strings cannot be null"); auto result_columns = [&] { if ((input.null_count() < input.size()) && ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { From 479788c5085238ae092af26b794428fa2b72edfa Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 29 Aug 2024 17:42:25 +0800 Subject: [PATCH 07/24] Improve --- cpp/src/strings/search/find.cu | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index e23e7cc9b3e..f25833ef0b3 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -503,10 +503,16 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( auto target_idx = possible_targets_list.element(i); if (!d_results[target_idx][str_idx]) { // not found before auto const d_target = d_targets.element(target_idx); - if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes() && - (d_target.compare(d_str.data() + str_byte_idx, d_target.size_bytes()) == 0)) { - // found - d_results[target_idx][str_idx] = true; + if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes()) { + // first char already checked, only need to check the [2nd, end) chars if has. 
+ bool found = true; + for (auto i = 1; i < d_target.size(); i++) { + if (*(d_str.data() + str_byte_idx + i) != *(d_target.data() + i)) { + found = false; + break; + } + } + if (found) { d_results[target_idx][str_idx] = true; } } } } From 543a1f692fc453cfca0610b85b94dbfc4d38fa31 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Fri, 30 Aug 2024 10:24:51 +0800 Subject: [PATCH 08/24] Fix compile error Signed-off-by: Chong Gao --- cpp/src/strings/search/find.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index f25833ef0b3..ce4c2673861 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -506,7 +506,7 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes()) { // first char already checked, only need to check the [2nd, end) chars if has. bool found = true; - for (auto i = 1; i < d_target.size(); i++) { + for (auto i = 1; i < d_target.size_bytes(); i++) { if (*(d_str.data() + str_byte_idx + i) != *(d_target.data() + i)) { found = false; break; From 06ba14c59a0efa9ca2fbaf98cd1beb36bb151ecd Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Fri, 30 Aug 2024 11:50:57 +0800 Subject: [PATCH 09/24] Update test cases; update benchmark tests --- cpp/benchmarks/string/find.cpp | 24 +++++++++++++++++++----- cpp/tests/strings/find_tests.cpp | 5 ++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index fb8dc24b78b..90d86002ab2 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -71,9 +71,10 @@ static void bench_find_string(nvbench::state& state) cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); }); } else if (api == "contains") { - constexpr bool combine = true; // test true/false - bool has_same_target_first_char = true; // test true/false - constexpr int iters = 4; // test 4/10 + 
constexpr bool combine = false; // test true/false + bool has_same_target_first_char = false; // test true/false + constexpr int iters = 10; // test 4/10 + bool check_result = false; std::vector match_targets({" abc", "W43", @@ -81,7 +82,7 @@ static void bench_find_string(nvbench::state& state) "123 abc", "23 abc", "3 abc", - "5W", + "é", "7 5W43", "87 5W43", "987 5W43"}); @@ -94,8 +95,21 @@ static void bench_find_string(nvbench::state& state) if constexpr (not combine) { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + std::vector> contains_results; + std::vector contains_cvs; for (size_t i = 0; i < multi_targets.size(); i++) { - cudf::strings::contains(input, cudf::string_scalar(multi_targets[i])); + contains_results.emplace_back(cudf::strings::contains(input, cudf::string_scalar(multi_targets[i]))); + contains_cvs.emplace_back(contains_results.back()->view()); + } + + if (check_result) { + cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), + multi_targets.end()); + auto tab = cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); + for (int i = 0; i < tab->num_columns(); i++) + { + cudf::test::detail::expect_columns_equal(contains_cvs[i], tab->get_column(i).view()); + } } }); } else { // combine diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index ad600ad206b..66414dfc202 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -211,7 +211,7 @@ TEST_F(StringsFindTest, MultiContains) ""}, null_at(8)}; auto strings_view = cudf::strings_column_view(strings); - std::vector match_targets({" the ", "a", ""}); + std::vector match_targets({" the ", "a", "", "é"}); cudf::test::strings_column_wrapper multi_targets_column(match_targets.begin(), match_targets.end()); auto results = @@ -222,9 +222,12 @@ TEST_F(StringsFindTest, MultiContains) cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 0, 0}, null_at(8)); auto 
expected_2 = cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 1, 0}, null_at(8)); + auto expected_3 = + cudf::test::fixed_width_column_wrapper({1, 0, 0, 0, 0, 0, 0, 0, 0}, null_at(8)); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), expected_0); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), expected_1); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), expected_2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(3), expected_3); } TEST_F(StringsFindTest, StartsWith) From 587ce342bde80dc80437810b83a24cf868a819b3 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 2 Sep 2024 09:22:12 +0800 Subject: [PATCH 10/24] Format code --- cpp/benchmarks/string/find.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 90d86002ab2..6c19771b737 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -98,16 +98,17 @@ static void bench_find_string(nvbench::state& state) std::vector> contains_results; std::vector contains_cvs; for (size_t i = 0; i < multi_targets.size(); i++) { - contains_results.emplace_back(cudf::strings::contains(input, cudf::string_scalar(multi_targets[i]))); + contains_results.emplace_back( + cudf::strings::contains(input, cudf::string_scalar(multi_targets[i]))); contains_cvs.emplace_back(contains_results.back()->view()); } if (check_result) { cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), - multi_targets.end()); - auto tab = cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); - for (int i = 0; i < tab->num_columns(); i++) - { + multi_targets.end()); + auto tab = + cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); + for (int i = 0; i < tab->num_columns(); i++) { cudf::test::detail::expect_columns_equal(contains_cvs[i], tab->get_column(i).view()); } } From 
470355f54d0a128b782da509b6d3850d9d40cb54 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 2 Sep 2024 14:31:38 +0800 Subject: [PATCH 11/24] Fix bug --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index cc85b10acc3..e113518229b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -3344,7 +3344,7 @@ private static long[] toPrimitive(Long[] longs) { public final ColumnVector[] stringContains(ColumnView targets) { assert type.equals(DType.STRING) : "column type must be a String"; assert targets.getType().equals(DType.STRING) : "targets type must be a string"; - assert targets.getNullCount() > 0 : "targets must not be null"; + assert targets.getNullCount() == 0 : "targets must not be null"; long[] resultPointers = stringContainsMulti(getNativeView(), targets.getNativeView()); return Arrays.stream(resultPointers).mapToObj(ColumnVector::new).toArray(ColumnVector[]::new); } From e56a12271001be282a0d25a7a8f9df09144d8b01 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 4 Sep 2024 17:03:40 +0800 Subject: [PATCH 12/24] Fix comments --- cpp/benchmarks/string/find.cpp | 11 ----------- cpp/include/cudf/strings/find.hpp | 3 +++ 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 6c19771b737..8c9c4d60f0c 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -74,7 +74,6 @@ static void bench_find_string(nvbench::state& state) constexpr bool combine = false; // test true/false bool has_same_target_first_char = false; // test true/false constexpr int iters = 10; // test 4/10 - bool check_result = false; std::vector match_targets({" abc", "W43", @@ -102,16 +101,6 @@ static void bench_find_string(nvbench::state& state) 
cudf::strings::contains(input, cudf::string_scalar(multi_targets[i]))); contains_cvs.emplace_back(contains_results.back()->view()); } - - if (check_result) { - cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), - multi_targets.end()); - auto tab = - cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); - for (int i = 0; i < tab->num_columns(); i++) { - cudf::test::detail::expect_columns_equal(contains_cvs[i], tab->get_column(i).view()); - } - } }); } else { // combine state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index f86203cb46a..cadec213d70 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -153,11 +153,14 @@ std::unique_ptr contains( * * Any null string entries return corresponding null entries in the output columns. * e.g.: + * @code * input: "a", "b", "c" * targets: "a", "c" * output is a table with two boolean columns: * column_0: true, false, false * column_1: false, false, true + * @endcode + * * @param input Strings instance for this operation * @param targets UTF-8 encoded strings to search for in each string in `input` * @param stream CUDA stream used for device memory operations and kernel launches From 31f4822c90dc66c92808624e592e1a8cc14c128a Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 5 Sep 2024 17:32:10 +0800 Subject: [PATCH 13/24] Optimize warp parallel --- cpp/src/strings/search/find.cu | 183 +++++++++++++++++---------------- 1 file changed, 97 insertions(+), 86 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index ce4c2673861..48aa39c747a 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -418,45 +418,94 @@ std::unique_ptr contains_warp_parallel(strings_column_view const& input, return results; } -CUDF_KERNEL void 
multi_contains_warp_parallel_multi_scalars_fn(column_device_view const d_strings, - column_device_view const d_targets, - cudf::device_span d_results) +/** + * Each string uses a warp(32 threads) to handle all the targets. + * Each thread uses num_targets bools shared memory to store temp result for each lane. + */ +CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( + column_device_view const d_strings, + column_device_view const d_targets, + cudf::device_span const d_target_first_bytes, + column_device_view const d_target_indexes_for_first_bytes, + cudf::device_span d_results) { auto const num_targets = d_targets.size(); auto const num_rows = d_strings.size(); - auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); - using warp_reduce = cub::WarpReduce; - __shared__ typename warp_reduce::TempStorage temp_storage; - + auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); if (idx >= (num_rows * cudf::detail::warp_size)) { return; } auto const lane_idx = idx % cudf::detail::warp_size; auto const str_idx = idx / cudf::detail::warp_size; if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. - // get the string for this warp auto const d_str = d_strings.element(str_idx); - for (size_t target_idx = 0; target_idx < num_targets; target_idx++) { - // Identify the target. 
+ /** + * size of shared_bools = Min(targets_size * block_size, target_group * block_size) + * each thread uses targets_size bools + */ + extern __shared__ bool shared_bools[]; + + // initialize temp result: + // set true if target is empty, set false otherwise + for (int target_idx = 0; target_idx < num_targets; target_idx++) { auto const d_target = d_targets.element(target_idx); + shared_bools[threadIdx.x * num_targets + target_idx] = d_target.size_bytes() == 0; + } - // each thread of the warp will check just part of the string - auto found = false; - if (d_target.empty()) { - found = true; - } else { - for (auto i = static_cast(lane_idx); - !found && ((i + d_target.size_bytes()) <= d_str.size_bytes()); - i += cudf::detail::warp_size) { - // check the target matches this part of the d_str data - if (d_target.compare(d_str.data() + i, d_target.size_bytes()) == 0) { found = true; } + for (size_type str_byte_idx = lane_idx; str_byte_idx < d_str.size_bytes(); + str_byte_idx += cudf::detail::warp_size) { + // 1. check the first chars using binary search on first char set + char c = *(d_str.data() + str_byte_idx); + auto first_byte_ptr = + thrust::lower_bound(thrust::seq, d_target_first_bytes.begin(), d_target_first_bytes.end(), c); + if (not(first_byte_ptr != d_target_first_bytes.end() && *first_byte_ptr == c)) { + // first char is not matched for all targets, already set result as found + continue; + } + + // 2. 
check the 2nd chars + int first_char_index_in_list = first_byte_ptr - d_target_first_bytes.begin(); + // get possible targets + auto const possible_targets_list = + cudf::list_device_view{d_target_indexes_for_first_bytes, first_char_index_in_list}; + for (auto list_idx = 0; list_idx < possible_targets_list.size(); + ++list_idx) { // iterate possible targets + auto target_idx = possible_targets_list.element(list_idx); + int temp_result_idx = threadIdx.x * num_targets + target_idx; + if (!shared_bools[temp_result_idx]) { // not found before + auto const d_target = d_targets.element(target_idx); + if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes()) { + // first char already checked, only need to check the [2nd, end) chars if has. + bool found = true; + for (auto i = 1; i < d_target.size_bytes(); i++) { + if (*(d_str.data() + str_byte_idx + i) != *(d_target.data() + i)) { + found = false; + break; + } + } + if (found) { shared_bools[temp_result_idx] = true; } + } + } + } + } + + // wait all lanes are done in a warp + __syncwarp(); + + if (lane_idx == 0) { + for (int target_idx = 0; target_idx < num_targets; target_idx++) { + bool found = false; + for (int lane_idx = 0; lane_idx < cudf::detail::warp_size; lane_idx++) { + bool temp_idx = (str_idx * cudf::detail::warp_size + lane_idx) * num_targets + target_idx; + if (shared_bools[temp_idx]) { + found = true; + break; + } } + d_results[target_idx][str_idx] = found; } - __syncwarp(); - auto const result = warp_reduce(temp_storage).Reduce(found, cub::Max()); - if (lane_idx == 0) { d_results[target_idx][str_idx] = result; } } } @@ -483,7 +532,7 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( for (auto str_byte_idx = 0; str_byte_idx < d_str.size_bytes(); ++str_byte_idx) { // iterate the start index in the string - // binary search in the target first char set. + // 1. 
check the first chars using binary search on first char set char c = *(d_str.data() + str_byte_idx); auto first_byte_ptr = thrust::lower_bound(thrust::seq, d_target_first_bytes.begin(), d_target_first_bytes.end(), c); @@ -499,8 +548,9 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( auto const possible_targets_list = cudf::list_device_view{d_target_indexes_for_first_bytes, first_char_index_in_list}; - for (auto i = 0; i < possible_targets_list.size(); ++i) { // iterate possible targets - auto target_idx = possible_targets_list.element(i); + for (auto list_idx = 0; list_idx < possible_targets_list.size(); + ++list_idx) { // iterate possible targets + auto target_idx = possible_targets_list.element(list_idx); if (!d_results[target_idx][str_idx]) { // not found before auto const d_target = d_targets.element(target_idx); if (d_str.size_bytes() - str_byte_idx >= d_target.size_bytes()) { @@ -540,11 +590,11 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( * if char in string is 'a', then only need to try ["ac", "ad", "af"] targets. 
* */ -std::vector> multi_contains_using_indexes( - strings_column_view const& input, - strings_column_view const& targets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::vector> multi_contains(bool warp_parallel, + strings_column_view const& input, + strings_column_view const& targets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const num_targets = static_cast(targets.size()); CUDF_EXPECTS(not targets.is_empty(), "Must specify at least one target string."); @@ -644,59 +694,20 @@ std::vector> multi_contains_using_indexes( constexpr int block_size = 256; cudf::detail::grid_1d grid{input.size(), block_size}; - multi_contains_using_indexes_fn<<>>( - *d_strings, *d_targets, d_first_bytes, *d_list_column, device_results_list); - return results_list; -} - -/** - * Execute multi contains for long strings - */ -std::vector> multi_contains_using_warp_parallel( - strings_column_view const& input, - strings_column_view const& targets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_targets = static_cast(targets.size()); - CUDF_EXPECTS(not targets.is_empty(), "Must specify at least one target string."); - - // Create output columns. 
- auto const results_iter = - thrust::make_transform_iterator(thrust::counting_iterator(0), [&](int i) { - return make_numeric_column(data_type{type_id::BOOL8}, - input.size(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), - input.null_count(), - stream, - mr); - }); - auto results_list = - std::vector>(results_iter, results_iter + targets.size()); - auto device_results_list = [&] { - auto host_results_pointer_iter = - thrust::make_transform_iterator(results_list.begin(), [](auto const& results_column) { - return results_column->mutable_view().template data(); - }); - auto host_results_pointers = std::vector( - host_results_pointer_iter, host_results_pointer_iter + results_list.size()); - return cudf::detail::make_device_uvector_async(host_results_pointers, stream, mr); - }(); - - constexpr int block_size = 256; - auto const d_strings = column_device_view::create(input.parent(), stream); - auto const d_targets = column_device_view::create(targets.parent(), stream); - - // launch warp per string; one warp handles multi-targets for the same string. - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; - multi_contains_warp_parallel_multi_scalars_fn<<>>( - *d_strings, *d_targets, device_results_list); + if (warp_parallel) { + int shared_mem_size = block_size * targets.size(); + multi_contains_warp_parallel_multi_scalars_fn<<>>( + *d_strings, *d_targets, d_first_bytes, *d_list_column, device_results_list); + } else { + multi_contains_using_indexes_fn<<>>( + *d_strings, *d_targets, d_first_bytes, *d_list_column, device_results_list); + } return results_list; } @@ -859,10 +870,10 @@ std::unique_ptr
multi_contains(strings_column_view const& input, ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { // Large strings. // use warp parallel when the average string width is greater than the threshold - return multi_contains_using_warp_parallel(input, targets, stream, mr); + return multi_contains(/**warp parallel**/ true, input, targets, stream, mr); } else { // Small strings. Searching for multiple targets in one thread seems to work fastest. - return multi_contains_using_indexes(input, targets, stream, mr); + return multi_contains(/**warp parallel**/ false, input, targets, stream, mr); } }(); return std::make_unique
(std::move(result_columns)); From 6ae2c002526bad32ac9bcb84bc9f9696db9c800e Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Fri, 6 Sep 2024 15:26:50 +0800 Subject: [PATCH 14/24] Split targets to small groups to save shared memory when num of targets is large --- cpp/src/strings/search/find.cu | 29 ++++++++++++++++++++++++++-- cpp/tests/strings/find_tests.cpp | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 48aa39c747a..e61aa619cee 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -15,6 +15,8 @@ */ #include #include +#include +#include #include #include #include @@ -44,6 +46,8 @@ #include #include +#include // For std::min + namespace cudf { namespace strings { namespace detail { @@ -442,7 +446,7 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( auto const d_str = d_strings.element(str_idx); /** - * size of shared_bools = Min(targets_size * block_size, target_group * block_size) + * size of shared_bools = targets_size * block_size * each thread uses targets_size bools */ extern __shared__ bool shared_bools[]; @@ -870,7 +874,28 @@ std::unique_ptr
multi_contains(strings_column_view const& input, ((input.chars_size(stream) / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) { // Large strings. // use warp parallel when the average string width is greater than the threshold - return multi_contains(/**warp parallel**/ true, input, targets, stream, mr); + + static constexpr int target_group_size = 16; + if (targets.size() <= target_group_size) { + return multi_contains(/**warp parallel**/ true, input, targets, stream, mr); + } else { + // Too many targets will consume more shared memory, so split targets + std::vector> ret_columns; + ret_columns.resize(targets.size()); + size_type num_groups = (targets.size() + target_group_size - 1) / target_group_size; + for (size_type group_idx = 0; group_idx < num_groups; group_idx++) { + size_type start_target = group_idx * target_group_size; + size_type end_target = std::min(start_target + target_group_size, targets.size()); + auto target_goup = + cudf::detail::slice(targets.parent(), start_target, end_target, stream); + auto bool_columns = multi_contains( + /**warp parallel**/ true, input, strings_column_view(target_goup), stream, mr); + for (auto& c : bool_columns) { + ret_columns.push_back(std::move(c)); // take the ownership + } + } + return ret_columns; + } } else { // Small strings. Searching for multiple targets in one thread seems to work fastest. 
return multi_contains(/**warp parallel**/ false, input, targets, stream, mr); diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index 66414dfc202..f7eaf304b74 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -230,6 +230,39 @@ TEST_F(StringsFindTest, MultiContains) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(3), expected_3); } +TEST_F(StringsFindTest, MultiContainsMoreTargets) +{ + using cudf::test::iterators::null_at; + auto const strings = + cudf::test::strings_column_wrapper{{"there world and goodbye", "good", ""}, null_at(2)}; + auto strings_view = cudf::strings_column_view(strings); + std::vector targets({"goodbye", "non-exist", ""}); + + std::vector> expects; + expects.push_back(cudf::test::fixed_width_column_wrapper({1, 0, 0}, null_at(2))); + expects.push_back(cudf::test::fixed_width_column_wrapper({0, 0, 0}, null_at(2))); + expects.push_back(cudf::test::fixed_width_column_wrapper({1, 1, 0}, null_at(2))); + + std::vector match_targets; + int max_num_targets = 50; + + for (int num_targets = 1; num_targets < max_num_targets; num_targets++) { + match_targets.clear(); + for (int i = 0; i < num_targets; i++) { + match_targets.push_back(targets[i % targets.size()]); + } + + cudf::test::strings_column_wrapper multi_targets_column(match_targets.begin(), + match_targets.end()); + auto results = + cudf::strings::multi_contains(strings_view, cudf::strings_column_view(multi_targets_column)); + EXPECT_EQ(results->num_columns(), num_targets); + for (int i = 0; i < num_targets; i++) { + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(i), expects[i % expects.size()]); + } + } +} + TEST_F(StringsFindTest, StartsWith) { cudf::test::strings_column_wrapper strings({"Héllo", "thesé", "", "lease", "tést strings", ""}, From 3324671e6efa1f7cca458d542ed0053288c174e0 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 11 Sep 2024 17:36:00 +0800 Subject: [PATCH 15/24] Fix bug when strings are 
long: returns all falses. --- cpp/src/strings/search/find.cu | 2 +- cpp/tests/strings/find_tests.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 4b7d1af2b43..0b23da6ac20 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -502,7 +502,7 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( for (int target_idx = 0; target_idx < num_targets; target_idx++) { bool found = false; for (int lane_idx = 0; lane_idx < cudf::detail::warp_size; lane_idx++) { - bool temp_idx = (str_idx * cudf::detail::warp_size + lane_idx) * num_targets + target_idx; + int temp_idx = (str_idx * cudf::detail::warp_size + lane_idx) * num_targets + target_idx; if (shared_bools[temp_idx]) { found = true; break; diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index f7eaf304b74..81415c7703f 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -263,6 +263,26 @@ TEST_F(StringsFindTest, MultiContainsMoreTargets) } } +TEST_F(StringsFindTest, MultiContainsLongStrings) +{ + auto const input = cudf::test::strings_column_wrapper( + {"quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving", + "the following code snippet demonstrates how to use search for values in an ordered range", + "thé it returns the last position where value could be inserted without violating ordering", + "algorithms execution is parallelized as determined by an execution policy. 
t", + "he this is a continuation of previous row to make sure string boundaries are honored", + "abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ !@#$%^&*()~"}); + auto sv = cudf::strings_column_view(input); + auto targets = cudf::test::strings_column_wrapper({" the ", "search", "", "string", "ox", "é "}); + auto results = cudf::strings::multi_contains(sv, cudf::strings_column_view(targets)); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), cudf::test::fixed_width_column_wrapper({1,0,1,0,0,0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), cudf::test::fixed_width_column_wrapper({0,1,0,0,0,0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), cudf::test::fixed_width_column_wrapper({1,1,1,1,1,1})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(3), cudf::test::fixed_width_column_wrapper({0,0,0,0,1,0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(4), cudf::test::fixed_width_column_wrapper({1,0,0,0,0,0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(5), cudf::test::fixed_width_column_wrapper({0,0,1,0,0,0})); +} + TEST_F(StringsFindTest, StartsWith) { cudf::test::strings_column_wrapper strings({"Héllo", "thesé", "", "lease", "tést strings", ""}, From 849c09375ad93e748a06d900d24f2421b419114b Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 11 Sep 2024 17:55:10 +0800 Subject: [PATCH 16/24] Format code --- cpp/tests/strings/find_tests.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index 81415c7703f..df110c5092f 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -275,12 +275,18 @@ TEST_F(StringsFindTest, MultiContainsLongStrings) auto sv = cudf::strings_column_view(input); auto targets = cudf::test::strings_column_wrapper({" the ", "search", "", "string", "ox", "é "}); auto results = cudf::strings::multi_contains(sv, 
cudf::strings_column_view(targets)); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), cudf::test::fixed_width_column_wrapper({1,0,1,0,0,0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), cudf::test::fixed_width_column_wrapper({0,1,0,0,0,0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), cudf::test::fixed_width_column_wrapper({1,1,1,1,1,1})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(3), cudf::test::fixed_width_column_wrapper({0,0,0,0,1,0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(4), cudf::test::fixed_width_column_wrapper({1,0,0,0,0,0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(5), cudf::test::fixed_width_column_wrapper({0,0,1,0,0,0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(0), cudf::test::fixed_width_column_wrapper({1, 0, 1, 0, 0, 0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(1), cudf::test::fixed_width_column_wrapper({0, 1, 0, 0, 0, 0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(2), cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(3), cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 1, 0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(4), cudf::test::fixed_width_column_wrapper({1, 0, 0, 0, 0, 0})); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + results->get_column(5), cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 0, 0})); } TEST_F(StringsFindTest, StartsWith) From 85e8b17dc153f37f5e1d429b2c717ffcd9b56904 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 11 Sep 2024 18:08:22 +0800 Subject: [PATCH 17/24] Refactor: refine code comments --- cpp/src/strings/search/find.cu | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 0b23da6ac20..4f12649befe 100644 --- a/cpp/src/strings/search/find.cu +++ 
b/cpp/src/strings/search/find.cu @@ -465,7 +465,9 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( auto first_byte_ptr = thrust::lower_bound(thrust::seq, d_target_first_bytes.begin(), d_target_first_bytes.end(), c); if (not(first_byte_ptr != d_target_first_bytes.end() && *first_byte_ptr == c)) { - // first char is not matched for all targets, already set result as found + // first char is not matched for all targets + // Note: first bytes does not work for empty target. + // For empty target, already set result as found continue; } @@ -527,7 +529,8 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. auto const d_str = d_strings.element(str_idx); - // check empty target, the result of searching empty target is true. + // initialize temp result: + // set true if target is empty, set false otherwise for (auto target_idx = 0; target_idx < num_targets; ++target_idx) { auto const d_target = d_targets.element(target_idx); d_results[target_idx][str_idx] = d_target.size_bytes() == 0; @@ -542,8 +545,9 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( thrust::lower_bound(thrust::seq, d_target_first_bytes.begin(), d_target_first_bytes.end(), c); if (not(first_byte_ptr != d_target_first_bytes.end() && *first_byte_ptr == c)) { - // For non-empty targets: no need to search for `str_byte_idx` position, because first char is - // unmatched. For empty targets: already set result as found. + // first char is not matched for all targets + // Note: first bytes does not work for empty target. + // For empty target, already set result as found continue; } @@ -574,7 +578,7 @@ CUDF_KERNEL void multi_contains_using_indexes_fn( } /** - * Execute multi contains for short strings + * Execute multi contains. * First index the first char for all targets. 
* Index the first char: * collect first char for all targets and do uniq and sort, From 9fc939884aed22e5ba787f356aea09e4760bed61 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Sat, 14 Sep 2024 17:06:51 +0800 Subject: [PATCH 18/24] Fix bug: illegal memory access --- cpp/src/strings/search/find.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 4f12649befe..2f3a9389752 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -504,7 +504,7 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( for (int target_idx = 0; target_idx < num_targets; target_idx++) { bool found = false; for (int lane_idx = 0; lane_idx < cudf::detail::warp_size; lane_idx++) { - int temp_idx = (str_idx * cudf::detail::warp_size + lane_idx) * num_targets + target_idx; + int temp_idx = (threadIdx.x + lane_idx) * num_targets + target_idx; if (shared_bools[temp_idx]) { found = true; break; From b33d692f92d5ef3a0f05ec57ec906b029bd782ac Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Sat, 14 Sep 2024 17:10:28 +0800 Subject: [PATCH 19/24] Fix bug in split logic --- cpp/src/strings/search/find.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 2f3a9389752..4ceb981cf67 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -885,7 +885,6 @@ std::unique_ptr
multi_contains(strings_column_view const& input, } else { // Too many targets will consume more shared memory, so split targets std::vector> ret_columns; - ret_columns.resize(targets.size()); size_type num_groups = (targets.size() + target_group_size - 1) / target_group_size; for (size_type group_idx = 0; group_idx < num_groups; group_idx++) { size_type start_target = group_idx * target_group_size; From 6741bef90237ae4c0e34eec13e96bb15c66ce38f Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Sat, 14 Sep 2024 17:14:48 +0800 Subject: [PATCH 20/24] Optimize the perf for indexing first chars --- cpp/src/strings/search/find.cu | 35 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 4ceb981cf67..30599b354ec 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -616,40 +616,28 @@ std::vector> multi_contains(bool warp_parallel, static_cast(targets_offsets.size())}, stream); - // 2. index the first characters in targets - // 2.1 collect first characters in targets - thrust::host_vector h_first_bytes = {}; + // 2. 
index the first characters for all targets + std::map> indexes; for (auto i = 0; i < targets.size(); i++) { auto target_begin_offset = h_targets_offsets[i]; auto target_end_offset = h_targets_offsets[i + 1]; if (target_end_offset - target_begin_offset > 0) { char first_char = h_targets_child[target_begin_offset]; - auto no_exist = - thrust::find(h_first_bytes.begin(), h_first_bytes.end(), first_char) == h_first_bytes.end(); - if (no_exist) { h_first_bytes.push_back(first_char); } + auto not_exist = indexes.find(first_char) == indexes.end(); + if (not_exist) { indexes[first_char] = std::vector();} + indexes[first_char].push_back(i); } } - - // 2.2 sort the first characters - thrust::sort(h_first_bytes.begin(), h_first_bytes.end()); - - // 2.3 generate indexes: map from `first char in target` to `target indexes` + thrust::host_vector h_first_bytes = {}; thrust::host_vector h_offsets = {0}; thrust::host_vector h_elements = {}; - for (size_t i = 0; i < h_first_bytes.size(); i++) { - auto expected_first_byte = h_first_bytes[i]; - for (auto target_idx = 0; target_idx < targets.size(); target_idx++) { - auto target_begin_offset = h_targets_offsets[target_idx]; - auto target_end_offset = h_targets_offsets[target_idx + 1]; - if (target_end_offset - target_begin_offset > 0) { - char curr_first_byte = h_targets_child[target_begin_offset]; - if (expected_first_byte == curr_first_byte) { h_elements.push_back(target_idx); } - } - } + for (const auto& pair : indexes) { + h_first_bytes.push_back(pair.first); + h_elements.insert(h_elements.end(), pair.second.begin(), pair.second.end()); h_offsets.push_back(h_elements.size()); } - // 2.4 copy first char set and first char indexes to device + // 3. 
copy first char set and first char indexes to device auto d_first_bytes = cudf::detail::make_device_uvector_async(h_first_bytes, stream, mr); auto d_offsets = cudf::detail::make_device_uvector_async(h_offsets, stream, mr); auto d_elements = cudf::detail::make_device_uvector_async(h_elements, stream, mr); @@ -674,7 +662,7 @@ std::vector> multi_contains(bool warp_parallel, mr); auto d_list_column = column_device_view::create(list_column->view(), stream); - // 3. Create output columns. + // 4. Create output columns. auto const results_iter = thrust::make_transform_iterator(thrust::counting_iterator(0), [&](int i) { return make_numeric_column(data_type{type_id::BOOL8}, @@ -699,6 +687,7 @@ std::vector> multi_contains(bool warp_parallel, auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_targets = column_device_view::create(targets.parent(), stream); + //5. execute the kernel constexpr int block_size = 256; cudf::detail::grid_1d grid{input.size(), block_size}; From 330e828acf00290edd35de5cd6888245a96d096c Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Sat, 14 Sep 2024 17:38:48 +0800 Subject: [PATCH 21/24] Fix comments from code review --- cpp/benchmarks/string/find.cpp | 38 +++++++++++----------------------- cpp/src/strings/search/find.cu | 13 ++++++------ 2 files changed, 19 insertions(+), 32 deletions(-) diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 8c9c4d60f0c..f07e82286f2 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -71,10 +71,10 @@ static void bench_find_string(nvbench::state& state) cudf::strings::find_multiple(input, cudf::strings_column_view(targets)); }); } else if (api == "contains") { - constexpr bool combine = false; // test true/false - bool has_same_target_first_char = false; // test true/false - constexpr int iters = 10; // test 4/10 - + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { cudf::strings::contains(input, target); 
}); + } else if (api == "multi-contains") { + constexpr int iters = 10; std::vector match_targets({" abc", "W43", "0987 5W43", @@ -87,28 +87,13 @@ static void bench_find_string(nvbench::state& state) "987 5W43"}); auto multi_targets = std::vector{}; for (int i = 0; i < iters; i++) { - // if has same first chars in targets, use duplicated targets. - int idx = has_same_target_first_char ? i / 2 : i; - multi_targets.emplace_back(match_targets[idx]); - } - - if constexpr (not combine) { - state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - std::vector> contains_results; - std::vector contains_cvs; - for (size_t i = 0; i < multi_targets.size(); i++) { - contains_results.emplace_back( - cudf::strings::contains(input, cudf::string_scalar(multi_targets[i]))); - contains_cvs.emplace_back(contains_results.back()->view()); - } - }); - } else { // combine - state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), - multi_targets.end()); - cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); - }); + multi_targets.emplace_back(match_targets[i % match_targets.size()]); } + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::test::strings_column_wrapper multi_targets_column(multi_targets.begin(), + multi_targets.end()); + cudf::strings::multi_contains(input, cudf::strings_column_view(multi_targets_column)); + }); } else if (api == "starts_with") { state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); }); @@ -120,7 +105,8 @@ static void bench_find_string(nvbench::state& state) NVBENCH_BENCH(bench_find_string) .set_name("find_string") - .add_string_axis("api", {"find", "find_multi", "contains", "starts_with", "ends_with"}) + .add_string_axis("api", + {"find", "find_multi", "contains", "starts_with", "ends_with", "multi-contains"}) .add_int64_axis("row_width", {32, 
64, 128, 256, 512, 1024}) .add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216}) .add_int64_axis("hit_rate", {20, 80}); // percentage diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 30599b354ec..8a0f3b245ae 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -436,8 +436,9 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( auto const num_targets = d_targets.size(); auto const num_rows = d_strings.size(); - auto const idx = static_cast(threadIdx.x + blockIdx.x * blockDim.x); - if (idx >= (num_rows * cudf::detail::warp_size)) { return; } + auto const idx = cudf::detail::grid_1d::global_thread_id(); + auto const str_idx = idx / cudf::detail::warp_size; + if (str_idx >= num_rows) { return; } auto const lane_idx = idx % cudf::detail::warp_size; auto const str_idx = idx / cudf::detail::warp_size; @@ -623,12 +624,12 @@ std::vector> multi_contains(bool warp_parallel, auto target_end_offset = h_targets_offsets[i + 1]; if (target_end_offset - target_begin_offset > 0) { char first_char = h_targets_child[target_begin_offset]; - auto not_exist = indexes.find(first_char) == indexes.end(); - if (not_exist) { indexes[first_char] = std::vector();} + auto not_exist = indexes.find(first_char) == indexes.end(); + if (not_exist) { indexes[first_char] = std::vector(); } indexes[first_char].push_back(i); } } - thrust::host_vector h_first_bytes = {}; + thrust::host_vector h_first_bytes = {}; thrust::host_vector h_offsets = {0}; thrust::host_vector h_elements = {}; for (const auto& pair : indexes) { @@ -687,7 +688,7 @@ std::vector> multi_contains(bool warp_parallel, auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_targets = column_device_view::create(targets.parent(), stream); - //5. execute the kernel + // 5. 
execute the kernel constexpr int block_size = 256; cudf::detail::grid_1d grid{input.size(), block_size}; From d216993d4fcef3fc9ce4d663e432bbc9b197d813 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Sat, 14 Sep 2024 17:46:22 +0800 Subject: [PATCH 22/24] Fix compile error --- cpp/src/strings/search/find.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 8a0f3b245ae..37d8568a26c 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -441,7 +441,6 @@ CUDF_KERNEL void multi_contains_warp_parallel_multi_scalars_fn( if (str_idx >= num_rows) { return; } auto const lane_idx = idx % cudf::detail::warp_size; - auto const str_idx = idx / cudf::detail::warp_size; if (d_strings.is_null(str_idx)) { return; } // bitmask will set result to null. // get the string for this warp auto const d_str = d_strings.element(str_idx); From a32c54dcdc39c7f2c5885ae97d3da995183d6ca8 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 18 Sep 2024 19:46:33 +0800 Subject: [PATCH 23/24] Fix bugs; update tests --- cpp/src/strings/search/find.cu | 10 +- cpp/tests/strings/find_tests.cpp | 158 +++++++++++++++++++++---------- 2 files changed, 116 insertions(+), 52 deletions(-) diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 37d8568a26c..6ca45dc2d53 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -610,10 +610,13 @@ std::vector> multi_contains(bool warp_parallel, // 1. 
copy targets from device to host auto const h_targets_child = cudf::detail::make_std_vector_sync( cudf::device_span(targets.chars_begin(stream), targets.chars_size(stream)), stream); + + // Note: targets may be sliced, so should find the correct first offset + auto first_offset = targets.offset(); auto const targets_offsets = targets.offsets(); auto const h_targets_offsets = cudf::detail::make_std_vector_sync( - cudf::device_span{targets_offsets.data(), - static_cast(targets_offsets.size())}, + cudf::device_span{targets_offsets.data() + first_offset, + static_cast(targets.size() + 1)}, stream); // 2. index the first characters for all targets @@ -689,9 +692,9 @@ std::vector> multi_contains(bool warp_parallel, // 5. execute the kernel constexpr int block_size = 256; - cudf::detail::grid_1d grid{input.size(), block_size}; if (warp_parallel) { + cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; int shared_mem_size = block_size * targets.size(); multi_contains_warp_parallel_multi_scalars_fn<<> multi_contains(bool warp_parallel, stream.value()>>>( *d_strings, *d_targets, d_first_bytes, *d_list_column, device_results_list); } else { + cudf::detail::grid_1d grid{input.size(), block_size}; multi_contains_using_indexes_fn<< s = { + "Héllo, there world and goodbye", + "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving", + "the following code snippet demonstrates how to use search for values in an ordered range", + "it returns the last position where value could be inserted without violating the ordering", + "algorithms execution is parallelized as determined by an execution policy. 
t", + "he this is a continuation of previous row to make sure string boundaries are honored", + "abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ !@#$%^&*()~", + "", + ""}; + + // replicate strings + auto string_itr = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s[i % s.size()]; }); + + // nulls: 8, 8 + 1 * 9, 8 + 2 * 9 ...... + auto string_v = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return (i + 1) % s.size() != 0; }); + + auto const strings = + cudf::test::strings_column_wrapper(string_itr, string_itr + num_rows, string_v); auto strings_view = cudf::strings_column_view(strings); std::vector match_targets({" the ", "a", "", "é"}); cudf::test::strings_column_wrapper multi_targets_column(match_targets.begin(), match_targets.end()); auto results = cudf::strings::multi_contains(strings_view, cudf::strings_column_view(multi_targets_column)); - auto expected_0 = - cudf::test::fixed_width_column_wrapper({0, 1, 0, 1, 0, 0, 0, 0, 0}, null_at(8)); - auto expected_1 = - cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 0, 0}, null_at(8)); - auto expected_2 = - cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1, 1, 1, 0}, null_at(8)); - auto expected_3 = - cudf::test::fixed_width_column_wrapper({1, 0, 0, 0, 0, 0, 0, 0, 0}, null_at(8)); + + std::vector ret_0 = {0, 1, 0, 1, 0, 0, 0, 0, 0}; + std::vector ret_1 = {1, 1, 1, 1, 1, 1, 1, 0, 0}; + std::vector ret_2 = {1, 1, 1, 1, 1, 1, 1, 1, 0}; + std::vector ret_3 = {1, 0, 0, 0, 0, 0, 0, 0, 0}; + + auto make_bool_col_fn = [&string_v, &num_rows](std::vector bools) { + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return bools[i % bools.size()]; }); + return cudf::test::fixed_width_column_wrapper(iter, iter + num_rows, string_v); + }; + + auto expected_0 = make_bool_col_fn(ret_0); + auto expected_1 = make_bool_col_fn(ret_1); + auto expected_2 = make_bool_col_fn(ret_2); + auto expected_3 = make_bool_col_fn(ret_3); + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), expected_0); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), expected_1); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), expected_2); @@ -232,21 +252,25 @@ TEST_F(StringsFindTest, MultiContains) TEST_F(StringsFindTest, MultiContainsMoreTargets) { - using cudf::test::iterators::null_at; - auto const strings = - cudf::test::strings_column_wrapper{{"there world and goodbye", "good", ""}, null_at(2)}; + auto const strings = cudf::test::strings_column_wrapper{ + "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving " + "quick brown fox jumped", + "the following code snippet demonstrates how to use search for values in an ordered rangethe " + "following code snippet", + "thé it returns the last position where value could be inserted without violating ordering thé " + "it returns the last position"}; auto strings_view = cudf::strings_column_view(strings); - std::vector targets({"goodbye", "non-exist", ""}); + std::vector targets({"lazy brown", "non-exist", ""}); std::vector> expects; - expects.push_back(cudf::test::fixed_width_column_wrapper({1, 0, 0}, null_at(2))); - expects.push_back(cudf::test::fixed_width_column_wrapper({0, 0, 0}, null_at(2))); - expects.push_back(cudf::test::fixed_width_column_wrapper({1, 1, 0}, null_at(2))); + expects.push_back(cudf::test::fixed_width_column_wrapper({1, 0, 0})); + expects.push_back(cudf::test::fixed_width_column_wrapper({0, 0, 0})); + expects.push_back(cudf::test::fixed_width_column_wrapper({1, 1, 1})); std::vector match_targets; - int max_num_targets = 50; + int max_num_targets = 20; - for (int num_targets = 1; num_targets < max_num_targets; num_targets++) { + for (int num_targets = 19; num_targets < max_num_targets; num_targets++) { match_targets.clear(); for (int i = 0; i < num_targets; i++) { match_targets.push_back(targets[i % targets.size()]); @@ -265,28 +289,64 @@ TEST_F(StringsFindTest, 
MultiContainsMoreTargets) TEST_F(StringsFindTest, MultiContainsLongStrings) { - auto const input = cudf::test::strings_column_wrapper( - {"quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving", - "the following code snippet demonstrates how to use search for values in an ordered range", - "thé it returns the last position where value could be inserted without violating ordering", - "algorithms execution is parallelized as determined by an execution policy. t", - "he this is a continuation of previous row to make sure string boundaries are honored", - "abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ !@#$%^&*()~"}); - auto sv = cudf::strings_column_view(input); + constexpr int num_rows = 1024 + 1; + // replicate the following 7 rows: + std::vector s = { + "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving " + "quick brown fox jumped", + "the following code snippet demonstrates how to use search for values in an ordered rangethe " + "following code snippet", + "thé it returns the last position where value could be inserted without violating ordering thé " + "it returns the last position", + "algorithms execution is parallelized as determined by an execution policy. t algorithms " + "execution is parallelized as ", + "he this is a continuation of previous row to make sure string boundaries are honored he this " + "is a continuation of previous row", + "abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ " + "!@#$%^&*()~abcdefghijklmnopqrstuvwxyz 0123456789 ABCDEFGHIJKL", + ""}; + + // replicate strings + auto string_itr = + cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return s[i % s.size()]; }); + + // nulls: 6, 6 + 1 * 7, 6 + 2 * 7 ...... 
+ auto string_v = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return (i + 1) % s.size() != 0; }); + + auto const strings = + cudf::test::strings_column_wrapper(string_itr, string_itr + num_rows, string_v); + + auto sv = cudf::strings_column_view(strings); auto targets = cudf::test::strings_column_wrapper({" the ", "search", "", "string", "ox", "é "}); auto results = cudf::strings::multi_contains(sv, cudf::strings_column_view(targets)); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(0), cudf::test::fixed_width_column_wrapper({1, 0, 1, 0, 0, 0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(1), cudf::test::fixed_width_column_wrapper({0, 1, 0, 0, 0, 0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(2), cudf::test::fixed_width_column_wrapper({1, 1, 1, 1, 1, 1})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(3), cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 1, 0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(4), cudf::test::fixed_width_column_wrapper({1, 0, 0, 0, 0, 0})); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( - results->get_column(5), cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 0, 0})); + + std::vector ret_0 = {1, 0, 1, 0, 0, 0, 0}; + std::vector ret_1 = {0, 1, 0, 0, 0, 0, 0}; + std::vector ret_2 = {1, 1, 1, 1, 1, 1, 0}; + std::vector ret_3 = {0, 0, 0, 0, 1, 0, 0}; + std::vector ret_4 = {1, 0, 0, 0, 0, 0, 0}; + std::vector ret_5 = {0, 0, 1, 0, 0, 0, 0}; + + auto make_bool_col_fn = [&string_v, &num_rows](std::vector bools) { + auto iter = cudf::detail::make_counting_transform_iterator( + 0, [&](auto i) { return bools[i % bools.size()]; }); + return cudf::test::fixed_width_column_wrapper(iter, iter + num_rows, string_v); + }; + + auto expected_0 = make_bool_col_fn(ret_0); + auto expected_1 = make_bool_col_fn(ret_1); + auto expected_2 = make_bool_col_fn(ret_2); + auto expected_3 = make_bool_col_fn(ret_3); + auto expected_4 = make_bool_col_fn(ret_4); + 
auto expected_5 = make_bool_col_fn(ret_5); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0), expected_0); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(1), expected_1); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(2), expected_2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(3), expected_3); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(4), expected_4); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(5), expected_5); } TEST_F(StringsFindTest, StartsWith) From 5caf782929e80dab8f96f166a372387d834e62e0 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 18 Sep 2024 19:49:22 +0800 Subject: [PATCH 24/24] Update --- cpp/tests/strings/find_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp index 4c79daf739d..52369d0755c 100644 --- a/cpp/tests/strings/find_tests.cpp +++ b/cpp/tests/strings/find_tests.cpp @@ -268,9 +268,9 @@ TEST_F(StringsFindTest, MultiContainsMoreTargets) expects.push_back(cudf::test::fixed_width_column_wrapper({1, 1, 1})); std::vector match_targets; - int max_num_targets = 20; + int max_num_targets = 50; - for (int num_targets = 19; num_targets < max_num_targets; num_targets++) { + for (int num_targets = 1; num_targets < max_num_targets; num_targets++) { match_targets.clear(); for (int i = 0; i < num_targets; i++) { match_targets.push_back(targets[i % targets.size()]);