From 15f53c35870fbd68cc6d5615499c9914e2d95dc3 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Wed, 4 Dec 2024 11:41:27 -0500 Subject: [PATCH] adding in comment fixes, failing big csr tests still --- .../sparse/matrix/detail/preprocessing.cuh | 416 ++++++++++-------- .../raft/sparse/matrix/preprocessing.cuh | 46 +- cpp/include/raft/sparse/neighbors/knn.cuh | 12 + cpp/template/build.sh | 41 ++ cpp/test/sparse/preprocess_coo.cu | 21 + cpp/test/sparse/preprocess_csr.cu | 25 +- 6 files changed, 356 insertions(+), 205 deletions(-) create mode 100755 cpp/template/build.sh diff --git a/cpp/include/raft/sparse/matrix/detail/preprocessing.cuh b/cpp/include/raft/sparse/matrix/detail/preprocessing.cuh index 8a30903aa6..38c0d7405d 100644 --- a/cpp/include/raft/sparse/matrix/detail/preprocessing.cuh +++ b/cpp/include/raft/sparse/matrix/detail/preprocessing.cuh @@ -33,9 +33,9 @@ namespace raft::sparse::matrix::detail { * @param k_param: K value required by BM25 algorithm. * @param b_param: B value required by BM25 algorithm. */ -template +template struct bm25 { - bm25(T1 num_feats, T2 avg_feat_len, T2 k_param, T2 b_param) + bm25(IndexType num_feats, ValueType avg_feat_len, ValueType k_param, ValueType b_param) { total_feats = num_feats; avg_feat_length = avg_feat_len; @@ -43,18 +43,20 @@ struct bm25 { b = b_param; } - float __device__ operator()(const T2& value, const T2& num_feats_id_occ, const T2& feat_length) + float __device__ operator()(const ValueType& value, + const ValueType& num_feats_id_occ, + const ValueType& feat_length) { - T2 tf = T2(value / feat_length); - T2 idf = raft::log(total_feats / num_feats_id_occ); - T2 bm = ((k + 1) * tf) / (k * ((1.0f - b) + b * (feat_length / avg_feat_length)) + tf); + ValueType tf = ValueType(value / feat_length); + ValueType idf = raft::log(total_feats / num_feats_id_occ); + ValueType bm = ((k + 1) * tf) / (k * ((1.0f - b) + b * (feat_length / avg_feat_length)) + tf); return idf * bm; } - T2 avg_feat_length; - T1 total_feats; - T2 k; - T2 b; + ValueType avg_feat_length; + IndexType total_feats; + ValueType k; + ValueType b; }; /** @@ -62,26 +64,28 @@ struct bm25 { * logrithmically scaled frequency. * @param total_feats_param: The total number of features in the matrix */ -template +template struct tfidf { - tfidf(T1 total_feats_param) { total_feats = total_feats_param; } + tfidf(IndexType total_feats_param) { total_feats = total_feats_param; } - float __device__ operator()(const T2& value, const T2& num_feats_id_occ, const T2& feat_length) + float __device__ operator()(const ValueType& value, + const ValueType& num_feats_id_occ, + const ValueType& feat_length) { - T2 tf = T2(value / feat_length); - T2 idf = raft::log(total_feats / num_feats_id_occ); + ValueType tf = ValueType(value / feat_length); + ValueType idf = raft::log(total_feats / num_feats_id_occ); return tf * idf; } - T1 total_feats; + IndexType total_feats; }; -template +template struct mapper { - mapper(raft::device_vector_view map) : map(map) {} + mapper(raft::device_vector_view map) : map(map) {} - float __device__ operator()(const T& value) + float __device__ operator()(const ValueType& value) { - T new_value = map[value]; + ValueType new_value = map[value]; if (new_value) { return new_value; } else { @@ -89,24 +93,27 @@ struct mapper { } } - raft::device_vector_view map; + raft::device_vector_view map; }; -template +template struct map_to { - map_to(raft::device_vector_view map) : map(map) {} + map_to(raft::device_vector_view map) : map(map) {} - float __device__ operator()(const T1& key, const T2& count) + float __device__ operator()(const IndexType& key, const ValueType& count) { map[key] = count; return 0.0f; } - raft::device_vector_view map; + raft::device_vector_view map; }; /** * @brief Get unique counts + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param sort_vector: Input COO array that contains the keys. * @param secondary_vector: Input with secondary keys of COO, (columns or rows). @@ -115,16 +122,17 @@ struct map_to { * @param keys_out: Output array with one entry for each key. (same size as counts_out) * @param counts_out: Output array with cumulative sum for each key. (same size as keys_out) */ -template +template void get_uniques_counts(raft::resources& handle, - raft::device_vector_view sort_vector, - raft::device_vector_view secondary_vector, - raft::device_vector_view data, - raft::device_vector_view itr_vals, - raft::device_vector_view keys_out, - raft::device_vector_view counts_out) + raft::device_vector_view sort_vector, + raft::device_vector_view secondary_vector, + raft::device_vector_view data, + raft::device_vector_view itr_vals, + raft::device_vector_view keys_out, + raft::device_vector_view counts_out) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); + raft::sparse::op::coo_sort(int(sort_vector.size()), int(secondary_vector.size()), int(data.size()), @@ -134,6 +142,7 @@ void get_uniques_counts(raft::resources& handle, stream); // replace this call with raft version when available // (https://github.com/rapidsai/raft/issues/2477) + RAFT_CHECK_CUDA(stream); thrust::reduce_by_key(raft::resource::get_thrust_policy(handle), sort_vector.data_handle(), sort_vector.data_handle() + sort_vector.size(), @@ -144,6 +153,9 @@ void get_uniques_counts(raft::resources& handle, /** * @brief Broadcasts values to target indices of vector based on key/value look up + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param origin: Input array that has values to use for computation * @param keys: Output array that has keys, should be the size of unique @@ -151,31 +163,35 @@ void get_uniques_counts(raft::resources& handle, * @param results: Output array that scatters the counts to origin value positions. Same size as * origin array. */ -template +template void create_mapped_vector(raft::resources& handle, - const raft::device_vector_view origin, - const raft::device_vector_view keys, - const raft::device_vector_view counts, - raft::device_vector_view result, - T1 key_size) + const raft::device_vector_view origin, + const raft::device_vector_view keys, + const raft::device_vector_view counts, + raft::device_vector_view result, + IndexType key_size) { // index into the last element and then add 1 to it. - auto origin_map = raft::make_device_vector(handle, key_size + 1); + auto origin_map = raft::make_device_vector(handle, key_size + 1); raft::matrix::fill(handle, origin_map.view(), 0.0f); - auto dummy_vec = raft::make_device_vector(handle, keys.size()); + auto dummy_vec = raft::make_device_vector(handle, keys.size()); raft::linalg::map(handle, dummy_vec.view(), - map_to(origin_map.view()), + map_to(origin_map.view()), raft::make_const_mdspan(keys), raft::make_const_mdspan(counts)); - raft::linalg::map(handle, result, raft::cast_op{}, raft::make_const_mdspan(origin)); - raft::linalg::map(handle, result, mapper(origin_map.view()), raft::make_const_mdspan(result)); + raft::linalg::map(handle, result, raft::cast_op{}, raft::make_const_mdspan(origin)); + raft::linalg::map( + handle, result, mapper(origin_map.view()), raft::make_const_mdspan(result)); } /** * @brief Compute row(id) counts + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param rows: Input COO rows array * @param columns: Input COO columns array @@ -183,13 +199,13 @@ void create_mapped_vector(raft::resources& handle, * @param id_counts: Output array that stores counts per row, scattered to same shape as rows. * @param n_rows: Number of rows in matrix */ -template +template void get_id_counts(raft::resources& handle, - raft::device_vector_view rows, - raft::device_vector_view columns, - raft::device_vector_view values, - raft::device_vector_view id_counts, - T1 n_rows) + raft::device_vector_view rows, + raft::device_vector_view columns, + raft::device_vector_view values, + raft::device_vector_view id_counts, + IndexType n_rows) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); @@ -201,20 +217,23 @@ void get_id_counts(raft::resources& handle, values.data_handle(), stream); - auto rows_counts = raft::make_device_vector(handle, n_rows); + auto rows_counts = raft::make_device_vector(handle, n_rows); raft::matrix::fill(handle, rows_counts.view(), 0); - raft::sparse::linalg::coo_degree(raft::make_const_mdspan(rows).data_handle(), - int(rows.size()), - rows_counts.data_handle(), - stream); + raft::sparse::linalg::coo_degree(raft::make_const_mdspan(rows).data_handle(), + int(rows.size()), + rows_counts.data_handle(), + stream); raft::linalg::map( - handle, id_counts, mapper(rows_counts.view()), raft::make_const_mdspan(rows)); + handle, id_counts, mapper(rows_counts.view()), raft::make_const_mdspan(rows)); } /** * @brief Gather per feature mean values, returns the cumulative avg feature length. + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param rows: Input COO rows array * @param columns: Input COO columns array @@ -222,26 +241,27 @@ void get_id_counts(raft::resources& handle, * @param feat_lengths: Output array that stores mean per feature value * @param n_cols: Number of columns in matrix */ -template +template float get_feature_data(raft::resources& handle, - raft::device_vector_view rows, - raft::device_vector_view columns, - raft::device_vector_view values, - raft::device_vector_view feat_lengths, - T1 n_cols) + raft::device_vector_view rows, + raft::device_vector_view columns, + raft::device_vector_view values, + raft::device_vector_view feat_lengths, + IndexType n_cols) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); - auto preserved_columns = raft::make_device_vector(handle, columns.size()); - raft::copy(preserved_columns.data_handle(), columns.data_handle(), columns.size(), stream); + auto preserved_columns = raft::make_device_vector(handle, columns.size()); + int uniq_cols = raft::sparse::neighbors::get_n_components(columns.data_handle(), columns.size(), stream); - auto col_keys = raft::make_device_vector(handle, uniq_cols); - auto col_counts = raft::make_device_vector(handle, uniq_cols); + raft::copy(preserved_columns.data_handle(), columns.data_handle(), columns.size(), stream); - get_uniques_counts(handle, columns, rows, values, values, col_keys.view(), col_counts.view()); + auto col_keys = raft::make_device_vector(handle, uniq_cols); + auto col_counts = raft::make_device_vector(handle, uniq_cols); - auto total_feature_lengths = raft::make_device_scalar(handle, 0); + get_uniques_counts(handle, columns, rows, values, values, col_keys.view(), col_counts.view()); + auto total_feature_lengths = raft::make_device_scalar(handle, 0); raft::linalg::mapReduce(total_feature_lengths.data_handle(), col_counts.size(), 0, @@ -249,19 +269,22 @@ float get_feature_data(raft::resources& handle, raft::add_op(), stream, col_counts.data_handle()); - auto total_feature_lengths_host = raft::make_host_scalar(handle, 0); + auto total_feature_lengths_host = raft::make_host_scalar(handle, 0); raft::copy(total_feature_lengths_host.data_handle(), total_feature_lengths.data_handle(), total_feature_lengths.size(), stream); - T2 avg_feat_length = T2(total_feature_lengths_host(0)) / n_cols; - create_mapped_vector( + ValueType avg_feat_length = ValueType(total_feature_lengths_host(0)) / n_cols; + create_mapped_vector( handle, preserved_columns.view(), col_keys.view(), col_counts.view(), feat_lengths, n_cols); return avg_feat_length; } /** * @brief Gather per feature mean values and id counts, returns the cumulative avg feature length. + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param rows: Input COO rows array * @param columns: Input COO columns array @@ -271,15 +294,15 @@ float get_feature_data(raft::resources& handle, * @param n_rows: Number of rows in matrix * @param n_cols: Number of columns in matrix */ -template +template float sparse_search_preprocess(raft::resources& handle, - raft::device_vector_view rows, - raft::device_vector_view columns, - raft::device_vector_view values, - raft::device_vector_view feat_lengths, - raft::device_vector_view id_counts, - T1 n_rows, - T1 n_cols) + raft::device_vector_view rows, + raft::device_vector_view columns, + raft::device_vector_view values, + raft::device_vector_view feat_lengths, + raft::device_vector_view id_counts, + IndexType n_rows, + IndexType n_cols) { auto avg_feature_len = get_feature_data(handle, rows, columns, values, feat_lengths, n_cols); @@ -290,6 +313,9 @@ float sparse_search_preprocess(raft::resources& handle, /** * @brief Use TFIDF algorithm to encode features in COO sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param rows: Input COO rows array * @param columns: Input COO columns array @@ -298,24 +324,24 @@ float sparse_search_preprocess(raft::resources& handle, * @param n_rows: Number of rows in matrix * @param n_cols: Number of columns in matrix */ -template +template void base_encode_tfidf(raft::resources& handle, - raft::device_vector_view rows, - raft::device_vector_view columns, - raft::device_vector_view values, - raft::device_vector_view values_out, - T1 n_rows, - T1 n_cols) + raft::device_vector_view rows, + raft::device_vector_view columns, + raft::device_vector_view values, + raft::device_vector_view values_out, + IndexType n_rows, + IndexType n_cols) { - auto feat_lengths = raft::make_device_vector(handle, values.size()); - auto id_counts = raft::make_device_vector(handle, values.size()); - auto col_counts = raft::make_device_vector(handle, n_cols); - auto avg_feat_length = sparse_search_preprocess( + auto feat_lengths = raft::make_device_vector(handle, values.size()); + auto id_counts = raft::make_device_vector(handle, values.size()); + auto col_counts = raft::make_device_vector(handle, n_cols); + auto avg_feat_length = sparse_search_preprocess( handle, rows, columns, values, feat_lengths.view(), id_counts.view(), n_rows, n_cols); raft::linalg::map(handle, values_out, - tfidf(n_cols), + tfidf(n_cols), raft::make_const_mdspan(values), raft::make_const_mdspan(id_counts.view()), raft::make_const_mdspan(feat_lengths.view())); @@ -323,69 +349,79 @@ void base_encode_tfidf(raft::resources& handle, /** * @brief Use TFIDF algorithm to encode features in COO sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param coo_in: Input COO matrix * @param values_out: Output COO values array */ -template +template void encode_tfidf(raft::resources& handle, - raft::device_coo_matrix_view coo_in, - raft::device_vector_view values_out) + raft::device_coo_matrix_view coo_in, + raft::device_vector_view values_out) { - auto rows = raft::make_device_vector_view(coo_in.structure_view().get_rows().data(), - coo_in.structure_view().get_rows().size()); - auto columns = raft::make_device_vector_view(coo_in.structure_view().get_cols().data(), - coo_in.structure_view().get_cols().size()); - auto values = raft::make_device_vector_view(coo_in.get_elements().data(), - coo_in.get_elements().size()); - - base_encode_tfidf(handle, - rows, - columns, - values, - values_out, - coo_in.structure_view().get_n_rows(), - coo_in.structure_view().get_n_cols()); + auto rows = raft::make_device_vector_view( + coo_in.structure_view().get_rows().data(), coo_in.structure_view().get_rows().size()); + auto columns = raft::make_device_vector_view( + coo_in.structure_view().get_cols().data(), coo_in.structure_view().get_cols().size()); + auto values = raft::make_device_vector_view(coo_in.get_elements().data(), + coo_in.get_elements().size()); + + base_encode_tfidf(handle, + rows, + columns, + values, + values_out, + coo_in.structure_view().get_n_rows(), + coo_in.structure_view().get_n_cols()); } /** * @brief Use TFIDF algorithm to encode features in CSR sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param csr_in: Input CSR matrix * @param values_out: Output values array */ -template +template void encode_tfidf(raft::resources& handle, - raft::device_csr_matrix_view csr_in, - raft::device_vector_view values_out) + raft::device_csr_matrix_view csr_in, + raft::device_vector_view values_out) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); - auto indptr = raft::make_device_vector_view( + auto indptr = raft::make_device_vector_view( csr_in.structure_view().get_indptr().data(), csr_in.structure_view().get_indptr().size()); - auto indices = raft::make_device_vector_view( + auto indices = raft::make_device_vector_view( csr_in.structure_view().get_indices().data(), csr_in.structure_view().get_indices().size()); - auto values = raft::make_device_vector_view(csr_in.get_elements().data(), - csr_in.get_elements().size()); - - auto rows = raft::make_device_vector(handle, values.size()); - raft::sparse::convert::csr_to_coo(indptr.data_handle(), - csr_in.structure_view().get_n_rows(), - rows.data_handle(), - rows.size(), - stream); - - base_encode_tfidf(handle, - rows.view(), - indices, - values, - values_out, - csr_in.structure_view().get_n_rows(), - csr_in.structure_view().get_n_cols()); + auto values = raft::make_device_vector_view(csr_in.get_elements().data(), + csr_in.get_elements().size()); + + auto rows = raft::make_device_vector(handle, values.size()); + + raft::sparse::convert::csr_to_coo(indptr.data_handle(), + csr_in.structure_view().get_n_rows(), + rows.data_handle(), + rows.size(), + stream); + + base_encode_tfidf(handle, + rows.view(), + indices, + values, + values_out, + csr_in.structure_view().get_n_rows(), + csr_in.structure_view().get_n_cols()); } /** * @brief Use BM25 algorithm to encode features in COO sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param rows: Input COO rows array * @param columns: Input COO columns array @@ -396,27 +432,27 @@ void encode_tfidf(raft::resources& handle, * @param k_param: K value to use for BM25 algorithm * @param b_param: B value to use for BM25 algorithm */ -template +template void base_encode_bm25(raft::resources& handle, - raft::device_vector_view rows, - raft::device_vector_view columns, - raft::device_vector_view values, - raft::device_vector_view values_out, - T1 n_rows, - T1 n_cols, - T2 k_param = 1.6f, - T2 b_param = 0.75f) + raft::device_vector_view rows, + raft::device_vector_view columns, + raft::device_vector_view values, + raft::device_vector_view values_out, + IndexType n_rows, + IndexType n_cols, + ValueType k_param = 1.6f, + ValueType b_param = 0.75f) { - auto feat_lengths = raft::make_device_vector(handle, values.size()); - auto id_counts = raft::make_device_vector(handle, values.size()); - auto col_counts = raft::make_device_vector(handle, n_cols); + auto feat_lengths = raft::make_device_vector(handle, values.size()); + auto id_counts = raft::make_device_vector(handle, values.size()); + auto col_counts = raft::make_device_vector(handle, n_cols); - auto avg_feat_length = sparse_search_preprocess( + auto avg_feat_length = sparse_search_preprocess( handle, rows, columns, values, feat_lengths.view(), id_counts.view(), n_rows, n_cols); raft::linalg::map(handle, values_out, - bm25(n_cols, avg_feat_length, k_param, b_param), + bm25(n_cols, avg_feat_length, k_param, b_param), raft::make_const_mdspan(values), raft::make_const_mdspan(id_counts.view()), raft::make_const_mdspan(feat_lengths.view())); @@ -424,74 +460,80 @@ void base_encode_bm25(raft::resources& handle, /** * @brief Use BM25 algorithm to encode features in COO sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param coo_in: Input COO matrix * @param values_out: Output values array * @param k_param: K value to use for BM25 algorithm * @param b_param: B value to use for BM25 algorithm */ -template +template void encode_bm25(raft::resources& handle, - raft::device_coo_matrix_view coo_in, - raft::device_vector_view values_out, - T2 k_param = 1.6f, - T2 b_param = 0.75f) + raft::device_coo_matrix_view coo_in, + raft::device_vector_view values_out, + ValueType k_param = 1.6f, + ValueType b_param = 0.75f) { - auto rows = raft::make_device_vector_view(coo_in.structure_view().get_rows().data(), - coo_in.structure_view().get_rows().size()); - auto columns = raft::make_device_vector_view(coo_in.structure_view().get_cols().data(), - coo_in.structure_view().get_cols().size()); - auto values = raft::make_device_vector_view(coo_in.get_elements().data(), - coo_in.get_elements().size()); - - base_encode_bm25(handle, - rows, - columns, - values, - values_out, - coo_in.structure_view().get_n_rows(), - coo_in.structure_view().get_n_cols()); + auto rows = raft::make_device_vector_view( + coo_in.structure_view().get_rows().data(), coo_in.structure_view().get_rows().size()); + auto columns = raft::make_device_vector_view( + coo_in.structure_view().get_cols().data(), coo_in.structure_view().get_cols().size()); + auto values = raft::make_device_vector_view(coo_in.get_elements().data(), + coo_in.get_elements().size()); + + base_encode_bm25(handle, + rows, + columns, + values, + values_out, + coo_in.structure_view().get_n_rows(), + coo_in.structure_view().get_n_cols()); } /** * @brief Use BM25 algorithm to encode features in CSR sparse matrix + * @tparam IndexType: the type of the edge indexes in the matrix + * @tparam ValueType: the type of the values for edges + * @tparam IdxT: the type of the index values * @param handle: raft resource handle * @param csr_in: Input CSR matrix * @param values_out: Output values array * @param k_param: K value to use for BM25 algorithm * @param b_param: B value to use for BM25 algorithm */ -template +template void encode_bm25(raft::resources& handle, - raft::device_csr_matrix_view csr_in, - raft::device_vector_view values_out, - T2 k_param = 1.6f, - T2 b_param = 0.75f) + raft::device_csr_matrix_view csr_in, + raft::device_vector_view values_out, + ValueType k_param = 1.6f, + ValueType b_param = 0.75f) { cudaStream_t stream = raft::resource::get_cuda_stream(handle); - auto indptr = raft::make_device_vector_view( + auto indptr = raft::make_device_vector_view( csr_in.structure_view().get_indptr().data(), csr_in.structure_view().get_indptr().size()); - auto indices = raft::make_device_vector_view( + auto indices = raft::make_device_vector_view( csr_in.structure_view().get_indices().data(), csr_in.structure_view().get_indices().size()); - auto values = raft::make_device_vector_view(csr_in.get_elements().data(), - csr_in.get_elements().size()); - - auto rows = raft::make_device_vector(handle, values.size()); - - raft::sparse::convert::csr_to_coo(indptr.data_handle(), - csr_in.structure_view().get_n_rows(), - rows.data_handle(), - rows.size(), - stream); - - base_encode_bm25(handle, - rows.view(), - indices, - values, - values_out, - csr_in.structure_view().get_n_rows(), - csr_in.structure_view().get_n_cols()); + auto values = raft::make_device_vector_view(csr_in.get_elements().data(), + csr_in.get_elements().size()); + + auto rows = raft::make_device_vector(handle, values.size()); + + raft::sparse::convert::csr_to_coo(indptr.data_handle(), + csr_in.structure_view().get_n_rows(), + rows.data_handle(), + rows.size(), + stream); + + base_encode_bm25(handle, + rows.view(), + indices, + values, + values_out, + csr_in.structure_view().get_n_rows(), + csr_in.structure_view().get_n_cols()); } } // namespace raft::sparse::matrix::detail \ No newline at end of file diff --git a/cpp/include/raft/sparse/matrix/preprocessing.cuh b/cpp/include/raft/sparse/matrix/preprocessing.cuh index e4b3edd64b..58335fb5c7 100644 --- a/cpp/include/raft/sparse/matrix/preprocessing.cuh +++ b/cpp/include/raft/sparse/matrix/preprocessing.cuh @@ -28,66 +28,80 @@ namespace raft::sparse::matrix { /** * @brief Use BM25 algorithm to encode features in COO sparse matrix + * @tparam IndexType is the type of the edges index in the coo matrix + * @tparam ValueType is the type of the values array in the coo matrix + * @tparam IdxT is the type of the indices of arrays in matrix * @param handle: raft resource handle * @param coo_in: Input COO matrix * @param values_out: Output values array * @param k_param: K value to use for BM25 algorithm * @param b_param: B value to use for BM25 algorithm */ -template +template void encode_bm25(raft::resources& handle, - raft::device_coo_matrix_view coo_in, - raft::device_vector_view values_out, + raft::device_coo_matrix_view coo_in, + raft::device_vector_view values_out, float k_param = 1.6f, float b_param = 0.75) { - return matrix::detail::encode_bm25(handle, coo_in, values_out, k_param, b_param); + return matrix::detail::encode_bm25( + handle, coo_in, values_out, k_param, b_param); } /** * @brief Use BM25 algorithm to encode features in CSR sparse matrix * @param handle: raft resource handle + * @tparam IndexType is the type of the edges index in the csr matrix + * @tparam ValueType is the type of the values array in the csr matrix + * @tparam IdxT is the type of the indices of arrays in matrix * @param csr_in: Input CSR matrix * @param values_out: Output values array * @param k_param: K value to use for BM25 algorithm * @param b_param: B value to use for BM25 algorithm */ -template +template void encode_bm25(raft::resources& handle, - raft::device_csr_matrix_view csr_in, - raft::device_vector_view values_out, + raft::device_csr_matrix_view csr_in, + raft::device_vector_view values_out, float k_param = 1.6f, float b_param = 0.75) { - return matrix::detail::encode_bm25(handle, csr_in, values_out, k_param, b_param); + return matrix::detail::encode_bm25( + handle, csr_in, values_out, k_param, b_param); } /** * @brief Use TFIDF algorithm to encode features in COO sparse matrix + * @tparam IndexType is the type of the edges index in the coo matrix + * @tparam ValueType is the type of the values array in the coo matrix + * @tparam IdxT is the type of the indices of arrays in matrix * @param handle: raft resource handle * @param coo_in: Input COO matrix * @param values_out: Output COO values array */ -template +template void encode_tfidf(raft::resources& handle, - raft::device_coo_matrix_view coo_in, - raft::device_vector_view values_out) + raft::device_coo_matrix_view coo_in, + raft::device_vector_view values_out) { - return matrix::detail::encode_tfidf(handle, coo_in, values_out); + return matrix::detail::encode_tfidf(handle, coo_in, values_out); } /** * @brief Use TFIDF algorithm to encode features in CSR sparse matrix + * @tparam IndexType is the type of the edges index in the csr matrix + * @tparam ValueType is the type of the values array in the csr matrix + * @tparam IdxT is the type of the indices of arrays in matrix * @param handle: raft resource handle * @param csr_in: Input CSR matrix * @param values_out: Output values array */ -template +template void encode_tfidf(raft::resources& handle, - raft::device_csr_matrix_view csr_in, - raft::device_vector_view values_out) + raft::device_csr_matrix_view csr_in, + raft::device_vector_view values_out) { - return matrix::detail::encode_tfidf(handle, csr_in, values_out); + return matrix::detail::encode_tfidf(handle, csr_in, values_out); } } // namespace raft::sparse::matrix diff --git a/cpp/include/raft/sparse/neighbors/knn.cuh b/cpp/include/raft/sparse/neighbors/knn.cuh index 7b93ea4d0d..ec278e12e4 100644 --- a/cpp/include/raft/sparse/neighbors/knn.cuh +++ b/cpp/include/raft/sparse/neighbors/knn.cuh @@ -109,6 +109,8 @@ void brute_force_knn(const value_idx* idxIndptr, /** * Search the sparse kNN for the k-nearest neighbors of a set of sparse query vectors * using some distance implementation + * @tparam value_idx is the type of the edges index in the csr matrix + * @tparam value_t is the type of the values array in the csr matrix * @param[in] csr_idx index csr matrix * @param[in] csr_query query csr matrix * @param[out] output_indices dense matrix for output indices (size n_query_rows * k) @@ -146,10 +148,14 @@ void brute_force_knn(raft::device_csr_matrix 0, "No Values were detected in the Index CSR Matrix."); + auto queryIndptr = csr_query.structure_view().get_indptr(); auto queryIndices = csr_query.structure_view().get_indices(); auto queryData = csr_query.view().get_elements(); + RAFT_EXPECTS(queryData.size() > 0, "No Values were detected in the Query CSR Matrix."); + brute_force::knn(idxIndptr.data(), idxIndices.data(), idxData.data(), @@ -175,6 +181,8 @@ void brute_force_knn(raft::device_csr_matrix 0, "No Values were detected in the Index COO Matrix."); + auto queryRows = coo_query.structure_view().get_rows(); auto queryCols = coo_query.structure_view().get_cols(); auto queryData = coo_query.view().get_elements(); + RAFT_EXPECTS(queryData.size() > 0, "No Values were detected in the Query COO Matrix."); + raft::sparse::op::coo_sort(int(idxRows.size()), int(idxCols.size()), int(idxData.size()), diff --git a/cpp/template/build.sh b/cpp/template/build.sh new file mode 100755 index 0000000000..d7e011e366 --- /dev/null +++ b/cpp/template/build.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +# raft empty project template build script + +# Abort script on first error +set -e + +PARALLEL_LEVEL=${PARALLEL_LEVEL:=`nproc`} + +BUILD_TYPE=Release +BUILD_DIR=build/ + +RAFT_REPO_REL="" +EXTRA_CMAKE_ARGS="" +set -e + + +if [[ ${RAFT_REPO_REL} != "" ]]; then + RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`" + EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}" +fi + +if [ "$1" == "clean" ]; then + rm -rf build + exit 0 +fi + +mkdir -p $BUILD_DIR +cd $BUILD_DIR + +cmake \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DRAFT_NVTX=OFF \ + -DCMAKE_CUDA_ARCHITECTURES="RAPIDS" \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + ${EXTRA_CMAKE_ARGS} \ + ../ + +cmake --build . -j${PARALLEL_LEVEL} diff --git a/cpp/test/sparse/preprocess_coo.cu b/cpp/test/sparse/preprocess_coo.cu index 44dac88cdb..48cf9ae64c 100644 --- a/cpp/test/sparse/preprocess_coo.cu +++ b/cpp/test/sparse/preprocess_coo.cu @@ -162,6 +162,12 @@ TEST_P(SparsePreprocessTfidfCoo, Result) { Run(false); } using SparsePreprocessBm25Coo = SparsePreprocessCoo; TEST_P(SparsePreprocessBm25Coo, Result) { Run(true); } +using SparsePreprocessTfidfCooBig = SparsePreprocessCoo; +TEST_P(SparsePreprocessTfidfCooBig, Result) { Run(false); } + +using SparsePreprocessBm25CooBig = SparsePreprocessCoo; +TEST_P(SparsePreprocessBm25CooBig, Result) { Run(true); } + const std::vector> sparse_preprocess_inputs = { { 10, // n_rows_factor @@ -170,6 +176,14 @@ const std::vector> sparse_preprocess_inputs = }, }; +const std::vector> sparse_preprocess_inputs_big = { + { + 15, // n_rows_factor + 15, // n_cols_factor + 1000000 // nnz_edges + }, +}; + INSTANTIATE_TEST_CASE_P(SparsePreprocessCoo, SparsePreprocessTfidfCoo, ::testing::ValuesIn(sparse_preprocess_inputs)); @@ -177,5 +191,12 @@ INSTANTIATE_TEST_CASE_P(SparsePreprocessCoo, SparsePreprocessBm25Coo, ::testing::ValuesIn(sparse_preprocess_inputs)); +INSTANTIATE_TEST_CASE_P(SparsePreprocessCoo, + SparsePreprocessTfidfCooBig, + ::testing::ValuesIn(sparse_preprocess_inputs_big)); +INSTANTIATE_TEST_CASE_P(SparsePreprocessCoo, + SparsePreprocessBm25CooBig, + ::testing::ValuesIn(sparse_preprocess_inputs_big)); + } // namespace sparse } // namespace raft \ No newline at end of file diff --git a/cpp/test/sparse/preprocess_csr.cu b/cpp/test/sparse/preprocess_csr.cu index e48aabcaa4..c5751f44cb 100644 --- a/cpp/test/sparse/preprocess_csr.cu +++ b/cpp/test/sparse/preprocess_csr.cu @@ -116,9 +116,9 @@ class SparsePreprocessCSR columns_nnz.view(), values_nnz.view(), num_rows); - auto rows_csr = raft::make_device_vector(handle, non_dupe_nnz_count); + auto rows_csr = raft::make_device_vector(handle, non_dupe_nnz_count); raft::sparse::convert::sorted_coo_to_csr( - rows_nnz.data_handle(), int(rows_nnz.size()), rows_csr.data_handle(), num_rows, stream); + rows_nnz.data_handle(), non_dupe_nnz_count, rows_csr.data_handle(), num_rows, stream); auto csr_struct_view = raft::make_device_compressed_structure_view(rows_csr.data_handle(), columns_nnz.data_handle(), @@ -169,6 +169,12 @@ TEST_P(SparsePreprocessTfidfCsr, Result) { Run(false); } using SparsePreprocessBm25Csr = SparsePreprocessCSR; TEST_P(SparsePreprocessBm25Csr, Result) { Run(true); } +using SparsePreprocessTfidfCsrBig = SparsePreprocessCSR; +TEST_P(SparsePreprocessTfidfCsrBig, Result) { Run(false); } + +using SparsePreprocessBm25CsrBig = SparsePreprocessCSR; +TEST_P(SparsePreprocessBm25CsrBig, Result) { Run(true); } + const std::vector> sparse_preprocess_inputs = { { 7, // n_rows_factor @@ -177,6 +183,14 @@ const std::vector> sparse_preprocess_inputs = }, }; +const std::vector> sparse_preprocess_inputs_big = { + { + 12, // n_rows_factor + 12, // n_cols_factor + 100000 // nnz_edges + }, +}; + INSTANTIATE_TEST_CASE_P(SparsePreprocessCSR, SparsePreprocessTfidfCsr, ::testing::ValuesIn(sparse_preprocess_inputs)); @@ -184,5 +198,12 @@ INSTANTIATE_TEST_CASE_P(SparsePreprocessCSR, SparsePreprocessBm25Csr, ::testing::ValuesIn(sparse_preprocess_inputs)); +INSTANTIATE_TEST_CASE_P(SparsePreprocessCSR, + SparsePreprocessTfidfCsrBig, + ::testing::ValuesIn(sparse_preprocess_inputs_big)); +INSTANTIATE_TEST_CASE_P(SparsePreprocessCSR, + SparsePreprocessBm25CsrBig, + ::testing::ValuesIn(sparse_preprocess_inputs_big)); + } // namespace sparse } // namespace raft \ No newline at end of file