-
Notifications
You must be signed in to change notification settings - Fork 912
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate nvtext jaccard API to pylibcudf
- Loading branch information
Showing
10 changed files
with
83 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,4 @@ nvtext | |
|
||
edit_distance | ||
generate_ngrams | ||
jaccard |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/jaccard.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
======= | ||
jaccard | ||
======= | ||
|
||
.. automodule:: pylibcudf.nvtext.jaccard | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . cimport edit_distance, generate_ngrams | ||
from . cimport edit_distance, generate_ngrams, jaccard | ||
|
||
__all__ = [ | ||
"edit_distance", | ||
"generate_ngrams", | ||
"jaccard", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . import edit_distance, generate_ngrams | ||
from . import edit_distance, generate_ngrams, jaccard | ||
|
||
__all__ = [ | ||
"edit_distance", | ||
"generate_ngrams", | ||
"jaccard", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.types cimport size_type | ||
|
||
|
||
# Signature-only declaration of jaccard_index: takes two Column inputs and a
# size_type width, returns a Column.
# NOTE(review): presumably the .pxd counterpart of the cpdef implementation
# with the same signature — confirm against the file path.
cpdef Column jaccard_index(Column input1, Column input2, size_type width) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.column.column_view cimport column_view | ||
from pylibcudf.libcudf.nvtext.jaccard cimport ( | ||
jaccard_index as cpp_jaccard_index, | ||
) | ||
from pylibcudf.libcudf.types cimport size_type | ||
|
||
|
||
cpdef Column jaccard_index(Column input1, Column input2, size_type width):
    """
    Compute the Jaccard similarity between corresponding rows of two
    strings columns.

    For details, see :cpp:func:`jaccard_index`

    Parameters
    ----------
    input1 : Column
        First input strings column
    input2 : Column
        Second input strings column
    width : size_type
        The ngram number to generate

    Returns
    -------
    Column
        Index calculation values
    """
    cdef unique_ptr[column] result_owner
    cdef column_view lhs_view = input1.view()
    cdef column_view rhs_view = input2.view()

    # Release the GIL while the libcudf call runs; only C++ values
    # (the two views and the width) are used inside the block.
    with nogil:
        result_owner = move(cpp_jaccard_index(lhs_view, rhs_view, width))

    # Hand ownership of the libcudf column over to a pylibcudf Column.
    return Column.from_libcudf(move(result_owner))