Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into pandas_upgrade
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Oct 4, 2024
2 parents 14093d7 + a8da1ff commit cfa8819
Show file tree
Hide file tree
Showing 35 changed files with 503 additions and 169 deletions.
14 changes: 7 additions & 7 deletions cpp/include/cudf/detail/utilities/logger.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,9 +19,9 @@
#include <cudf/utilities/logger.hpp>

// Log messages that require computation should only be used at level TRACE and DEBUG
#define CUDF_LOG_TRACE(...) SPDLOG_LOGGER_TRACE(&cudf::logger(), __VA_ARGS__)
#define CUDF_LOG_DEBUG(...) SPDLOG_LOGGER_DEBUG(&cudf::logger(), __VA_ARGS__)
#define CUDF_LOG_INFO(...) SPDLOG_LOGGER_INFO(&cudf::logger(), __VA_ARGS__)
#define CUDF_LOG_WARN(...) SPDLOG_LOGGER_WARN(&cudf::logger(), __VA_ARGS__)
#define CUDF_LOG_ERROR(...) SPDLOG_LOGGER_ERROR(&cudf::logger(), __VA_ARGS__)
#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::logger(), __VA_ARGS__)
// Logging macros, one per severity level. Each forwards its arguments to the
// SPDLOG_LOGGER_* macro of the same level, passing a pointer to the logger
// returned by cudf::detail::logger().
#define CUDF_LOG_TRACE(...) SPDLOG_LOGGER_TRACE(&cudf::detail::logger(), __VA_ARGS__)
#define CUDF_LOG_DEBUG(...) SPDLOG_LOGGER_DEBUG(&cudf::detail::logger(), __VA_ARGS__)
#define CUDF_LOG_INFO(...) SPDLOG_LOGGER_INFO(&cudf::detail::logger(), __VA_ARGS__)
#define CUDF_LOG_WARN(...) SPDLOG_LOGGER_WARN(&cudf::detail::logger(), __VA_ARGS__)
#define CUDF_LOG_ERROR(...) SPDLOG_LOGGER_ERROR(&cudf::detail::logger(), __VA_ARGS__)
#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::detail::logger(), __VA_ARGS__)
8 changes: 7 additions & 1 deletion cpp/include/cudf/utilities/logger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@

namespace CUDF_EXPORT cudf {

namespace detail {
/**
 * @brief Returns the logger that the CUDF_LOG_* macros write to.
 *
 * @return spdlog::logger& The logger.
 */
spdlog::logger& logger();
}  // namespace detail

/**
* @brief Returns the global logger.
*
Expand All @@ -43,6 +47,8 @@ namespace CUDF_EXPORT cudf {
*
* @return spdlog::logger& The logger.
*/
spdlog::logger& logger();
[[deprecated(
"Support for direct access to spdlog loggers in cudf is planned for removal")]] spdlog::logger&
logger();

} // namespace CUDF_EXPORT cudf
2 changes: 1 addition & 1 deletion cpp/include/nvtext/edit_distance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ namespace CUDF_EXPORT nvtext {
* @param targets Strings to compute edit distance against `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings columns of with replaced strings
* @return New lists column of edit distance values
*/
std::unique_ptr<cudf::column> edit_distance(
cudf::strings_column_view const& input,
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/utilities/logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ struct logger_wrapper {

} // namespace

spdlog::logger& cudf::logger()
// Returns the `logger_` member of a function-local static `logger_wrapper`,
// so every call hands back a reference to the same logger object.
spdlog::logger& cudf::detail::logger()
{
static logger_wrapper wrapped{};
return wrapped.logger_;
}

// Public accessor (declared [[deprecated]] in cudf/utilities/logger.hpp);
// it only forwards to cudf::detail::logger().
spdlog::logger& cudf::logger() { return cudf::detail::logger(); }
37 changes: 19 additions & 18 deletions cpp/tests/utilities_tests/logger_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,17 @@ class LoggerTest : public cudf::test::BaseFixture {
std::vector<spdlog::sink_ptr> prev_sinks;

public:
LoggerTest() : prev_level{cudf::logger().level()}, prev_sinks{cudf::logger().sinks()}
LoggerTest()
: prev_level{cudf::detail::logger().level()}, prev_sinks{cudf::detail::logger().sinks()}
{
cudf::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
cudf::logger().set_formatter(
cudf::detail::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
cudf::detail::logger().set_formatter(
std::unique_ptr<spdlog::formatter>(new spdlog::pattern_formatter("%v")));
}
~LoggerTest() override
{
cudf::logger().set_level(prev_level);
cudf::logger().sinks() = prev_sinks;
cudf::detail::logger().set_level(prev_level);
cudf::detail::logger().sinks() = prev_sinks;
}

void clear_sink() { oss.str(""); }
Expand All @@ -46,32 +47,32 @@ class LoggerTest : public cudf::test::BaseFixture {

TEST_F(LoggerTest, Basic)
{
cudf::logger().critical("crit msg");
cudf::detail::logger().critical("crit msg");
ASSERT_EQ(this->sink_content(), "crit msg\n");
}

TEST_F(LoggerTest, DefaultLevel)
{
cudf::logger().trace("trace");
cudf::logger().debug("debug");
cudf::logger().info("info");
cudf::logger().warn("warn");
cudf::logger().error("error");
cudf::logger().critical("critical");
cudf::detail::logger().trace("trace");
cudf::detail::logger().debug("debug");
cudf::detail::logger().info("info");
cudf::detail::logger().warn("warn");
cudf::detail::logger().error("error");
cudf::detail::logger().critical("critical");
ASSERT_EQ(this->sink_content(), "warn\nerror\ncritical\n");
}

TEST_F(LoggerTest, CustomLevel)
{
cudf::logger().set_level(spdlog::level::warn);
cudf::logger().info("info");
cudf::logger().warn("warn");
cudf::detail::logger().set_level(spdlog::level::warn);
cudf::detail::logger().info("info");
cudf::detail::logger().warn("warn");
ASSERT_EQ(this->sink_content(), "warn\n");

this->clear_sink();

cudf::logger().set_level(spdlog::level::debug);
cudf::logger().trace("trace");
cudf::logger().debug("debug");
cudf::detail::logger().set_level(spdlog::level::debug);
cudf::detail::logger().trace("trace");
cudf::detail::logger().debug("debug");
ASSERT_EQ(this->sink_content(), "debug\n");
}
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ This page provides API documentation for pylibcudf.

io/index.rst
strings/index.rst
nvtext/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=============
edit_distance
=============

.. automodule:: pylibcudf.nvtext.edit_distance
    :members:
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
nvtext
======

.. toctree::
    :maxdepth: 1

    edit_distance
34 changes: 10 additions & 24 deletions python/cudf/cudf/_lib/nvtext/edit_distance.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,23 @@

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.nvtext.edit_distance cimport (
edit_distance as cpp_edit_distance,
edit_distance_matrix as cpp_edit_distance_matrix,
)
from pylibcudf cimport nvtext

from cudf._lib.column cimport Column


@acquire_spill_lock()
def edit_distance(Column strings, Column targets):
cdef column_view c_strings = strings.view()
cdef column_view c_targets = targets.view()
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_edit_distance(c_strings, c_targets))

return Column.from_unique_ptr(move(c_result))
result = nvtext.edit_distance.edit_distance(
strings.to_pylibcudf(mode="read"),
targets.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(result)


@acquire_spill_lock()
def edit_distance_matrix(Column strings):
cdef column_view c_strings = strings.view()
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_edit_distance_matrix(c_strings))

return Column.from_unique_ptr(move(c_result))
result = nvtext.edit_distance.edit_distance_matrix(
strings.to_pylibcudf(mode="read")
)
return Column.from_pylibcudf(result)
110 changes: 18 additions & 92 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from cudf._lib.column cimport Column

from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar

from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES

from libcpp.memory cimport unique_ptr
Expand All @@ -14,14 +11,6 @@ from libcpp.utility cimport move

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.scalar.scalar cimport string_scalar
from pylibcudf.libcudf.strings.convert.convert_booleans cimport (
from_booleans as cpp_from_booleans,
to_booleans as cpp_to_booleans,
)
from pylibcudf.libcudf.strings.convert.convert_datetime cimport (
is_timestamp as cpp_is_timestamp,
)
from pylibcudf.libcudf.strings.convert.convert_floats cimport (
from_floats as cpp_from_floats,
to_floats as cpp_to_floats,
Expand Down Expand Up @@ -427,77 +416,21 @@ def stoul(Column input_col):
return string_to_integer(input_col, cudf.dtype("uint64"))


def _to_booleans(Column input_col, object string_true="True"):
"""
Converting/Casting input column of type string to boolean column
Parameters
----------
input_col : input column of type string
string_true : string that represents True
Returns
-------
A Column with string values cast to boolean
"""

cdef DeviceScalar str_true = as_device_scalar(string_true)
cdef column_view input_column_view = input_col.view()
cdef const string_scalar* string_scalar_true = <const string_scalar*>(
str_true.get_raw_ptr())
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_to_booleans(
input_column_view,
string_scalar_true[0]))

return Column.from_unique_ptr(move(c_result))


def to_booleans(Column input_col):

return _to_booleans(input_col)


def _from_booleans(
Column input_col,
object string_true="True",
object string_false="False"):
"""
Converting/Casting input column of type boolean to string column
Parameters
----------
input_col : input column of type boolean
string_true : string that represents True
string_false : string that represents False
Returns
-------
A Column with boolean values cast to string
"""

cdef DeviceScalar str_true = as_device_scalar(string_true)
cdef DeviceScalar str_false = as_device_scalar(string_false)
cdef column_view input_column_view = input_col.view()
cdef const string_scalar* string_scalar_true = <const string_scalar*>(
str_true.get_raw_ptr())
cdef const string_scalar* string_scalar_false = <const string_scalar*>(
str_false.get_raw_ptr())
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_from_booleans(
input_column_view,
string_scalar_true[0],
string_scalar_false[0]))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_booleans.to_booleans(
input_col.to_pylibcudf(mode="read"),
as_device_scalar("True").c_value,
)
return Column.from_pylibcudf(plc_column)


def from_booleans(Column input_col):
return _from_booleans(input_col)
plc_column = plc.strings.convert.convert_booleans.from_booleans(
input_col.to_pylibcudf(mode="read"),
as_device_scalar("True").c_value,
as_device_scalar("False").c_value,
)
return Column.from_pylibcudf(plc_column)


def int2timestamp(
Expand All @@ -520,11 +453,10 @@ def int2timestamp(
A Column with date-time represented in string format
"""
cdef string c_timestamp_format = format.encode("UTF-8")
return Column.from_pylibcudf(
plc.strings.convert.convert_datetime.from_timestamps(
input_col.to_pylibcudf(mode="read"),
c_timestamp_format,
format,
names.to_pylibcudf(mode="read")
)
)
Expand All @@ -545,12 +477,11 @@ def timestamp2int(Column input_col, dtype, format):
"""
dtype = dtype_to_pylibcudf_type(dtype)
cdef string c_timestamp_format = format.encode('UTF-8')
return Column.from_pylibcudf(
plc.strings.convert.convert_datetime.to_timestamps(
input_col.to_pylibcudf(mode="read"),
dtype,
c_timestamp_format
format
)
)

Expand All @@ -572,16 +503,11 @@ def istimestamp(Column input_col, str format):
"""
if input_col.size == 0:
return cudf.core.column.column_empty(0, dtype=cudf.dtype("bool"))
cdef column_view input_column_view = input_col.view()
cdef string c_timestamp_format = <string>str(format).encode('UTF-8')
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_is_timestamp(
input_column_view,
c_timestamp_format))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_datetime.is_timestamp(
input_col.to_pylibcudf(mode="read"),
format
)
return Column.from_pylibcudf(plc_column)


def timedelta2int(Column input_col, dtype, format):
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def _tile(A, reps):
nval = len(value_vars)
dtype = min_unsigned_type(nval)

if not var_name:
if var_name is None:
var_name = "variable"

if not value_vars:
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def test_melt_str_scalar_id_var():
assert_eq(result, expected)


# Checks that cudf.melt matches pandas.melt when var_name is the empty string,
# i.e. a falsy value that is not None.
def test_melt_falsy_var_name():
df = cudf.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
result = cudf.melt(df, id_vars=["A"], value_vars=["B"], var_name="")
# Build the expected frame by running pandas on the same data and arguments.
expected = pd.melt(
df.to_pandas(), id_vars=["A"], value_vars=["B"], var_name=""
)
assert_eq(result, expected)


@pytest.mark.parametrize("num_cols", [1, 2, 10])
@pytest.mark.parametrize("num_rows", [1, 2, 1000])
@pytest.mark.parametrize(
Expand Down
Loading

0 comments on commit cfa8819

Please sign in to comment.