diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 22e25ec26c1d..d9e80cb141d8 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -955,13 +955,17 @@ def string_to_date(op, **kw): ) +@translate.register(ops.StringToTime) +def string_to_time(op, **kw): + arg = translate(op.arg, **kw) + return arg.str.to_time(format=_literal_value(op.format_str)) + + @translate.register(ops.StringToTimestamp) def string_to_timestamp(op, **kw): arg = translate(op.arg, **kw) - return arg.str.strptime( - dtype=pl.Datetime, - format=_literal_value(op.format_str), - ) + format = _literal_value(op.format_str) + return arg.str.strptime(dtype=pl.Datetime, format=format) @translate.register(ops.TimestampDiff) diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index e41290bbab91..fb2ac89c7fb1 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -1042,6 +1042,9 @@ def visit_NotNull(self, op, *, arg): def visit_InValues(self, op, *, value, options): return value.isin(*options) + def visit_StringToTime(self, op, *, arg, format_str): + return self.f.time(self.f.str_to_time(arg, format_str)) + ### Counting def visit_CountDistinct(self, op, *, arg, where): diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index 1c683b511514..4913a3636114 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -53,6 +53,7 @@ class ClickHouseCompiler(SQLGlotCompiler): ops.TimeDelta, ops.StringToTimestamp, ops.StringToDate, + ops.StringToTime, ops.Levenshtein, ) diff --git a/ibis/backends/sql/compilers/datafusion.py b/ibis/backends/sql/compilers/datafusion.py index 8cecd30c02d1..51b8c4472d5e 100644 --- a/ibis/backends/sql/compilers/datafusion.py +++ b/ibis/backends/sql/compilers/datafusion.py @@ -49,6 +49,7 @@ class DataFusionCompiler(SQLGlotCompiler): ops.TypeOf, 
ops.StringToDate, ops.StringToTimestamp, + ops.StringToTime, ) SIMPLE_OPS = { diff --git a/ibis/backends/sql/compilers/druid.py b/ibis/backends/sql/compilers/druid.py index 6548265b95e3..9e8bacb02164 100644 --- a/ibis/backends/sql/compilers/druid.py +++ b/ibis/backends/sql/compilers/druid.py @@ -55,6 +55,7 @@ class DruidCompiler(SQLGlotCompiler): ops.StringAscii, ops.StringSplit, ops.StringToDate, + ops.StringToTime, ops.StringToTimestamp, ops.TimeDelta, ops.TimestampBucket, diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index acb365ba607d..87f3ca13eab3 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -709,5 +709,8 @@ def visit_TableUnnest( .join(unnest, join_type="CROSS" if not keep_empty else "LEFT") ) + def visit_StringToTime(self, op, *, arg, format_str): + return self.cast(self.f.str_to_time(arg, format_str), to=dt.time) + compiler = DuckDBCompiler() diff --git a/ibis/backends/sql/compilers/exasol.py b/ibis/backends/sql/compilers/exasol.py index f45a52cbffbb..fa5578783a01 100644 --- a/ibis/backends/sql/compilers/exasol.py +++ b/ibis/backends/sql/compilers/exasol.py @@ -65,6 +65,7 @@ class ExasolCompiler(SQLGlotCompiler): ops.StringSplit, ops.StringToDate, ops.StringToTimestamp, + ops.StringToTime, ops.TimeDelta, ops.TimestampAdd, ops.TimestampBucket, diff --git a/ibis/backends/sql/compilers/flink.py b/ibis/backends/sql/compilers/flink.py index 2cbb7163a034..10a3b3db0947 100644 --- a/ibis/backends/sql/compilers/flink.py +++ b/ibis/backends/sql/compilers/flink.py @@ -87,6 +87,7 @@ class FlinkCompiler(SQLGlotCompiler): ops.RowID, ops.StringSplit, ops.Translate, + ops.StringToTime, ) SIMPLE_OPS = { diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index fb87f14e9cdb..df40ecc55652 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -41,6 +41,7 @@ class ImpalaCompiler(SQLGlotCompiler): 
ops.RegexSplit, ops.RowID, ops.StringSplit, + ops.StringToTime, ops.StructColumn, ops.Time, ops.TimeDelta, diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index dd8d888d3442..aeeb4f9db4a3 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -115,6 +115,7 @@ class MSSQLCompiler(SQLGlotCompiler): ops.StringSplit, ops.StringToDate, ops.StringToTimestamp, + ops.StringToTime, ops.StructColumn, ops.TimestampDiff, ops.Unnest, diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index 5d3daf74c6f9..7fef6d7715f0 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -73,6 +73,7 @@ class OracleCompiler(SQLGlotCompiler): ops.ExtractDayOfYear, ops.RowID, ops.RandomUUID, + ops.StringToTime, ) SIMPLE_OPS = { diff --git a/ibis/backends/sql/compilers/postgres.py b/ibis/backends/sql/compilers/postgres.py index 51fe532cb905..3e38d87c5352 100644 --- a/ibis/backends/sql/compilers/postgres.py +++ b/ibis/backends/sql/compilers/postgres.py @@ -827,5 +827,8 @@ def visit_ArrayAny(self, op, *, arg): def visit_ArrayAll(self, op, *, arg): return self._array_reduction(arg=arg, reduction="bool_and") + def visit_StringToTime(self, op, *, arg, format_str): + return self.cast(self.f.str_to_time(arg, format_str), to=dt.time) + compiler = PostgresCompiler() diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index 5587a5186a76..55d661968cd3 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -63,6 +63,7 @@ class PySparkCompiler(SQLGlotCompiler): ops.RowID, ops.TimestampBucket, ops.RandomUUID, + ops.StringToTime, ) LOWERED_OPS = { diff --git a/ibis/backends/sql/compilers/sqlite.py b/ibis/backends/sql/compilers/sqlite.py index aec56809bcad..115d90aafae4 100644 --- a/ibis/backends/sql/compilers/sqlite.py +++ b/ibis/backends/sql/compilers/sqlite.py @@ 
-58,6 +58,7 @@ class SQLiteCompiler(SQLGlotCompiler): ops.TimestampDiff, ops.StringToDate, ops.StringToTimestamp, + ops.StringToTime, ops.TimeDelta, ops.TimestampDelta, ops.TryCast, diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index a821bc43535a..0e8873e77bfd 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -52,6 +52,7 @@ class TrinoCompiler(SQLGlotCompiler): ops.Median, ops.RowID, ops.TimestampBucket, + ops.StringToTime, ) LOWERED_OPS = { diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 9c08d7fe3245..e34f50bd2900 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1179,7 +1179,7 @@ def test_integer_to_timestamp(backend, con, unit): raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) -def test_string_to_timestamp(alltypes, fmt): +def test_string_as_timestamp(alltypes, fmt): table = alltypes result = table.mutate(date=table.date_string_col.as_timestamp(fmt)).execute() @@ -1250,7 +1250,7 @@ def test_string_to_timestamp(alltypes, fmt): raises=com.OperationNotDefinedError, ) @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) -def test_string_to_date(alltypes, fmt): +def test_string_as_date(alltypes, fmt): table = alltypes result = table.mutate(date=table.date_string_col.as_date(fmt)).execute() @@ -1260,6 +1260,37 @@ def test_string_to_date(alltypes, fmt): assert val.strftime("%m/%d/%y") == result["date_string_col"][i] +@pytest.mark.notyet( + [ + "pyspark", + "exasol", + "clickhouse", + "impala", + "mssql", + "oracle", + "trino", + "druid", + "datafusion", + "flink", + ], + raises=com.OperationNotDefinedError, +) +@pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedOperationError) +def test_string_as_time(backend, alltypes): + fmt = "%H:%M:%S" + table = alltypes.mutate( + 
time_string_col=alltypes.timestamp_col.truncate("s").time().cast(str) + ) + expr = table.mutate(time=table.time_string_col.as_time(fmt)) + result = expr.execute() + + # TEST: do we get the same time out that we put in? + # format string assumes the backend casts times to strings as HH:MM:SS + backend.assert_series_equal( + result["time"], result["timestamp_col"].dt.floor("s").dt.time.rename("time") + ) + + + @pytest.mark.parametrize( ("date", "expected_index", "expected_day"), [ diff --git a/ibis/expr/operations/temporal.py b/ibis/expr/operations/temporal.py index fa17d6f7c14f..da4ae348b466 100644 --- a/ibis/expr/operations/temporal.py +++ b/ibis/expr/operations/temporal.py @@ -94,6 +94,17 @@ class StringToDate(Value): dtype = dt.date +@public +class StringToTime(Value): + """Convert a string to a time.""" + + arg: Value[dt.String] + format_str: Value[dt.String] + + shape = rlz.shape_like("args") + dtype = dt.time + + + @public class ExtractTemporalField(Unary): """Extract a field from a temporal value.""" diff --git a/ibis/expr/types/strings.py b/ibis/expr/types/strings.py index 859381b91d33..49d272f6a299 100644 --- a/ibis/expr/types/strings.py +++ b/ibis/expr/types/strings.py @@ -1336,6 +1336,35 @@ def as_date(self, format_str: str) -> ir.DateValue: def to_date(self, format_str: str) -> ir.DateValue: return self.as_date(format_str=format_str) + def as_time(self, format_str: str) -> ir.TimeValue: + """Parse a string and return a time. 
+ + Parameters + ---------- + format_str + Format string in `strptime` format + + Returns + ------- + TimeValue + Parsed time value + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"ts": ["20:01:02"]}) + >>> t.ts.as_time("%H:%M:%S") + ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ StringToTime(ts, '%H:%M:%S') ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ time │ + ├──────────────────────────────┤ + │ 20:01:02 │ + └──────────────────────────────┘ + """ + return ops.StringToTime(self, format_str).to_expr() + def protocol(self): """Parse a URL and extract protocol.