Skip to content

Commit

Permalink
WIP: Implement inequality joins by translating to cross + filter
Browse files Browse the repository at this point in the history
Before working through the plumbing in pylibcudf for mixed and
conditional joins and the AST evaluator, let's just support inequality
joins by doing the basic thing: a cross join followed by a filter.
  • Loading branch information
wence- committed Oct 4, 2024
1 parent 0108391 commit b034937
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from __future__ import annotations

import functools
import json
from contextlib import AbstractContextManager, nullcontext
from functools import singledispatch
Expand Down Expand Up @@ -182,6 +183,27 @@ def _(
with set_node(visitor, node.input_right):
inp_right = translate_ir(visitor, n=None)
right_on = [translate_named_expr(visitor, n=e) for e in node.right_on]
if node.options[0] == "inequality":
# No exposure of mixed/conditional joins in pylibcudf yet, so in
# the first instance, implement by doing a cross join followed by
# a filter.
_, *options, op1, op2 = node.options
cross = ir.Join(schema, inp_left, inp_right, [], [], ("cross", *options))
dtype = plc.DataType(plc.TypeId.BOOL8)
if op2 is None:
ops = [op1]
else:
ops = [op1, op2]
mask = functools.reduce(
functools.partial(
expr.BinOp, dtype, plc.binaryop.BinaryOperator.LOGICAL_AND
),
(
expr.BinOp(dtype, expr.BinOp._MAPPING[op], left.value, right.value)
for op, left, right in zip(ops, left_on, right_on, strict=True)
),
)
return ir.Filter(schema, cross, expr.NamedExpr("mask", mask))
return ir.Join(schema, inp_left, inp_right, left_on, right_on, node.options)


Expand Down

0 comments on commit b034937

Please sign in to comment.