Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(udf): move UDF implementations to expr_impl #16759

Merged
merged 13 commits into from
May 20, 2024
25 changes: 11 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 20 additions & 2 deletions Makefile.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ is_release = get_env ENABLE_RELEASE_PROFILE
is_not_release = not ${is_release}
is_dynamic_linking = get_env ENABLE_DYNAMIC_LINKING
is_hummock_trace = get_env ENABLE_HUMMOCK_TRACE
is_external_udf_enabled = get_env ENABLE_EXTERNAL_UDF
is_wasm_udf_enabled = get_env ENABLE_WASM_UDF
is_js_udf_enabled = get_env ENABLE_JS_UDF
is_deno_udf_enabled = get_env ENABLE_DENO_UDF
is_python_udf_enabled = get_env ENABLE_PYTHON_UDF

Expand All @@ -59,14 +62,29 @@ else
set_env RISINGWAVE_FEATURE_FLAGS "--features rw-static-link"
end

if ${is_external_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features external-udf"
end

if ${is_wasm_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features wasm-udf"
end

if ${is_js_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features js-udf"
end

if ${is_deno_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features embedded-deno-udf"
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features deno-udf"
end

if ${is_python_udf_enabled}
flags = get_env RISINGWAVE_FEATURE_FLAGS
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features embedded-python-udf"
set_env RISINGWAVE_FEATURE_FLAGS "${flags} --features python-udf"
end

if ${is_hummock_trace}
Expand Down
3 changes: 1 addition & 2 deletions ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ cargo build \
-p risingwave_compaction_test \
-p risingwave_e2e_extended_mode_test \
"${RISINGWAVE_FEATURE_FLAGS[@]}" \
--features embedded-deno-udf \
--features embedded-python-udf \
--features all-udf \
--profile "$profile" \
--timings

Expand Down
6 changes: 4 additions & 2 deletions ci/scripts/release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ if [ "${ARCH}" == "aarch64" ]; then
# see https://github.com/tikv/jemallocator/blob/802969384ae0c581255f3375ee2ba774c8d2a754/jemalloc-sys/build.rs#L218
export JEMALLOC_SYS_WITH_LG_PAGE=16
fi
cargo build -p risingwave_cmd_all --features "rw-static-link" --profile release
cargo build -p risingwave_cmd --bin risectl --features "rw-static-link" --profile release
cargo build -p risingwave_cmd_all -p risingwave_cmd --bin risectl \
--features rw-static-link \
--features all-udf \
--profile release
wangrunji0408 marked this conversation as resolved.
Show resolved Hide resolved
cd target/release && chmod +x risingwave risectl

echo "--- Upload nightly binary to s3"
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ WORKDIR /risingwave
ENV ENABLE_BUILD_DASHBOARD=1

RUN cargo fetch && \
cargo build -p risingwave_cmd_all --release --features "rw-static-link" --features embedded-deno-udf --features embedded-python-udf && \
cargo build -p risingwave_cmd_all --release --features "rw-static-link" --features all-udf && \
mkdir -p /risingwave/bin && \
mv /risingwave/target/release/risingwave /risingwave/bin/ && \
mv /risingwave/target/release/risingwave.dwp /risingwave/bin/ && \
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.hdfs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ ENV JAVA_HOME ${JAVA_HOME_PATH}
ENV LD_LIBRARY_PATH ${JAVA_HOME_PATH}/lib/server:${LD_LIBRARY_PATH}

RUN cargo fetch && \
cargo build -p risingwave_cmd_all --release -p risingwave_object_store --features hdfs-backend --features "rw-static-link" --features embedded-deno-udf --features embedded-python-udf && \
cargo build -p risingwave_cmd_all --release -p risingwave_object_store --features hdfs-backend --features "rw-static-link" --features all-udf && \
mkdir -p /risingwave/bin && \
mv /risingwave/target/release/risingwave /risingwave/bin/ && \
mv /risingwave/target/release/risingwave.dwp /risingwave/bin/ && \
Expand Down
5 changes: 2 additions & 3 deletions e2e_test/error_ui/simple/main.slt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ create function int_42() returns int as int_42 using link '555.0.0.1:8815';
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: UDF error
3: Flight service error: invalid address: 555.0.0.1:8815, err: failed to parse address: http://555.0.0.1:8815: invalid IPv4 address
1: failed to parse address: http://555.0.0.1:8815
2: invalid IPv4 address


statement error
Expand Down
11 changes: 7 additions & 4 deletions src/cmd_all/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@ license = { workspace = true }
repository = { workspace = true }

[features]
default = ["rw-static-link"]
rw-static-link = ["workspace-config/rw-static-link"]
rw-dynamic-link = ["workspace-config/rw-dynamic-link"]
embedded-deno-udf = ["risingwave_expr/embedded-deno-udf"]
embedded-python-udf = ["risingwave_expr/embedded-python-udf"]
default = ["rw-static-link"]
all-udf = ["external-udf", "wasm-udf", "js-udf", "deno-udf", "python-udf"]
external-udf = ["risingwave_expr_impl/external-udf"]
wasm-udf = ["risingwave_expr_impl/wasm-udf"]
js-udf = ["risingwave_expr_impl/js-udf"]
deno-udf = ["risingwave_expr_impl/deno-udf"]
python-udf = ["risingwave_expr_impl/python-udf"]
Comment on lines +11 to +19
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBO I don't very like feature flags, but acceptable since there are already existing ones. 1 flag is as bad as N flags...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My consideration is that each UDF backend brings many dependencies, and most of our developers don't touch UDF at all. So it would be better to skip them in local development. The same thing could be applied to connectors. For example, I almost never use any connector in my development. Those dependencies could be skipped as well to save my disk.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, I think it's time to split connector implementations off and design a clear interface for it, as there are now community developers willing to develop new connectors. 🤔

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, I think it's time to split connector implementations off

BTW, #12981 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I noticed this PR. It's a little pity it was not merged, but I think it's in the right direction.


[package.metadata.cargo-machete]
ignored = ["workspace-hack", "workspace-config", "task_stats_alloc"]
Expand All @@ -32,7 +36,6 @@ risingwave_common = { workspace = true }
risingwave_compactor = { workspace = true }
risingwave_compute = { workspace = true }
risingwave_ctl = { workspace = true }
risingwave_expr = { workspace = true }
risingwave_expr_impl = { workspace = true }
risingwave_frontend = { workspace = true }
risingwave_meta_node = { workspace = true }
Expand Down
15 changes: 0 additions & 15 deletions src/expr/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,10 @@ ignored = ["workspace-hack", "ctor"]
[package.metadata.cargo-udeps.ignore]
normal = ["workspace-hack", "ctor"]

[features]
embedded-deno-udf = ["arrow-udf-js-deno"]
embedded-python-udf = ["arrow-udf-python"]

[dependencies]
anyhow = "1"
arrow-array = { workspace = true }
arrow-flight = "50"
arrow-schema = { workspace = true }
arrow-udf-flight = { workspace = true }
arrow-udf-js = { workspace = true }
arrow-udf-js-deno = { workspace = true, optional = true }
arrow-udf-python = { workspace = true, optional = true }
arrow-udf-wasm = { workspace = true }
async-trait = "0.1"
auto_impl = "1"
await-tree = { workspace = true }
Expand All @@ -46,11 +36,8 @@ enum-as-inner = "0.6"
futures = "0.3"
futures-async-stream = { workspace = true }
futures-util = "0.3"
ginepro = "0.7"
itertools = { workspace = true }
linkme = { version = "0.3", features = ["used_linker"] }
md5 = "0.7"
moka = { version = "0.12", features = ["sync"] }
num-traits = "0.2"
openssl = { version = "0.10", features = ["vendored"] }
parse-display = "0.9"
Expand All @@ -68,9 +55,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [
"rt-multi-thread",
"macros",
] }
tonic = "0.10"
tracing = "0.1"
zstd = { version = "0.13", default-features = false }

[target.'cfg(not(madsim))'.dependencies]
workspace-hack = { path = "../../workspace-hack" }
Expand Down
13 changes: 0 additions & 13 deletions src/expr/core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,6 @@ pub enum ExprError {
anyhow::Error,
),

#[error("UDF error: {0}")]
Udf(
#[from]
#[backtrace]
Box<arrow_udf_flight::Error>,
),

#[error("not a constant")]
NotConstant,

Expand Down Expand Up @@ -156,12 +149,6 @@ impl From<PbFieldNotFound> for ExprError {
}
}

impl From<arrow_udf_flight::Error> for ExprError {
fn from(err: arrow_udf_flight::Error) -> Self {
Self::Udf(Box::new(err))
}
}

/// A collection of multiple errors.
#[derive(Error, Debug)]
pub struct MultiExprError(Box<[ExprError]>);
Expand Down
Loading
Loading