diff --git a/ci/Dockerfile b/ci/Dockerfile index 78c48f0c648d6..5902b612c071c 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -4,22 +4,18 @@ # and corresponding rw-build-env version in docker-compose.yml. # ############################################################################################# -FROM ubuntu:22.04 AS risingwave-build-env +FROM ubuntu:24.04 AS risingwave-build-env ENV LANG en_US.utf8 # Use AWS ubuntu mirror RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://us-east-2.ec2.archive.ubuntu.com/ubuntu/|g' /etc/apt/sources.list RUN apt-get update -yy && \ - DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl parallel python3 python3-pip software-properties-common \ - openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-11-jdk wget unzip git tmux lld postgresql-client kafkacat netcat mysql-client \ - maven zstd libzstd-dev locales -yy \ + DEBIAN_FRONTEND=noninteractive apt-get -y install make build-essential cmake protobuf-compiler curl parallel python3 python3-pip python3-venv software-properties-common \ + openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-11-jdk wget unzip git tmux lld postgresql-client kcat netcat-openbsd mysql-client \ + maven zstd libzstd-dev locales \ + python3.12 python3.12-dev \ && rm -rf /var/lib/{apt,dpkg,cache,log}/ -# Install Python 3.12 -RUN add-apt-repository ppa:deadsnakes/ppa -y && \ - apt-get update -yy && \ - DEBIAN_FRONTEND=noninteractive apt-get install python3.12 python3.12-dev -yy && \ - rm -rf /var/lib/{apt,dpkg,cache,log}/ ENV PYO3_PYTHON=python3.12 SHELL ["/bin/bash", "-c"] @@ -34,21 +30,20 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-mo ENV PATH /root/.cargo/bin/:$PATH -RUN rustup show -RUN rustup default `rustup show active-toolchain | awk '{print $1}'` +RUN rustup show && \ + rustup default `rustup show active-toolchain | awk '{print $1}'` && \ + rustup component add rustfmt llvm-tools-preview clippy && \ + rustup target add wasm32-wasi RUN curl -sSL "https://github.com/bufbuild/buf/releases/download/v1.29.0/buf-$(uname -s)-$(uname -m).tar.gz" | \ tar -xvzf - -C /usr/local --strip-components 1 # install python dependencies -RUN pip3 install pyarrow pytest +RUN pip3 install --break-system-packages pyarrow pytest # Install poetry RUN curl -sSL https://install.python-poetry.org | python3 - - -# add required rustup components -RUN rustup component add rustfmt llvm-tools-preview clippy -RUN rustup target add wasm32-wasi +ENV PATH /root/.local/bin:$PATH ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh index 1a23144a8e8fe..074f60aa97e06 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -10,7 +10,7 @@ cat ../rust-toolchain # shellcheck disable=SC2155 # REMEMBER TO ALSO UPDATE ci/docker-compose.yml -export BUILD_ENV_VERSION=v20240405_1 +export BUILD_ENV_VERSION=v20240413 export BUILD_TAG="public.ecr.aws/w1p7b4n3/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 3b99cf1082df8..1d67188b1c4fb 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -71,7 +71,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240405_1 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413 depends_on: - mysql - db @@ -84,7 +84,7 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240405_1 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413 depends_on: - mysql - db @@ -103,12 +103,12 @@ services: rw-build-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240405_1 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413 volumes: - ..:/risingwave ci-flamegraph-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240405_1 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413 # NOTE(kwannoel): This is used in order to permit # syscalls for `nperf` (perf_event_open), # so it can do CPU profiling. @@ -119,7 +119,7 @@ services: - ..:/risingwave regress-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240405_1 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240413 depends_on: db: condition: service_healthy diff --git a/ci/scripts/connector-node-integration-test.sh b/ci/scripts/connector-node-integration-test.sh index 0ebdd35ea682a..519215584abb9 100755 --- a/ci/scripts/connector-node-integration-test.sh +++ b/ci/scripts/connector-node-integration-test.sh @@ -86,7 +86,7 @@ tar xf java-binding-integration-test.tar.zst bin echo "--- prepare integration tests" cd "${RISINGWAVE_ROOT}"/java/connector-node -pip3 install grpcio grpcio-tools psycopg2 psycopg2-binary pyspark==3.3 black +pip3 install --break-system-packages grpcio grpcio-tools psycopg2 psycopg2-binary pyspark==3.3 black cd python-client && bash gen-stub.sh && bash format-python.sh --check export PYTHONPATH=proto diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh index 6ca3d566d8a52..cae03843c4703 100755 --- a/ci/scripts/e2e-cassandra-sink-test.sh +++ b/ci/scripts/e2e-cassandra-sink-test.sh @@ -38,6 +38,12 @@ sleep 1 echo "--- create cassandra table" curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz tar xfvz apache-cassandra-4.1.3-bin.tar.gz +# remove bundled packages, and use installed packages, because Python 3.12 has removed asyncore, but I failed to install libev support for bundled Python driver. +rm apache-cassandra-4.1.3/lib/six-1.12.0-py2.py3-none-any.zip +rm apache-cassandra-4.1.3/lib/cassandra-driver-internal-only-3.25.0.zip +apt-get install -y libev4 libev-dev +pip3 install --break-system-packages cassandra-driver + cd apache-cassandra-4.1.3/bin export CQLSH_HOST=cassandra-server export CQLSH_PORT=9042 @@ -55,8 +61,9 @@ if cat ./query_result.csv | awk -F "," '{ exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then echo "Cassandra sink check passed" else - cat ./query_result.csv echo "The output is not as expected." + echo "output:" + cat ./query_result.csv exit 1 fi diff --git a/ci/scripts/e2e-iceberg-cdc.sh b/ci/scripts/e2e-iceberg-cdc.sh index 58b18bd3e059b..fc90f81d985ab 100755 --- a/ci/scripts/e2e-iceberg-cdc.sh +++ b/ci/scripts/e2e-iceberg-cdc.sh @@ -47,13 +47,13 @@ bash ./start_spark_connect_server.sh # Don't remove the `--quiet` option since poetry has a bug when printing output, see # https://github.com/python-poetry/poetry/issues/3412 -"$HOME"/.local/bin/poetry update --quiet +poetry update --quiet # 1. import data to mysql mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc.sql # 2. create table and sink -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc_init.toml +poetry run python main.py -t ./test_case/cdc/no_partition_cdc_init.toml # 3. insert new data to mysql mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc_insert.sql @@ -61,4 +61,4 @@ mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc_inse sleep 20 # 4. check change -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc.toml \ No newline at end of file +poetry run python main.py -t ./test_case/cdc/no_partition_cdc.toml \ No newline at end of file diff --git a/ci/scripts/e2e-iceberg-sink-v2-test.sh b/ci/scripts/e2e-iceberg-sink-v2-test.sh index 847b6d32800ae..dd2f78037a5f2 100755 --- a/ci/scripts/e2e-iceberg-sink-v2-test.sh +++ b/ci/scripts/e2e-iceberg-sink-v2-test.sh @@ -37,14 +37,14 @@ bash ./start_spark_connect_server.sh # Don't remove the `--quiet` option since poetry has a bug when printing output, see # https://github.com/python-poetry/poetry/issues/3412 -"$HOME"/.local/bin/poetry update --quiet -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/no_partition_append_only.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/no_partition_upsert.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/partition_append_only.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/partition_upsert.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/range_partition_append_only.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/range_partition_upsert.toml -"$HOME"/.local/bin/poetry run python main.py -t ./test_case/append_only_with_checkpoint_interval.toml +poetry update --quiet +poetry run python main.py -t ./test_case/no_partition_append_only.toml +poetry run python main.py -t ./test_case/no_partition_upsert.toml +poetry run python main.py -t ./test_case/partition_append_only.toml +poetry run python main.py -t ./test_case/partition_upsert.toml +poetry run python main.py -t ./test_case/range_partition_append_only.toml +poetry run python main.py -t ./test_case/range_partition_upsert.toml +poetry run python main.py -t ./test_case/append_only_with_checkpoint_interval.toml echo "--- Kill cluster" diff --git a/ci/scripts/e2e-kafka-sink-test.sh b/ci/scripts/e2e-kafka-sink-test.sh index b1d1f19c8f54d..1dd7a27831d49 100755 --- a/ci/scripts/e2e-kafka-sink-test.sh +++ b/ci/scripts/e2e-kafka-sink-test.sh @@ -144,7 +144,7 @@ sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/drop_sink.slt' # test different encoding echo "preparing confluent schema registry" -python3 -m pip install requests confluent-kafka +python3 -m pip install --break-system-packages requests confluent-kafka echo "testing protobuf" cp src/connector/src/test_data/proto_recursive/recursive.pb ./proto-recursive diff --git a/ci/scripts/e2e-source-test.sh b/ci/scripts/e2e-source-test.sh index 97e1040968604..851fca303ab19 100755 --- a/ci/scripts/e2e-source-test.sh +++ b/ci/scripts/e2e-source-test.sh @@ -138,7 +138,7 @@ echo "--- e2e, ci-1cn-1fe, protobuf schema registry" export RISINGWAVE_CI=true RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ risedev ci-start ci-1cn-1fe -python3 -m pip install requests protobuf confluent-kafka +python3 -m pip install --break-system-packages requests protobuf confluent-kafka python3 e2e_test/schema_registry/pb.py "message_queue:29092" "http://message_queue:8081" "sr_pb_test" 20 user echo "make sure google/protobuf/source_context.proto is NOT in schema registry" curl --silent 'http://message_queue:8081/subjects'; echo diff --git a/ci/scripts/pulsar-source-test.sh b/ci/scripts/pulsar-source-test.sh index 6f0f7d176b001..86d99ed97f7d2 100755 --- a/ci/scripts/pulsar-source-test.sh +++ b/ci/scripts/pulsar-source-test.sh @@ -45,7 +45,7 @@ echo "--- starting risingwave cluster with connector node" risedev ci-start ci-1cn-1fe echo "--- Run test" -python3 -m pip install psycopg2-binary +python3 -m pip install --break-system-packages psycopg2-binary python3 e2e_test/source/pulsar/astra-streaming.py # python3 e2e_test/source/pulsar/streamnative-cloud.py diff --git a/ci/scripts/s3-source-test-for-opendal-fs-engine.sh b/ci/scripts/s3-source-test-for-opendal-fs-engine.sh index 11bdb8ff2ce62..c0c7e91348ecb 100755 --- a/ci/scripts/s3-source-test-for-opendal-fs-engine.sh +++ b/ci/scripts/s3-source-test-for-opendal-fs-engine.sh @@ -29,7 +29,7 @@ echo "--- starting risingwave cluster with connector node" risedev ci-start ci-3cn-3fe-opendal-fs-backend echo "--- Run test" -python3 -m pip install minio psycopg2-binary +python3 -m pip install --break-system-packages minio psycopg2-binary python3 e2e_test/s3/"$script" echo "--- Kill cluster" diff --git a/ci/scripts/s3-source-test.sh b/ci/scripts/s3-source-test.sh index 9bcb0d830cb4a..532223693a215 100755 --- a/ci/scripts/s3-source-test.sh +++ b/ci/scripts/s3-source-test.sh @@ -32,7 +32,7 @@ echo "--- starting risingwave cluster with connector node" risedev ci-start ci-1cn-1fe echo "--- Run test" -python3 -m pip install minio psycopg2-binary opendal +python3 -m pip install --break-system-packages minio psycopg2-binary opendal if [[ -v format_type ]]; then python3 e2e_test/s3/"$script" "$format_type" else diff --git a/e2e_test/iceberg/pyproject.toml b/e2e_test/iceberg/pyproject.toml index d13be72277592..eba4bcd12f137 100644 --- a/e2e_test/iceberg/pyproject.toml +++ b/e2e_test/iceberg/pyproject.toml @@ -8,7 +8,7 @@ authors = ["risingwavelabs"] python = "^3.10" pyspark = { version = "3.4.1", extras = ["sql", "connect"] } tomli = "2.0" - +setuptools = "69" [build-system] requires = ["poetry-core"] diff --git a/integration_tests/iceberg-sink2/run.sh b/integration_tests/iceberg-sink2/run.sh index d58973f6c7c8f..4fbf17d1116ae 100755 --- a/integration_tests/iceberg-sink2/run.sh +++ b/integration_tests/iceberg-sink2/run.sh @@ -3,9 +3,9 @@ # Exits as soon as any line fails. set -euox pipefail -"$HOME"/.local/bin/poetry --version +poetry --version cd python # Don't remove the `--quiet` option since poetry has a bug when printing output, see # https://github.com/python-poetry/poetry/issues/3412 -"$HOME"/.local/bin/poetry update --quiet -"$HOME"/.local/bin/poetry run python main.py \ No newline at end of file +poetry update --quiet +poetry run python main.py \ No newline at end of file diff --git a/integration_tests/iceberg-source/run.sh b/integration_tests/iceberg-source/run.sh index d58973f6c7c8f..4fbf17d1116ae 100755 --- a/integration_tests/iceberg-source/run.sh +++ b/integration_tests/iceberg-source/run.sh @@ -3,9 +3,9 @@ # Exits as soon as any line fails. set -euox pipefail -"$HOME"/.local/bin/poetry --version +poetry --version cd python # Don't remove the `--quiet` option since poetry has a bug when printing output, see # https://github.com/python-poetry/poetry/issues/3412 -"$HOME"/.local/bin/poetry update --quiet -"$HOME"/.local/bin/poetry run python main.py \ No newline at end of file +poetry update --quiet +poetry run python main.py \ No newline at end of file diff --git a/scripts/source/prepare_ci_kafka.sh b/scripts/source/prepare_ci_kafka.sh index 68f69827bfc7c..e791872822195 100755 --- a/scripts/source/prepare_ci_kafka.sh +++ b/scripts/source/prepare_ci_kafka.sh @@ -3,13 +3,6 @@ # Exits as soon as any line fails. set -e -KCAT_BIN="kcat" -# kcat bin name on linux is "kafkacat" -if [ "$(uname)" == "Linux" ] -then - KCAT_BIN="kafkacat" -fi - SCRIPT_PATH="$(cd "$(dirname "$0")" >/dev/null 2>&1 && pwd)" cd "$SCRIPT_PATH/.." || exit 1 @@ -54,7 +47,7 @@ done wait echo "Fulfill kafka topics" -python3 -m pip install requests fastavro confluent_kafka jsonschema +python3 -m pip install --break-system-packages requests fastavro confluent_kafka jsonschema for filename in $kafka_data_files; do ([ -e "$filename" ] base=$(basename "$filename") @@ -63,20 +56,20 @@ for filename in $kafka_data_files; do echo "Fulfill kafka topic $topic with data from $base" # binary data, one message a file, filename/topic ends with "bin" if [[ "$topic" = *bin ]]; then - ${KCAT_BIN} -P -b message_queue:29092 -t "$topic" "$filename" + kcat -P -b message_queue:29092 -t "$topic" "$filename" elif [[ "$topic" = *avro_json ]]; then python3 source/schema_registry_producer.py "message_queue:29092" "http://message_queue:8081" "$filename" "topic" "avro" elif [[ "$topic" = *json_schema ]]; then python3 source/schema_registry_producer.py "kafka:9093" "http://schemaregistry:8082" "$filename" "topic" "json" else - cat "$filename" | ${KCAT_BIN} -P -K ^ -b message_queue:29092 -t "$topic" + cat "$filename" | kcat -P -K ^ -b message_queue:29092 -t "$topic" fi ) & done # test additional columns: produce messages with headers ADDI_COLUMN_TOPIC="kafka_additional_columns" -for i in {0..100}; do echo "key$i:{\"a\": $i}" | ${KCAT_BIN} -P -b message_queue:29092 -t ${ADDI_COLUMN_TOPIC} -K : -H "header1=v1" -H "header2=v2"; done +for i in {0..100}; do echo "key$i:{\"a\": $i}" | kcat -P -b message_queue:29092 -t ${ADDI_COLUMN_TOPIC} -K : -H "header1=v1" -H "header2=v2"; done # write schema with name strategy diff --git a/scripts/source/prepare_data_after_alter.sh b/scripts/source/prepare_data_after_alter.sh index 3225ce5d128d5..a82bd59328653 100644 --- a/scripts/source/prepare_data_after_alter.sh +++ b/scripts/source/prepare_data_after_alter.sh @@ -3,16 +3,9 @@ # Exits as soon as any line fails. set -e -KCAT_BIN="kcat" -# kcat bin name on linux is "kafkacat" -if [ "$(uname)" == "Linux" ] -then - KCAT_BIN="kafkacat" -fi - SCRIPT_PATH="$(cd "$(dirname "$0")" >/dev/null 2>&1 && pwd)" cd "$SCRIPT_PATH/.." || exit 1 FILE="./source/alter_data/kafka_alter.$1" echo "Send data from $FILE" -cat $FILE | ${KCAT_BIN} -P -b message_queue:29092 -t kafka_alter \ No newline at end of file +cat $FILE | kcat -P -b message_queue:29092 -t kafka_alter \ No newline at end of file