diff --git a/ci_v2/docker/fasttest/Dockerfile b/ci_v2/docker/fasttest/Dockerfile
new file mode 100644
index 000000000000..02595ad0d0a0
--- /dev/null
+++ b/ci_v2/docker/fasttest/Dockerfile
@@ -0,0 +1,105 @@
+# docker build -t clickhouse/fasttest .
+FROM ubuntu:22.04
+
+# ARG for quick switch to a given ubuntu mirror
+ARG apt_archive="http://archive.ubuntu.com"
+RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
+
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18
+
+RUN apt-get update \
+    && apt-get install \
+        apt-transport-https \
+        apt-utils \
+        ca-certificates \
+        curl \
+        gnupg \
+        lsb-release \
+        wget \
+        git \
+        --yes --no-install-recommends --verbose-versions \
+    && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
+    && wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
+    && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
+    && apt-key add /tmp/llvm-snapshot.gpg.key \
+    && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
+    && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
+        /etc/apt/sources.list \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+
+# moreutils - provides ts for FT
+# expect, bzip2 - required by FT
+# bsdmainutils - provides hexdump for FT
+
+RUN apt-get update \
+    && apt-get install \
+        clang-${LLVM_VERSION} \
+        cmake \
+        libclang-${LLVM_VERSION}-dev \
+        libclang-rt-${LLVM_VERSION}-dev \
+        lld-${LLVM_VERSION} \
+        llvm-${LLVM_VERSION}-dev \
+        lsof \
+        ninja-build \
+        python3 \
+        python3-pip \
+        zstd \
+        moreutils \
+        expect \
+        bsdmainutils \
+        pv \
+        jq \
+        bzip2 \
+        --yes --no-install-recommends \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
+
+COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
+# Give suid to gdb to grant it attach permissions
+RUN chmod u+s /opt/gdb/bin/gdb
+ENV PATH="/opt/gdb/bin:${PATH}"
+
+# This symlink is required by gcc to find the lld linker
+RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
+# FIXME: workaround for "The imported target "merge-fdata" references the file" error
+# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
+RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
+
+# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up the libraries from the default install path.
+# It's a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use that. Details: https://github.com/llvm/llvm-project/issues/95792
+RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
+
+ARG TARGETARCH
+ARG SCCACHE_VERSION=v0.7.7
+ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
+# sccache requires a value for the region.
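# A minimal, illustrative sketch of what a fully S3-backed sccache setup could add here; the
# bucket and prefix values are placeholders, not settings used by this image, and the build
# opts in to the cache via -DCOMPILER_CACHE=sccache in the CMake invocation in ci_v2/jobs/fast_test.py.
#   ENV SCCACHE_BUCKET=<your-ci-cache-bucket>
#   ENV SCCACHE_S3_KEY_PREFIX=ccache/clickhouse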
So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 +RUN arch=${TARGETARCH} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ + tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r + +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt + +# chmod 777 to make the container user independent +RUN mkdir -p /var/lib/clickhouse \ + && chmod 777 /var/lib/clickhouse + +ENV TZ=Europe/Amsterdam +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN groupadd --system --gid 1000 clickhouse \ + && useradd --system --gid 1000 --uid 1000 -m clickhouse \ + && mkdir -p /.cache/sccache && chmod 777 /.cache/sccache + +ENV PYTHONPATH="/wd" +ENV PYTHONUNBUFFERED=1 diff --git a/ci_v2/docker/fasttest/requirements.txt b/ci_v2/docker/fasttest/requirements.txt new file mode 100644 index 000000000000..a1488ee33f01 --- /dev/null +++ b/ci_v2/docker/fasttest/requirements.txt @@ -0,0 +1,6 @@ +Jinja2==3.1.3 +numpy==1.26.4 +requests==2.32.3 +pandas==1.5.3 +scipy==1.12.0 +#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl diff --git a/ci_v2/docker/style-test/requirements.txt b/ci_v2/docker/style-test/requirements.txt index 987b014d9ba1..ab48f245fd28 100644 --- a/ci_v2/docker/style-test/requirements.txt +++ b/ci_v2/docker/style-test/requirements.txt @@ -1,4 +1,5 @@ requests==2.32.3 yamllint==1.26.3 codespell==2.2.1 -https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl +#use praktika from CH repo +#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl diff --git a/ci_v2/jobs/check_style.py b/ci_v2/jobs/check_style.py index 4dd3864e865e..1b1b0bf689b5 100644 --- a/ci_v2/jobs/check_style.py +++ b/ci_v2/jobs/check_style.py @@ -2,7 +2,6 @@ import multiprocessing import os import re -import sys from concurrent.futures import ProcessPoolExecutor from pathlib import Path @@ -51,25 +50,6 @@ def run_check_concurrent(check_name, check_function, files, nproc=NPROC): return result -def run_simple_check(check_name, check_function, **kwargs): - stop_watch = Utils.Stopwatch() - - error = check_function(**kwargs) - - result = Result( - name=check_name, - status=Result.Status.SUCCESS if not error else Result.Status.FAILED, - start_time=stop_watch.start_time, - duration=stop_watch.duration, - info=error, - ) - return result - - -def run_check(check_name, check_function, files): - return run_check_concurrent(check_name, check_function, files, nproc=1) - - def check_duplicate_includes(file_path): includes = [] with open(file_path, "r", encoding="utf-8", errors="ignore") as f: @@ -117,7 +97,7 @@ def check_xmllint(file_paths): def check_functional_test_cases(files): """ Queries with event_date should have yesterday() not today() - NOTE: it is not that accuate, but at least something. + NOTE: it is not that accurate, but at least something. 
""" patterns = [ @@ -345,66 +325,58 @@ def check_file_names(files): ) ) results.append( - run_check( - check_name="Check Tests Numbers", - check_function=check_gaps_in_tests_numbers, - files=functional_test_files, + Result.create_from_command_execution( + name="Check Tests Numbers", + command=check_gaps_in_tests_numbers, + command_args=[functional_test_files], ) ) results.append( - run_simple_check( - check_name="Check Broken Symlinks", - check_function=check_broken_links, - path="./", - exclude_paths=["contrib/", "metadata/", "programs/server/data"], + Result.create_from_command_execution( + name="Check Broken Symlinks", + command=check_broken_links, + command_kwargs={ + "path": "./", + "exclude_paths": ["contrib/", "metadata/", "programs/server/data"], + }, ) ) results.append( - run_simple_check( - check_name="Check CPP code", - check_function=check_cpp_code, + Result.create_from_command_execution( + name="Check CPP code", + command=check_cpp_code, ) ) results.append( - run_simple_check( - check_name="Check Submodules", - check_function=check_repo_submodules, + Result.create_from_command_execution( + name="Check Submodules", + command=check_repo_submodules, ) ) results.append( - run_check( - check_name="Check File Names", - check_function=check_file_names, - files=all_files, + Result.create_from_command_execution( + name="Check File Names", + command=check_file_names, + command_args=[all_files], ) ) results.append( - run_simple_check( - check_name="Check Many Different Things", - check_function=check_other, + Result.create_from_command_execution( + name="Check Many Different Things", + command=check_other, ) ) results.append( - run_simple_check( - check_name="Check Codespell", - check_function=check_codespell, + Result.create_from_command_execution( + name="Check Codespell", + command=check_codespell, ) ) results.append( - run_simple_check( - check_name="Check Aspell", - check_function=check_aspell, + Result.create_from_command_execution( + name="Check Aspell", + command=check_aspell, ) ) - res = Result.create_from(results=results, stopwatch=stop_watch).dump() - - if not res.is_ok(): - print("Style check: failed") - for result in results: - if not result.is_ok(): - print("Failed check:") - print(" | ", result) - sys.exit(1) - else: - print("Style check: ok") + Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly() diff --git a/ci_v2/jobs/fast_test.py b/ci_v2/jobs/fast_test.py new file mode 100644 index 000000000000..b82c17aa42c3 --- /dev/null +++ b/ci_v2/jobs/fast_test.py @@ -0,0 +1,329 @@ +import threading +from pathlib import Path + +from ci_v2.jobs.scripts.functional_tests_results import FTResultsProcessor +from praktika.environment import Environment +from praktika.result import Result +from praktika.settings import Settings +from praktika.utils import MetaClasses, Shell, Utils + + +class ClickHouseProc: + def __init__(self): + self.ch_config_dir = f"{Settings.TEMP_DIR}/etc/clickhouse-server" + self.pid_file = f"{self.ch_config_dir}/clickhouse-server.pid" + self.config_file = f"{self.ch_config_dir}/config.xml" + self.user_files_path = f"{self.ch_config_dir}/user_files" + self.test_output_file = f"{Settings.OUTPUT_DIR}/test_result.txt" + self.command = f"clickhouse-server --config-file {self.config_file} --pid-file {self.pid_file} -- --path {self.ch_config_dir} --user_files_path {self.user_files_path} --top_level_domains_path {self.ch_config_dir}/top_level_domains --keeper_server.storage_path {self.ch_config_dir}/coordination" + self.proc = None + self.pid 
= 0 + nproc = int(Utils.cpu_count() / 2) + self.fast_test_command = f"clickhouse-test --hung-check --fast-tests-only --no-random-settings --no-random-merge-tree-settings --no-long --testname --shard --zookeeper --check-zookeeper-session --order random --print-time --report-logs-stats --jobs {nproc} -- '' | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a \"{self.test_output_file}\"" + # TODO: store info in case of failure + self.info = "" + self.info_file = "" + + Utils.set_env("CLICKHOUSE_CONFIG_DIR", self.ch_config_dir) + Utils.set_env("CLICKHOUSE_CONFIG", self.config_file) + Utils.set_env("CLICKHOUSE_USER_FILES", self.user_files_path) + Utils.set_env("CLICKHOUSE_SCHEMA_FILES", f"{self.ch_config_dir}/format_schemas") + + def start(self): + print("Starting ClickHouse server") + Shell.check(f"rm {self.pid_file}") + + def run_clickhouse(): + self.proc = Shell.run_async( + self.command, verbose=True, suppress_output=True + ) + + thread = threading.Thread(target=run_clickhouse) + thread.daemon = True # Allow program to exit even if thread is still running + thread.start() + + # self.proc = Shell.run_async(self.command, verbose=True) + + started = False + try: + for _ in range(5): + pid = Shell.get_output(f"cat {self.pid_file}").strip() + if not pid: + Utils.sleep(1) + continue + started = True + print(f"Got pid from fs [{pid}]") + _ = int(pid) + break + except Exception: + pass + + if not started: + stdout = self.proc.stdout.read().strip() if self.proc.stdout else "" + stderr = self.proc.stderr.read().strip() if self.proc.stderr else "" + Utils.print_formatted_error("Failed to start ClickHouse", stdout, stderr) + return False + + print(f"ClickHouse server started successfully, pid [{pid}]") + return True + + def wait_ready(self): + res, out, err = 0, "", "" + attempts = 30 + delay = 2 + for attempt in range(attempts): + res, out, err = Shell.get_res_stdout_stderr( + 'clickhouse-client --query "select 1"', verbose=True + ) + if out.strip() == "1": + print("Server ready") + break + else: + print(f"Server not ready, wait") + Utils.sleep(delay) + else: + Utils.print_formatted_error( + f"Server not ready after [{attempts*delay}s]", out, err + ) + return False + return True + + def run_fast_test(self): + if Path(self.test_output_file).exists(): + Path(self.test_output_file).unlink() + exit_code = Shell.run(self.fast_test_command) + return exit_code == 0 + + def terminate(self): + print("Terminate ClickHouse process") + timeout = 10 + if self.proc: + Utils.terminate_process_group(self.proc.pid) + + self.proc.terminate() + try: + self.proc.wait(timeout=10) + print(f"Process {self.proc.pid} terminated gracefully.") + except Exception: + print( + f"Process {self.proc.pid} did not terminate in {timeout} seconds, killing it..." 
+ ) + Utils.terminate_process_group(self.proc.pid, force=True) + self.proc.wait() # Wait for the process to be fully killed + print(f"Process {self.proc} was killed.") + + +def clone_submodules(): + submodules_to_update = [ + "contrib/sysroot", + "contrib/magic_enum", + "contrib/abseil-cpp", + "contrib/boost", + "contrib/zlib-ng", + "contrib/libxml2", + "contrib/libunwind", + "contrib/fmtlib", + "contrib/aklomp-base64", + "contrib/cctz", + "contrib/libcpuid", + "contrib/libdivide", + "contrib/double-conversion", + "contrib/llvm-project", + "contrib/lz4", + "contrib/zstd", + "contrib/fastops", + "contrib/rapidjson", + "contrib/re2", + "contrib/sparsehash-c11", + "contrib/croaring", + "contrib/miniselect", + "contrib/xz", + "contrib/dragonbox", + "contrib/fast_float", + "contrib/NuRaft", + "contrib/jemalloc", + "contrib/replxx", + "contrib/wyhash", + "contrib/c-ares", + "contrib/morton-nd", + "contrib/xxHash", + "contrib/expected", + "contrib/simdjson", + "contrib/liburing", + "contrib/libfiu", + "contrib/incbin", + "contrib/yaml-cpp", + ] + + res = Shell.check("git submodule sync", verbose=True, strict=True) + res = res and Shell.check("git submodule init", verbose=True, strict=True) + res = res and Shell.check( + command=f"xargs --max-procs={min([Utils.cpu_count(), 20])} --null --no-run-if-empty --max-args=1 git submodule update --depth 1 --single-branch", + stdin_str="\0".join(submodules_to_update) + "\0", + timeout=120, + retries=3, + verbose=True, + ) + res = res and Shell.check("git submodule foreach git reset --hard", verbose=True) + res = res and Shell.check("git submodule foreach git checkout @ -f", verbose=True) + res = res and Shell.check("git submodule foreach git clean -xfd", verbose=True) + return res + + +def update_path_ch_config(config_file_path=""): + print("Updating path in clickhouse config") + config_file_path = ( + config_file_path or f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.xml" + ) + ssl_config_file_path = ( + f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.d/ssl_certs.xml" + ) + try: + with open(config_file_path, "r", encoding="utf-8") as file: + content = file.read() + + with open(ssl_config_file_path, "r", encoding="utf-8") as file: + ssl_config_content = file.read() + content = content.replace(">/var/", f">{Settings.TEMP_DIR}/var/") + content = content.replace(">/etc/", f">{Settings.TEMP_DIR}/etc/") + ssl_config_content = ssl_config_content.replace( + ">/etc/", f">{Settings.TEMP_DIR}/etc/" + ) + with open(config_file_path, "w", encoding="utf-8") as file: + file.write(content) + with open(ssl_config_file_path, "w", encoding="utf-8") as file: + file.write(ssl_config_content) + except Exception as e: + print(f"ERROR: failed to update config, exception: {e}") + return False + return True + + +class JobStages(metaclass=MetaClasses.WithIter): + CHECKOUT_SUBMODULES = "checkout" + CMAKE = "cmake" + BUILD = "build" + CONFIG = "config" + TEST = "test" + + +def main(): + stop_watch = Utils.Stopwatch() + + stages = list(JobStages) + stage = Environment.LOCAL_RUN_PARAM or JobStages.CHECKOUT_SUBMODULES + if stage: + assert stage in JobStages, f"--param must be one of [{list(JobStages)}]" + print(f"Job will start from stage [{stage}]") + while stage in stages: + stages.pop(0) + stages.insert(0, stage) + + current_directory = Utils.cwd() + build_dir = f"{Settings.TEMP_DIR}/build" + + Utils.add_to_PATH(f"{build_dir}/programs:{current_directory}/tests") + + res = True + results = [] + + if res and JobStages.CHECKOUT_SUBMODULES in stages: + Shell.check(f"rm -rf 
{build_dir} && mkdir -p {build_dir}") + results.append( + Result.create_from_command_execution( + name="Checkout Submodules for Minimal Build", + command=clone_submodules, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.CMAKE in stages: + results.append( + Result.create_from_command_execution( + name="Cmake configuration", + command=f"cmake {current_directory} -DCMAKE_CXX_COMPILER=clang++-18 -DCMAKE_C_COMPILER=clang-18 \ + -DCMAKE_TOOLCHAIN_FILE={current_directory}/cmake/linux/toolchain-x86_64-musl.cmake -DENABLE_LIBRARIES=0 \ + -DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 -DENABLE_NURAFT=1 -DENABLE_SIMDJSON=1 \ + -DENABLE_JEMALLOC=1 -DENABLE_LIBURING=1 -DENABLE_YAML_CPP=1 -DCOMPILER_CACHE=sccache", + workdir=build_dir, + with_log=True, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.BUILD in stages: + Shell.check("sccache --show-stats") + results.append( + Result.create_from_command_execution( + name="Build ClickHouse", + command="ninja clickhouse-bundle clickhouse-stripped", + workdir=build_dir, + with_log=True, + ) + ) + Shell.check("sccache --show-stats") + res = results[-1].is_ok() + + if res and JobStages.BUILD in stages: + commands = [ + f"mkdir -p {Settings.OUTPUT_DIR}/binaries", + f"cp ./programs/clickhouse {Settings.OUTPUT_DIR}/binaries/clickhouse", + f"zstd --threads=0 --force programs/clickhouse-stripped -o {Settings.OUTPUT_DIR}/binaries/clickhouse-stripped.zst", + "sccache --show-stats", + "clickhouse-client --version", + "clickhouse-test --help", + ] + results.append( + Result.create_from_command_execution( + name="Check and Compress binary", + command=commands, + workdir=build_dir, + with_log=True, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.CONFIG in stages: + commands = [ + f"rm -rf {Settings.TEMP_DIR}/etc/ && mkdir -p {Settings.TEMP_DIR}/etc/clickhouse-client {Settings.TEMP_DIR}/etc/clickhouse-server", + f"cp {current_directory}/programs/server/config.xml {current_directory}/programs/server/users.xml {Settings.TEMP_DIR}/etc/clickhouse-server/", + f"{current_directory}/tests/config/install.sh {Settings.TEMP_DIR}/etc/clickhouse-server {Settings.TEMP_DIR}/etc/clickhouse-client", + # f"cp -a {current_directory}/programs/server/config.d/log_to_console.xml {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/", + f"rm -f {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/secure_ports.xml", + update_path_ch_config, + ] + results.append( + Result.create_from_command_execution( + name="Install ClickHouse Config", + command=commands, + with_log=True, + ) + ) + res = results[-1].is_ok() + + CH = ClickHouseProc() + if res and JobStages.TEST in stages: + stop_watch_ = Utils.Stopwatch() + step_name = "Start ClickHouse Server" + print(step_name) + res = CH.start() + res = res and CH.wait_ready() + results.append( + Result.create_from(name=step_name, status=res, stopwatch=stop_watch_) + ) + + if res and JobStages.TEST in stages: + step_name = "Tests" + print(step_name) + res = res and CH.run_fast_test() + if res: + results.append(FTResultsProcessor(wd=Settings.OUTPUT_DIR).run()) + + CH.terminate() + + Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly() + + +if __name__ == "__main__": + main() diff --git a/ci_v2/jobs/scripts/check_style/check_cpp.sh b/ci_v2/jobs/scripts/check_style/check_cpp.sh index c84a2c8a1083..7963bf982afc 100755 --- a/ci_v2/jobs/scripts/check_style/check_cpp.sh +++ b/ci_v2/jobs/scripts/check_style/check_cpp.sh @@ -14,7 +14,8 @@ LC_ALL="en_US.UTF-8" ROOT_PATH="." 
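# The EXCLUDE/EXCLUDE_DOCS variables introduced below are Perl-regex alternations consumed by
# grep -vP throughout this script (EXCLUDE_DOCS is additionally applied where the large settings
# declaration files have to be skipped). A minimal sketch of the filter idiom, with an
# illustrative path and pattern:
#   find "$ROOT_PATH/src" -name '*.cpp' | grep -vP "$EXCLUDE" | grep -vP "$EXCLUDE_DOCS" | xargs grep -P '<forbidden pattern>'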
-EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' +EXCLUDE='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' +EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettingsDeclaration\.h' # From [1]: # But since array_to_string_internal() in array.c still loops over array @@ -31,7 +32,8 @@ function in_array() } find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | + grep -vP $EXCLUDE_DOCS | xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' | # a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... before opening brace | whitespaces inside braces grep -v -P '(//|:\s+\*|\$\(\()| \)"' @@ -39,12 +41,12 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/n # Tabs find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | - xargs grep $@ -F $'\t' + grep -vP $EXCLUDE | + xargs grep $@ -F $'\t' && echo '^ tabs are not allowed' # // namespace comments are unneeded find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep $@ -P '}\s*//+\s*namespace\s*' # Broken symlinks @@ -52,8 +54,23 @@ find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symli # Duplicated or incorrect setting declarations SETTINGS_FILE=$(mktemp) -cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " " substr($1, 3, length($1) - 3) " SettingsDeclaration" }' > ${SETTINGS_FILE} -find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep "extern const Settings" -T | awk '{print substr($5, 0, length($5) -1) " " substr($4, 9) " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE} +ALL_DECLARATION_FILES=" + $ROOT_PATH/src/Core/Settings.cpp + $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp + $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h" + +cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " Settings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq > ${SETTINGS_FILE} +cat $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " MergeTreeSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} + +# Check that if there are duplicated settings (declared in different objects) they all have the same type (it's simpler to validate style with that assert) +for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sed -e 's/MergeTreeSettings//g' -e 's/Settings//g' | sort | uniq | awk '{ print $1 }' | uniq -d); +do + echo "# Found multiple definitions of setting ${setting} with different types: " + grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > 
" $0 }' +done + +# We append all uses of extern found in implementation files to validate them in a single pass and avoid reading the same files over and over +find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -e "^\s*extern const Settings" -e "^\s**extern const MergeTreeSettings" -T | awk '{print substr($5, 0, length($5) -1) " " $4 " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE} # Duplicated or incorrect setting declarations bash $ROOT_PATH/utils/check-style/check-settings-style @@ -76,12 +93,14 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Timer ProfileEvents::Type ProfileEvents::TypeEnum + ProfileEvents::ValueType ProfileEvents::dumpToMapColumn ProfileEvents::getProfileEvents ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME ProfileEvents::keeper_profile_events ProfileEvents::CountersIncrement + ProfileEvents::size CurrentMetrics::add CurrentMetrics::sub @@ -93,6 +112,7 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::values CurrentMetrics::Value CurrentMetrics::keeper_metrics + CurrentMetrics::size ErrorCodes::ErrorCode ErrorCodes::getName @@ -115,7 +135,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # and this matches with zkutil::CreateMode grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp' } | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "extern const $type_of_extern $allowed_chars" } | while read file; do grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do if ! grep -q "$extern_type::$val" $file; then @@ -133,7 +153,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \ # awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do if ! 
grep -q "extern const $type_of_extern $val" $file; then @@ -146,7 +166,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Duplicates find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" done @@ -154,32 +174,32 @@ done # Three or more consecutive empty lines find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done # Check that every header file has #pragma once in first line find $ROOT_PATH/{src,programs,utils} -name '*.h' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done # Too many exclamation marks find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)." # Exclamation mark in a message find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)." # Trailing whitespaces find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces." 
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream" # Forbid std::cerr/std::cout in src (fine in programs/utils) @@ -189,6 +209,7 @@ std_cerr_cout_excludes=( _fuzzer # OK src/Common/ProgressIndication.cpp + src/Common/ProgressTable.cpp # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests src/Common/HashTable/HashTable.h # SensitiveDataMasker::printStats() @@ -215,11 +236,10 @@ std_cerr_cout_excludes=( ) sources_with_std_cerr_cout=( $( find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \ - grep -vP $EXCLUDE_DIRS | \ + grep -vP $EXCLUDE | \ grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \ xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u ) ) - # Exclude comments for src in "${sources_with_std_cerr_cout[@]}"; do # suppress stderr, since it may contain warning for #pargma once in headers @@ -264,23 +284,23 @@ fi # Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead" # Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead" # Forbid mt19937() and random_device() which are outdated and slow find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead" # Require checking return value of close(), # since it can hide fd misuse and break other places. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked" # A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647 @@ -307,18 +327,15 @@ ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or # Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong. find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' && echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong." 
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' && echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice." -# There shouldn't be any code snippets under GPL or LGPL -find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" - PATTERN="allow_"; DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u)); [ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt" diff --git a/ci_v2/jobs/scripts/functional_tests_results.py b/ci_v2/jobs/scripts/functional_tests_results.py new file mode 100755 index 000000000000..5ac9d6b985dd --- /dev/null +++ b/ci_v2/jobs/scripts/functional_tests_results.py @@ -0,0 +1,284 @@ +import dataclasses +from typing import List + +from praktika.environment import Environment +from praktika.result import Result + +OK_SIGN = "[ OK " +FAIL_SIGN = "[ FAIL " +TIMEOUT_SIGN = "[ Timeout! " +UNKNOWN_SIGN = "[ UNKNOWN " +SKIPPED_SIGN = "[ SKIPPED " +HUNG_SIGN = "Found hung queries in processlist" +SERVER_DIED_SIGN = "Server died, terminating all processes" +SERVER_DIED_SIGN2 = "Server does not respond to health check" +DATABASE_SIGN = "Database: " + +SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] + +RETRIES_SIGN = "Some tests were restarted" + + +# def write_results(results_file, status_file, results, status): +# with open(results_file, "w", encoding="utf-8") as f: +# out = csv.writer(f, delimiter="\t") +# out.writerows(results) +# with open(status_file, "w", encoding="utf-8") as f: +# out = csv.writer(f, delimiter="\t") +# out.writerow(status) + +BROKEN_TESTS_ANALYZER_TECH_DEBT = [ + "01624_soft_constraints", + # Check after ConstantNode refactoring + "02944_variant_as_common_type", +] + + +class FTResultsProcessor: + @dataclasses.dataclass + class Summary: + total: int + skipped: int + unknown: int + failed: int + success: int + test_results: List[Result] + hung: bool = False + server_died: bool = False + retries: bool = False + success_finish: bool = False + test_end: bool = True + + def __init__(self, wd): + self.tests_output_file = f"{wd}/test_result.txt" + # self.test_results_parsed_file = f"{wd}/test_result.tsv" + # self.status_file = f"{wd}/check_status.tsv" + self.broken_tests = BROKEN_TESTS_ANALYZER_TECH_DEBT + + def _process_test_output(self): + total = 0 + skipped = 0 + unknown = 0 + failed = 0 + success = 0 + hung = False + server_died = False + retries = False + success_finish = False + test_results = [] + test_end = True + + with open(self.tests_output_file, "r", encoding="utf-8") as test_file: + for line in test_file: + original_line = line + line = line.strip() + + if any(s in line for s in SUCCESS_FINISH_SIGNS): + success_finish = True + # Ignore hung check report, since it may be quite large. 
+ # (and may break python parser which has limit of 128KiB for each row). + if HUNG_SIGN in line: + hung = True + break + if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line: + server_died = True + if RETRIES_SIGN in line: + retries = True + if any( + sign in line + for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN) + ): + test_name = line.split(" ")[2].split(":")[0] + + test_time = "" + try: + time_token = line.split("]")[1].strip().split()[0] + float(time_token) + test_time = time_token + except: + pass + + total += 1 + if TIMEOUT_SIGN in line: + if test_name in self.broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "Timeout", test_time, [])) + elif FAIL_SIGN in line: + if test_name in self.broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) + elif UNKNOWN_SIGN in line: + unknown += 1 + test_results.append((test_name, "FAIL", test_time, [])) + elif SKIPPED_SIGN in line: + skipped += 1 + test_results.append((test_name, "SKIPPED", test_time, [])) + else: + if OK_SIGN in line and test_name in self.broken_tests: + skipped += 1 + test_results.append( + ( + test_name, + "NOT_FAILED", + test_time, + [ + "This test passed. Update analyzer_tech_debt.txt.\n" + ], + ) + ) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) + test_end = False + elif ( + len(test_results) > 0 + and test_results[-1][1] == "FAIL" + and not test_end + ): + test_results[-1][3].append(original_line) + # Database printed after everything else in case of failures, + # so this is a stop marker for capturing test output. + # + # And it is handled after everything else to include line with database into the report. + if DATABASE_SIGN in line: + test_end = True + + test_results = [ + Result( + name=test[0], + status=test[1], + start_time=None, + duration=float(test[2]), + info="".join(test[3])[:8192], + ) + for test in test_results + ] + + s = self.Summary( + total=total, + skipped=skipped, + unknown=unknown, + failed=failed, + success=success, + test_results=test_results, + hung=hung, + server_died=server_died, + success_finish=success_finish, + retries=retries, + ) + + return s + + def run(self): + state = Result.Status.SUCCESS + s = self._process_test_output() + test_results = s.test_results + + # # Check test_results.tsv for sanitizer asserts, crashes and other critical errors. + # # If the file is present, it's expected to be generated by stress_test.lib check for critical errors + # # In the end this file will be fully regenerated, including both results from critical errors check and + # # functional test results. + # if test_results_path and os.path.exists(test_results_path): + # with open(test_results_path, "r", encoding="utf-8") as test_results_file: + # existing_test_results = list( + # csv.reader(test_results_file, delimiter="\t") + # ) + # for test in existing_test_results: + # if len(test) < 2: + # unknown += 1 + # else: + # test_results.append(test) + # + # if test[1] != "OK": + # failed += 1 + # else: + # success += 1 + + # is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) + # logging.info("Is flaky check: %s", is_flaky_check) + # # If no tests were run (success == 0) it indicates an error (e.g. 
server did not start or crashed immediately) + # # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. + # if failed != 0 or unknown != 0 or (success == 0 and (not is_flaky_check)): + if s.failed != 0 or s.unknown != 0: + state = Result.Status.FAILED + + if s.hung: + state = Result.Status.FAILED + test_results.append( + Result("Some queries hung", "FAIL", info="Some queries hung") + ) + elif s.server_died: + state = Result.Status.FAILED + # When ClickHouse server crashes, some tests are still running + # and fail because they cannot connect to server + for result in test_results: + if result.status == "FAIL": + result.status = "SERVER_DIED" + test_results.append(Result("Server died", "FAIL", info="Server died")) + elif not s.success_finish: + state = Result.Status.FAILED + test_results.append( + Result("Tests are not finished", "FAIL", info="Tests are not finished") + ) + elif s.retries: + test_results.append( + Result("Some tests restarted", "SKIPPED", info="Some tests restarted") + ) + else: + pass + + # TODO: !!! + # def test_result_comparator(item): + # # sort by status then by check name + # order = { + # "FAIL": 0, + # "SERVER_DIED": 1, + # "Timeout": 2, + # "NOT_FAILED": 3, + # "BROKEN": 4, + # "OK": 5, + # "SKIPPED": 6, + # } + # return order.get(item[1], 10), str(item[0]), item[1] + # + # test_results.sort(key=test_result_comparator) + + return Result.create_from( + name=Environment.JOB_NAME, + results=test_results, + status=state, + files=[self.tests_output_file], + with_info_from_results=False, + ) + + +# if __name__ == "__main__": +# logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") +# parser = argparse.ArgumentParser( +# description="ClickHouse script for parsing results of functional tests" +# ) +# +# parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") +# parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") +# args = parser.parse_args() +# +# broken_tests = [] +# state, description, test_results = process_result( +# args.in_results_dir, +# broken_tests, +# args.in_test_result_file, +# args.in_results_file, +# ) +# logging.info("Result parsed") +# status = (state, description) +# +# +# +# write_results(args.out_results_file, args.out_status_file, test_results, status) +# logging.info("Result written") diff --git a/ci_v2/settings/definitions.py b/ci_v2/settings/definitions.py index 87669cdcf257..4e6a7f213f0f 100644 --- a/ci_v2/settings/definitions.py +++ b/ci_v2/settings/definitions.py @@ -7,6 +7,7 @@ class RunnerLabels: CI_SERVICES = "ci_services" CI_SERVICES_EBS = "ci_services_ebs" + BUILDER = "builder" BASE_BRANCH = "master" @@ -29,142 +30,122 @@ class RunnerLabels: DOCKERS = [ # Docker.Config( # name="clickhouse/binary-builder", - # path="./docker/packager/binary-builder", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/packager/binary-builder", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/cctools", - # path="./docker/packager/cctools", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/packager/cctools", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/test-old-centos", - # path="./docker/test/compatibility/centos", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/compatibility/centos", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/test-old-ubuntu", - # 
path="./docker/test/compatibility/ubuntu", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/compatibility/ubuntu", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/test-util", - # path="./docker/test/util", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/util", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/integration-test", - # path="./docker/test/integration/base", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/integration/base", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/fuzzer", - # path="./docker/test/fuzzer", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/fuzzer", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/performance-comparison", - # path="./docker/test/performance-comparison", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/performance-comparison", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), - # Docker.Config( - # name="clickhouse/fasttest", - # path="./docker/test/fasttest", - # arm64=True, - # amd64=True, - # depends_on=["clickhouse/test-util"], - # ), + Docker.Config( + name="clickhouse/fasttest", + path="./ci_v2/docker/fasttest", + platforms=Docker.Platforms.arm_amd, + depends_on=[], + ), # Docker.Config( # name="clickhouse/test-base", - # path="./docker/test/base", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/base", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-util"], # ), # Docker.Config( # name="clickhouse/clickbench", - # path="./docker/test/clickbench", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/clickbench", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/keeper-jepsen-test", - # path="./docker/test/keeper-jepsen", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/keeper-jepsen", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/server-jepsen-test", - # path="./docker/test/server-jepsen", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/server-jepsen", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/sqllogic-test", - # path="./docker/test/sqllogic", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/sqllogic", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/sqltest", - # path="./docker/test/sqltest", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/sqltest", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/stateless-test", - # path="./docker/test/stateless", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/stateless", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/stateful-test", - # path="./docker/test/stateful", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/stateful", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/stateless-test"], # ), # Docker.Config( # name="clickhouse/stress-test", - # path="./docker/test/stress", - # arm64=True, - # amd64=True, + # 
path="./ci_v2/docker/test/stress", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/stateful-test"], # ), # Docker.Config( # name="clickhouse/unit-test", - # path="./docker/test/unit", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/unit", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/integration-tests-runner", - # path="./docker/test/integration/runner", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/integration/runner", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), Docker.Config( @@ -175,9 +156,8 @@ class RunnerLabels: ), # Docker.Config( # name="clickhouse/docs-builder", - # path="./docker/docs/builder", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/docs/builder", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), ] @@ -249,3 +229,4 @@ class RunnerLabels: class JobNames: STYLE_CHECK = "Style Check" + FAST_TEST = "Fast test" diff --git a/ci_v2/workflows/pull_request.py b/ci_v2/workflows/pull_request.py index 226455c77f24..0e96329788bf 100644 --- a/ci_v2/workflows/pull_request.py +++ b/ci_v2/workflows/pull_request.py @@ -16,12 +16,20 @@ run_in_docker="clickhouse/style-test", ) +fast_test_job = Job.Config( + name=JobNames.FAST_TEST, + runs_on=[RunnerLabels.BUILDER], + command="python3 ./ci_v2/jobs/fast_test.py", + run_in_docker="clickhouse/fasttest", +) + workflow = Workflow.Config( name="PR", event=Workflow.Event.PULL_REQUEST, base_branches=[BASE_BRANCH], jobs=[ style_check_job, + fast_test_job, ], dockers=DOCKERS, secrets=SECRETS, @@ -36,9 +44,7 @@ if __name__ == "__main__": - # example: local job test inside praktika environment + # local job test inside praktika environment from praktika.runner import Runner - Runner.generate_dummy_environment(workflow, style_check_job) - - Runner().run(workflow, style_check_job) + Runner().run(workflow, fast_test_job, docker="fasttest", dummy_env=True) diff --git a/praktika/__init__.py b/praktika/__init__.py new file mode 100644 index 000000000000..bde8fd6066ab --- /dev/null +++ b/praktika/__init__.py @@ -0,0 +1,5 @@ +from .artifact import Artifact +from .docker import Docker +from .job import Job +from .secret import Secret +from .workflow import Workflow diff --git a/praktika/__main__.py b/praktika/__main__.py new file mode 100644 index 000000000000..7f472ecd9aee --- /dev/null +++ b/praktika/__main__.py @@ -0,0 +1,94 @@ +import argparse +import sys + +from praktika.html_prepare import Html +from praktika.utils import Utils +from praktika.validator import Validator +from praktika.yaml_generator import YamlGenerator + + +def create_parser(): + parser = argparse.ArgumentParser(prog="python3 -m praktika") + + subparsers = parser.add_subparsers(dest="command", help="Available subcommands") + + run_parser = subparsers.add_parser("run", help="Job Runner") + run_parser.add_argument("--job", help="Job Name", type=str, required=True) + run_parser.add_argument( + "--workflow", + help="Workflow Name (required if job name is not uniq per config)", + type=str, + default="", + ) + run_parser.add_argument( + "--no-docker", + help="Do not run job in docker even if job config says so, for local test", + action="store_true", + ) + run_parser.add_argument( + "--docker", + help="Custom docker image for job run, for local test", + type=str, + default="", + ) + run_parser.add_argument( + "--param", + help="Custom parameter to pass into a job script, it's up to job script 
how to use it, for local test", + type=str, + default=None, + ) + run_parser.add_argument( + "--ci", + help="When not set - dummy env will be generated, for local test", + action="store_true", + default="", + ) + + _yaml_parser = subparsers.add_parser("yaml", help="Generates Yaml Workflows") + + _html_parser = subparsers.add_parser("html", help="Uploads HTML page for reports") + + return parser + + +if __name__ == "__main__": + parser = create_parser() + args = parser.parse_args() + + if args.command == "yaml": + Validator().validate() + YamlGenerator().generate() + elif args.command == "html": + Html.prepare() + elif args.command == "run": + from praktika.mangle import _get_workflows + from praktika.runner import Runner + + workflows = _get_workflows(name=args.workflow or None) + job_workflow_pairs = [] + for workflow in workflows: + job = workflow.find_job(args.job, lazy=True) + if job: + job_workflow_pairs.append((job, workflow)) + if not job_workflow_pairs: + Utils.raise_with_error( + f"Failed to find job [{args.job}] workflow [{args.workflow}]" + ) + elif len(job_workflow_pairs) > 1: + Utils.raise_with_error( + f"More than one job [{args.job}] found - try specifying workflow name with --workflow" + ) + else: + job, workflow = job_workflow_pairs[0][0], job_workflow_pairs[0][1] + print(f"Going to run job [{job.name}], workflow [{workflow.name}]") + Runner().run( + workflow=workflow, + job=job, + docker=args.docker, + dummy_env=not args.ci, + no_docker=args.no_docker, + param=args.param, + ) + else: + parser.print_help() + sys.exit(1) diff --git a/praktika/_environment.py b/praktika/_environment.py new file mode 100644 index 000000000000..ca84def1d29e --- /dev/null +++ b/praktika/_environment.py @@ -0,0 +1,195 @@ +import dataclasses +import json +import os +from pathlib import Path +from types import SimpleNamespace +from typing import Any, Dict, List, Type + +from praktika import Workflow +from praktika._settings import _Settings +from praktika.utils import MetaClasses, T + + +@dataclasses.dataclass +class _Environment(MetaClasses.Serializable): + WORKFLOW_NAME: str + JOB_NAME: str + REPOSITORY: str + BRANCH: str + SHA: str + PR_NUMBER: int + EVENT_TYPE: str + JOB_OUTPUT_STREAM: str + EVENT_FILE_PATH: str + CHANGE_URL: str + COMMIT_URL: str + BASE_BRANCH: str + RUN_ID: str + RUN_URL: str + INSTANCE_TYPE: str + INSTANCE_ID: str + INSTANCE_LIFE_CYCLE: str + PARAMETER: Any = None + REPORT_INFO: List[str] = dataclasses.field(default_factory=list) + LOCAL_RUN_PARAM: str = "" + name = "environment" + + @classmethod + def file_name_static(cls, _name=""): + return f"{_Settings.TEMP_DIR}/{cls.name}.json" + + @classmethod + def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T: + JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "") + obj["JOB_OUTPUT_STREAM"] = JOB_OUTPUT_STREAM + if "PARAMETER" in obj: + obj["PARAMETER"] = _to_object(obj["PARAMETER"]) + return cls(**obj) + + def add_info(self, info): + self.REPORT_INFO.append(info) + self.dump() + + @classmethod + def get(cls): + if Path(cls.file_name_static()).is_file(): + return cls.from_fs("environment") + else: + print("WARNING: Environment: get from env") + env = cls.from_env() + env.dump() + return env + + def set_job_name(self, job_name): + self.JOB_NAME = job_name + self.dump() + return self + + @staticmethod + def get_needs_statuses(): + if Path(_Settings.WORKFLOW_STATUS_FILE).is_file(): + with open(_Settings.WORKFLOW_STATUS_FILE, "r", encoding="utf8") as f: + return json.load(f) + else: + print( + f"ERROR: Status file 
[{_Settings.WORKFLOW_STATUS_FILE}] does not exist" + ) + raise RuntimeError() + + @classmethod + def from_env(cls) -> "_Environment": + WORKFLOW_NAME = os.getenv("GITHUB_WORKFLOW", "") + JOB_NAME = os.getenv("JOB_NAME", "") + REPOSITORY = os.getenv("GITHUB_REPOSITORY", "") + BRANCH = os.getenv("GITHUB_HEAD_REF", "") + + EVENT_FILE_PATH = os.getenv("GITHUB_EVENT_PATH", "") + JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "") + RUN_ID = os.getenv("GITHUB_RUN_ID", "0") + RUN_URL = f"https://github.com/{REPOSITORY}/actions/runs/{RUN_ID}" + BASE_BRANCH = os.getenv("GITHUB_BASE_REF", "") + + if EVENT_FILE_PATH: + with open(EVENT_FILE_PATH, "r", encoding="utf-8") as f: + github_event = json.load(f) + if "pull_request" in github_event: + EVENT_TYPE = Workflow.Event.PULL_REQUEST + PR_NUMBER = github_event["pull_request"]["number"] + SHA = github_event["pull_request"]["head"]["sha"] + CHANGE_URL = github_event["pull_request"]["html_url"] + COMMIT_URL = CHANGE_URL + f"/commits/{SHA}" + elif "commits" in github_event: + EVENT_TYPE = Workflow.Event.PUSH + SHA = github_event["after"] + CHANGE_URL = github_event["head_commit"]["url"] # commit url + PR_NUMBER = 0 + COMMIT_URL = CHANGE_URL + else: + assert False, "TODO: not supported" + else: + print("WARNING: Local execution - dummy Environment will be generated") + SHA = "TEST" + PR_NUMBER = -1 + EVENT_TYPE = Workflow.Event.PUSH + CHANGE_URL = "" + COMMIT_URL = "" + + INSTANCE_TYPE = ( + os.getenv("INSTANCE_TYPE", None) + # or Shell.get_output("ec2metadata --instance-type") + or "" + ) + INSTANCE_ID = ( + os.getenv("INSTANCE_ID", None) + # or Shell.get_output("ec2metadata --instance-id") + or "" + ) + INSTANCE_LIFE_CYCLE = ( + os.getenv("INSTANCE_LIFE_CYCLE", None) + # or Shell.get_output( + # "curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle" + # ) + or "" + ) + + return _Environment( + WORKFLOW_NAME=WORKFLOW_NAME, + JOB_NAME=JOB_NAME, + REPOSITORY=REPOSITORY, + BRANCH=BRANCH, + EVENT_FILE_PATH=EVENT_FILE_PATH, + JOB_OUTPUT_STREAM=JOB_OUTPUT_STREAM, + SHA=SHA, + EVENT_TYPE=EVENT_TYPE, + PR_NUMBER=PR_NUMBER, + RUN_ID=RUN_ID, + CHANGE_URL=CHANGE_URL, + COMMIT_URL=COMMIT_URL, + RUN_URL=RUN_URL, + BASE_BRANCH=BASE_BRANCH, + INSTANCE_TYPE=INSTANCE_TYPE, + INSTANCE_ID=INSTANCE_ID, + INSTANCE_LIFE_CYCLE=INSTANCE_LIFE_CYCLE, + REPORT_INFO=[], + ) + + def get_s3_prefix(self, latest=False): + return self.get_s3_prefix_static(self.PR_NUMBER, self.BRANCH, self.SHA, latest) + + @classmethod + def get_s3_prefix_static(cls, pr_number, branch, sha, latest=False): + prefix = "" + if pr_number > 0: + prefix += f"{pr_number}" + else: + prefix += f"{branch}" + if latest: + prefix += f"/latest" + elif sha: + prefix += f"/{sha}" + return prefix + + # TODO: find a better place for the function. 
This file should not import praktika.settings + # as it's requires reading users config, that's why imports nested inside the function + def get_report_url(self): + import urllib + + from praktika.settings import Settings + from praktika.utils import Utils + + path = Settings.HTML_S3_PATH + for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items(): + if bucket in path: + path = path.replace(bucket, endpoint) + break + REPORT_URL = f"https://{path}/{Path(Settings.HTML_PAGE_FILE).name}?PR={self.PR_NUMBER}&sha={self.SHA}&name_0={urllib.parse.quote(self.WORKFLOW_NAME, safe='')}&name_1={urllib.parse.quote(self.JOB_NAME, safe='')}" + return REPORT_URL + + +def _to_object(data): + if isinstance(data, dict): + return SimpleNamespace(**{k: _to_object(v) for k, v in data.items()}) + elif isinstance(data, list): + return [_to_object(i) for i in data] + else: + return data diff --git a/praktika/_settings.py b/praktika/_settings.py new file mode 100644 index 000000000000..bfd7ba6c1be4 --- /dev/null +++ b/praktika/_settings.py @@ -0,0 +1,128 @@ +import dataclasses +from pathlib import Path +from typing import Dict, Iterable, List, Optional + + +@dataclasses.dataclass +class _Settings: + ###################################### + # Pipeline generation settings # + ###################################### + if Path("./ci_v2").is_dir(): + # TODO: hack for CH, remove + CI_PATH = "./ci_v2" + else: + CI_PATH = "./ci" + WORKFLOW_PATH_PREFIX: str = "./.github/workflows" + WORKFLOWS_DIRECTORY: str = f"{CI_PATH}/workflows" + SETTINGS_DIRECTORY: str = f"{CI_PATH}/settings" + CI_CONFIG_JOB_NAME = "Config Workflow" + DOCKER_BUILD_JOB_NAME = "Docker Builds" + FINISH_WORKFLOW_JOB_NAME = "Finish Workflow" + READY_FOR_MERGE_STATUS_NAME = "Ready for Merge" + CI_CONFIG_RUNS_ON: Optional[List[str]] = None + DOCKER_BUILD_RUNS_ON: Optional[List[str]] = None + VALIDATE_FILE_PATHS: bool = True + + ###################################### + # Runtime Settings # + ###################################### + MAX_RETRIES_S3 = 3 + MAX_RETRIES_GH = 3 + + ###################################### + # S3 (artifact storage) settings # + ###################################### + S3_ARTIFACT_PATH: str = "" + + ###################################### + # CI workspace settings # + ###################################### + TEMP_DIR: str = "/tmp/praktika" + OUTPUT_DIR: str = f"{TEMP_DIR}/output" + INPUT_DIR: str = f"{TEMP_DIR}/input" + PYTHON_INTERPRETER: str = "python3" + PYTHON_PACKET_MANAGER: str = "pip3" + PYTHON_VERSION: str = "3.9" + INSTALL_PYTHON_FOR_NATIVE_JOBS: bool = False + INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS: str = "./ci/requirements.txt" + ENVIRONMENT_VAR_FILE: str = f"{TEMP_DIR}/environment.json" + RUN_LOG: str = f"{TEMP_DIR}/praktika_run.log" + + SECRET_GH_APP_ID: str = "GH_APP_ID" + SECRET_GH_APP_PEM_KEY: str = "GH_APP_PEM_KEY" + + ENV_SETUP_SCRIPT: str = "/tmp/praktika_setup_env.sh" + WORKFLOW_STATUS_FILE: str = f"{TEMP_DIR}/workflow_status.json" + + ###################################### + # CI Cache settings # + ###################################### + CACHE_VERSION: int = 1 + CACHE_DIGEST_LEN: int = 20 + CACHE_S3_PATH: str = "" + CACHE_LOCAL_PATH: str = f"{TEMP_DIR}/ci_cache" + + ###################################### + # Report settings # + ###################################### + HTML_S3_PATH: str = "" + HTML_PAGE_FILE: str = "./praktika/json.html" + TEXT_CONTENT_EXTENSIONS: Iterable[str] = frozenset([".txt", ".log"]) + S3_BUCKET_TO_HTTP_ENDPOINT: Optional[Dict[str, str]] = None + + DOCKERHUB_USERNAME: str = "" + 
DOCKERHUB_SECRET: str = "" + DOCKER_WD: str = "/wd" + + ###################################### + # CI DB Settings # + ###################################### + SECRET_CI_DB_URL: str = "CI_DB_URL" + SECRET_CI_DB_PASSWORD: str = "CI_DB_PASSWORD" + CI_DB_DB_NAME = "" + CI_DB_TABLE_NAME = "" + CI_DB_INSERT_TIMEOUT_SEC = 5 + + +_USER_DEFINED_SETTINGS = [ + "S3_ARTIFACT_PATH", + "CACHE_S3_PATH", + "HTML_S3_PATH", + "S3_BUCKET_TO_HTTP_ENDPOINT", + "TEXT_CONTENT_EXTENSIONS", + "TEMP_DIR", + "OUTPUT_DIR", + "INPUT_DIR", + "CI_CONFIG_RUNS_ON", + "DOCKER_BUILD_RUNS_ON", + "CI_CONFIG_JOB_NAME", + "PYTHON_INTERPRETER", + "PYTHON_VERSION", + "PYTHON_PACKET_MANAGER", + "INSTALL_PYTHON_FOR_NATIVE_JOBS", + "INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS", + "MAX_RETRIES_S3", + "MAX_RETRIES_GH", + "VALIDATE_FILE_PATHS", + "DOCKERHUB_USERNAME", + "DOCKERHUB_SECRET", + "READY_FOR_MERGE_STATUS_NAME", + "SECRET_CI_DB_URL", + "SECRET_CI_DB_PASSWORD", + "CI_DB_DB_NAME", + "CI_DB_TABLE_NAME", + "CI_DB_INSERT_TIMEOUT_SEC", + "SECRET_GH_APP_PEM_KEY", + "SECRET_GH_APP_ID", +] + + +class GHRunners: + ubuntu = "ubuntu-latest" + + +if __name__ == "__main__": + for setting in _USER_DEFINED_SETTINGS: + print(_Settings().__getattribute__(setting)) + # print(dataclasses.asdict(_Settings())) diff --git a/praktika/artifact.py b/praktika/artifact.py new file mode 100644 index 000000000000..ba05f18b9b16 --- /dev/null +++ b/praktika/artifact.py @@ -0,0 +1,33 @@ +from dataclasses import dataclass + + +class Artifact: + class Type: + GH = "github" + S3 = "s3" + PHONY = "phony" + + @dataclass + class Config: + """ + name - artifact name + type - artifact type, see Artifact.Type + path - file path or glob, e.g. "path/**/[abc]rtifac?/*" + """ + + name: str + type: str + path: str + _provided_by: str = "" + _s3_path: str = "" + + def is_s3_artifact(self): + return self.type == Artifact.Type.S3 + + @classmethod + def define_artifact(cls, name, type, path): + return cls.Config(name=name, type=type, path=path) + + @classmethod + def define_gh_artifact(cls, name, path): + return cls.define_artifact(name=name, type=cls.Type.GH, path=path) diff --git a/praktika/cache.py b/praktika/cache.py new file mode 100644 index 000000000000..cbaea9b489bb --- /dev/null +++ b/praktika/cache.py @@ -0,0 +1,127 @@ +import dataclasses +import json +from pathlib import Path + +from praktika import Artifact, Job, Workflow +from praktika._environment import _Environment +from praktika.digest import Digest +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Utils + + +class Cache: + @dataclasses.dataclass + class CacheRecord: + class Type: + SUCCESS = "success" + + type: str + sha: str + pr_number: int + branch: str + + def dump(self, path): + with open(path, "w", encoding="utf8") as f: + json.dump(dataclasses.asdict(self), f) + + @classmethod + def from_fs(cls, path): + with open(path, "r", encoding="utf8") as f: + return Cache.CacheRecord(**json.load(f)) + + @classmethod + def from_dict(cls, obj): + return Cache.CacheRecord(**obj) + + def __init__(self): + self.digest = Digest() + self.success = {} # type Dict[str, Any] + + @classmethod + def push_success_record(cls, job_name, job_digest, sha): + type_ = Cache.CacheRecord.Type.SUCCESS + record = Cache.CacheRecord( + type=type_, + sha=sha, + pr_number=_Environment.get().PR_NUMBER, + branch=_Environment.get().BRANCH, + ) + assert ( + Settings.CACHE_S3_PATH + ), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache" + record_path = 
f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}" + record_file = Path(Settings.TEMP_DIR) / type_ + record.dump(record_file) + S3.copy_file_to_s3(s3_path=record_path, local_path=record_file) + record_file.unlink() + + def fetch_success(self, job_name, job_digest): + type_ = Cache.CacheRecord.Type.SUCCESS + assert ( + Settings.CACHE_S3_PATH + ), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache" + record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}/{type_}" + record_file_local_dir = ( + f"{Settings.CACHE_LOCAL_PATH}/{Utils.normalize_string(job_name)}/" + ) + Path(record_file_local_dir).mkdir(parents=True, exist_ok=True) + + if S3.head_object(record_path): + res = S3.copy_file_from_s3( + s3_path=record_path, local_path=record_file_local_dir + ) + else: + res = None + + if res: + print(f"Cache record found, job [{job_name}], digest [{job_digest}]") + self.success[job_name] = True + return Cache.CacheRecord.from_fs(Path(record_file_local_dir) / type_) + return None + + +if __name__ == "__main__": + # test + c = Cache() + workflow = Workflow.Config( + name="TEST", + event=Workflow.Event.PULL_REQUEST, + jobs=[ + Job.Config( + name="JobA", + runs_on=["some"], + command="python -m unittest ./ci/tests/example_1/test_example_produce_artifact.py", + provides=["greet"], + job_requirements=Job.Requirements( + python_requirements_txt="./ci/requirements.txt" + ), + digest_config=Job.CacheDigestConfig( + # example: use glob to include files + include_paths=["./ci/tests/example_1/test_example_consume*.py"], + ), + ), + Job.Config( + name="JobB", + runs_on=["some"], + command="python -m unittest ./ci/tests/example_1/test_example_consume_artifact.py", + requires=["greet"], + job_requirements=Job.Requirements( + python_requirements_txt="./ci/requirements.txt" + ), + digest_config=Job.CacheDigestConfig( + # example: use dir to include files recursively + include_paths=["./ci/tests/example_1"], + # example: use glob to exclude files from digest + exclude_paths=[ + "./ci/tests/example_1/test_example_consume*", + "./**/*.pyc", + ], + ), + ), + ], + artifacts=[Artifact.Config(type="s3", name="greet", path="hello")], + enable_cache=True, + ) + for job in workflow.jobs: + print(c.digest.calc_job_digest(job)) diff --git a/praktika/cidb.py b/praktika/cidb.py new file mode 100644 index 000000000000..087845ec762b --- /dev/null +++ b/praktika/cidb.py @@ -0,0 +1,136 @@ +import copy +import dataclasses +import json +from typing import Optional + +import requests +from praktika._environment import _Environment +from praktika.result import Result +from praktika.settings import Settings +from praktika.utils import Utils + + +class CIDB: + @dataclasses.dataclass + class TableRecord: + pull_request_number: int + commit_sha: str + commit_url: str + check_name: str + check_status: str + check_duration_ms: int + check_start_time: int + report_url: str + pull_request_url: str + base_ref: str + base_repo: str + head_ref: str + head_repo: str + task_url: str + instance_type: str + instance_id: str + test_name: str + test_status: str + test_duration_ms: Optional[int] + test_context_raw: str + + def __init__(self, url, passwd): + self.url = url + self.auth = { + "X-ClickHouse-User": "default", + "X-ClickHouse-Key": passwd, + } + + @classmethod + def json_data_generator(cls, result: Result): + env = _Environment.get() + base_record = cls.TableRecord( + pull_request_number=env.PR_NUMBER, + commit_sha=env.SHA, + 
commit_url=env.COMMIT_URL, + check_name=result.name, + check_status=result.status, + check_duration_ms=int(result.duration * 1000), + check_start_time=Utils.timestamp_to_str(result.start_time), + report_url=env.get_report_url(), + pull_request_url=env.CHANGE_URL, + base_ref=env.BASE_BRANCH, + base_repo=env.REPOSITORY, + head_ref=env.BRANCH, + # TODO: remove from table? + head_repo=env.REPOSITORY, + # TODO: remove from table? + task_url="", + instance_type=",".join([env.INSTANCE_TYPE, env.INSTANCE_LIFE_CYCLE]), + instance_id=env.INSTANCE_ID, + test_name="", + test_status="", + test_duration_ms=None, + test_context_raw=result.info, + ) + yield json.dumps(dataclasses.asdict(base_record)) + for result_ in result.results: + record = copy.deepcopy(base_record) + record.test_name = result_.name + if result_.start_time: + record.check_start_time = (Utils.timestamp_to_str(result.start_time),) + record.test_status = result_.status + record.test_duration_ms = int(result_.duration * 1000) + record.test_context_raw = result_.info + yield json.dumps(dataclasses.asdict(record)) + + def insert(self, result: Result): + # Create a session object + params = { + "database": Settings.CI_DB_DB_NAME, + "query": f"INSERT INTO {Settings.CI_DB_TABLE_NAME} FORMAT JSONEachRow", + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + + session = requests.Session() + + for json_str in self.json_data_generator(result): + try: + response1 = session.post( + url=self.url, + params=params, + data=json_str, + headers=self.auth, + timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC, + ) + except Exception as ex: + raise ex + + session.close() + + def check(self): + # Create a session object + params = { + "database": Settings.CI_DB_DB_NAME, + "query": f"SELECT 1", + } + try: + response = requests.post( + url=self.url, + params=params, + data="", + headers=self.auth, + timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC, + ) + if not response.ok: + print("ERROR: No connection to CI DB") + return ( + False, + f"ERROR: No connection to CI DB [{response.status_code}/{response.reason}]", + ) + if not response.json() == 1: + print("ERROR: CI DB smoke test failed select 1 == 1") + return ( + False, + f"ERROR: CI DB smoke test failed [select 1 ==> {response.json()}]", + ) + except Exception as ex: + print(f"ERROR: Exception [{ex}]") + return False, "CIDB: ERROR: Exception [{ex}]" + return True, "" diff --git a/praktika/digest.py b/praktika/digest.py new file mode 100644 index 000000000000..44317d5249ea --- /dev/null +++ b/praktika/digest.py @@ -0,0 +1,100 @@ +import dataclasses +import hashlib +from hashlib import md5 +from typing import List + +from praktika import Job +from praktika.docker import Docker +from praktika.settings import Settings +from praktika.utils import Utils + + +class Digest: + def __init__(self): + self.digest_cache = {} + + @staticmethod + def _hash_digest_config(digest_config: Job.CacheDigestConfig) -> str: + data_dict = dataclasses.asdict(digest_config) + hash_obj = md5() + hash_obj.update(str(data_dict).encode()) + hash_string = hash_obj.hexdigest() + return hash_string + + def calc_job_digest(self, job_config: Job.Config): + config = job_config.digest_config + if not config: + return "f" * Settings.CACHE_DIGEST_LEN + + cache_key = self._hash_digest_config(config) + + if cache_key in self.digest_cache: + return self.digest_cache[cache_key] + + included_files = Utils.traverse_paths( + job_config.digest_config.include_paths, + job_config.digest_config.exclude_paths, + sorted=True, + ) + + print(f"calc 
digest: hash_key [{cache_key}], include [{included_files}] files") + # Sort files to ensure consistent hash calculation + included_files.sort() + + # Calculate MD5 hash + res = "" + if not included_files: + res = "f" * Settings.CACHE_DIGEST_LEN + print(f"NOTE: empty digest config [{config}] - return dummy digest") + else: + hash_md5 = hashlib.md5() + for file_path in included_files: + res = self._calc_file_digest(file_path, hash_md5) + assert res + self.digest_cache[cache_key] = res + return res + + def calc_docker_digest( + self, + docker_config: Docker.Config, + dependency_configs: List[Docker.Config], + hash_md5=None, + ): + """ + + :param hash_md5: + :param dependency_configs: list of Docker.Config(s) that :param docker_config: depends on + :param docker_config: Docker.Config to calculate digest for + :return: + """ + print(f"Calculate digest for docker [{docker_config.name}]") + paths = Utils.traverse_path(docker_config.path, sorted=True) + if not hash_md5: + hash_md5 = hashlib.md5() + + dependencies = [] + for dependency_name in docker_config.depends_on: + for dependency_config in dependency_configs: + if dependency_config.name == dependency_name: + print( + f"Add docker [{dependency_config.name}] as dependency for docker [{docker_config.name}] digest calculation" + ) + dependencies.append(dependency_config) + + for dependency in dependencies: + _ = self.calc_docker_digest(dependency, dependency_configs, hash_md5) + + for path in paths: + _ = self._calc_file_digest(path, hash_md5=hash_md5) + + return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN] + + @staticmethod + def _calc_file_digest(file_path, hash_md5): + # Calculate MD5 hash + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + + res = hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN] + return res diff --git a/praktika/docker.py b/praktika/docker.py new file mode 100644 index 000000000000..82e97b4624c1 --- /dev/null +++ b/praktika/docker.py @@ -0,0 +1,60 @@ +import dataclasses +from typing import List + +from praktika.utils import Shell + + +class Docker: + class Platforms: + ARM = "linux/arm64" + AMD = "linux/amd64" + arm_amd = [ARM, AMD] + + @dataclasses.dataclass + class Config: + name: str + path: str + depends_on: List[str] + platforms: List[str] + + @classmethod + def build(cls, config: "Docker.Config", log_file, digests, add_latest): + tags_substr = f" -t {config.name}:{digests[config.name]}" + if add_latest: + tags_substr = f" -t {config.name}:latest" + + from_tag = "" + if config.depends_on: + assert ( + len(config.depends_on) == 1 + ), f"Only one dependency in depends_on is currently supported, docker [{config}]" + from_tag = f" --build-arg FROM_TAG={digests[config.depends_on[0]]}" + + command = f"docker buildx build --platform {','.join(config.platforms)} {tags_substr} {from_tag} --cache-to type=inline --cache-from type=registry,ref={config.name} --push {config.path}" + return Shell.run(command, log_file=log_file, verbose=True) + + @classmethod + def sort_in_build_order(cls, dockers: List["Docker.Config"]): + ready_names = [] + i = 0 + while i < len(dockers): + docker = dockers[i] + if not docker.depends_on or all( + dep in ready_names for dep in docker.depends_on + ): + ready_names.append(docker.name) + i += 1 + else: + dockers.append(dockers.pop(i)) + return dockers + + @classmethod + def login(cls, user_name, user_password): + print("Docker: log in to dockerhub") + return Shell.check( + f"docker login --username '{user_name}' --password-stdin", + 
strict=True, + stdin_str=user_password, + encoding="utf-8", + verbose=True, + ) diff --git a/praktika/environment.py b/praktika/environment.py new file mode 100644 index 000000000000..8f53aa6230bc --- /dev/null +++ b/praktika/environment.py @@ -0,0 +1,3 @@ +from praktika._environment import _Environment + +Environment = _Environment.get() diff --git a/praktika/execution/__init__.py b/praktika/execution/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/praktika/execution/__main__.py b/praktika/execution/__main__.py new file mode 100644 index 000000000000..c1f08fcca6a7 --- /dev/null +++ b/praktika/execution/__main__.py @@ -0,0 +1,4 @@ +from praktika.execution.machine_init import run + +if __name__ == "__main__": + run() diff --git a/praktika/execution/execution_settings.py b/praktika/execution/execution_settings.py new file mode 100644 index 000000000000..d04b9a773ec7 --- /dev/null +++ b/praktika/execution/execution_settings.py @@ -0,0 +1,31 @@ +import os + +from praktika.utils import MetaClasses + + +class ScalingType(metaclass=MetaClasses.WithIter): + DISABLED = "disabled" + AUTOMATIC_SCALE_DOWN = "scale_down" + AUTOMATIC_SCALE_UP_DOWN = "scale" + + +class DefaultExecutionSettings: + GH_ACTIONS_DIRECTORY: str = "/home/ubuntu/gh_actions" + RUNNER_SCALING_TYPE: str = ScalingType.AUTOMATIC_SCALE_UP_DOWN + MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC: int = 30 + + +class ExecutionSettings: + GH_ACTIONS_DIRECTORY = os.getenv( + "GH_ACTIONS_DIRECTORY", DefaultExecutionSettings.GH_ACTIONS_DIRECTORY + ) + RUNNER_SCALING_TYPE = os.getenv( + "RUNNER_SCALING_TYPE", DefaultExecutionSettings.RUNNER_SCALING_TYPE + ) + MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC = int( + os.getenv( + "MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC", + DefaultExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC, + ) + ) + LOCAL_EXECUTION = bool(os.getenv("CLOUD", "0") == "0") diff --git a/praktika/execution/machine_init.py b/praktika/execution/machine_init.py new file mode 100644 index 000000000000..7829538c5a91 --- /dev/null +++ b/praktika/execution/machine_init.py @@ -0,0 +1,338 @@ +import os +import platform +import signal +import time +import traceback + +import requests +from praktika.execution.execution_settings import ExecutionSettings, ScalingType +from praktika.utils import ContextManager, Shell + + +class StateMachine: + class StateNames: + INIT = "init" + WAIT = "wait" + RUN = "run" + + def __init__(self): + self.state = self.StateNames.INIT + self.scale_type = ExecutionSettings.RUNNER_SCALING_TYPE + self.machine = Machine(scaling_type=self.scale_type).update_instance_info() + self.state_updated_at = int(time.time()) + self.forked = False + + def kick(self): + if self.state == self.StateNames.INIT: + self.machine.config_actions().run_actions_async() + print("State Machine: INIT -> WAIT") + self.state = self.StateNames.WAIT + self.state_updated_at = int(time.time()) + # TODO: add monitoring + if not self.machine.is_actions_process_healthy(): + print(f"ERROR: GH runner process unexpectedly died") + self.machine.self_terminate(decrease_capacity=False) + elif self.state == self.StateNames.WAIT: + res = self.machine.check_job_assigned() + if res: + print("State Machine: WAIT -> RUN") + self.state = self.StateNames.RUN + self.state_updated_at = int(time.time()) + self.check_scale_up() + else: + self.check_scale_down() + elif self.state == self.StateNames.RUN: + res = self.machine.check_job_running() + if res: + pass + else: + print("State Machine: RUN -> INIT") + self.state = self.StateNames.INIT + 
self.state_updated_at = int(time.time()) + + def check_scale_down(self): + if self.scale_type not in ( + ScalingType.AUTOMATIC_SCALE_DOWN, + ScalingType.AUTOMATIC_SCALE_UP_DOWN, + ): + return + if ScalingType.AUTOMATIC_SCALE_UP_DOWN and not self.forked: + print( + f"Scaling type is AUTOMATIC_SCALE_UP_DOWN and machine has not run a job - do not scale down" + ) + return + if ( + int(time.time()) - self.state_updated_at + > ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC + ): + print( + f"No job assigned for more than MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC [{ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC}] - scale down the instance" + ) + if not ExecutionSettings.LOCAL_EXECUTION: + self.machine.self_terminate(decrease_capacity=True) + else: + print("Local execution - skip scaling operation") + + def check_scale_up(self): + if self.scale_type not in (ScalingType.AUTOMATIC_SCALE_UP_DOWN,): + return + if self.forked: + print("This instance already forked once - do not scale up") + return + self.machine.self_fork() + self.forked = True + + def run(self): + self.machine.unconfig_actions() + while True: + self.kick() + time.sleep(5) + + def terminate(self): + try: + self.machine.unconfig_actions() + except: + print("WARNING: failed to unconfig runner") + if not ExecutionSettings.LOCAL_EXECUTION: + if self.machine is not None: + self.machine.self_terminate(decrease_capacity=False) + time.sleep(10) + # wait termination + print("ERROR: failed to terminate instance via aws cli - try os call") + os.system("sudo shutdown now") + else: + print("NOTE: Local execution - machine won't be terminated") + + +class Machine: + @staticmethod + def get_latest_gh_actions_release(): + url = f"https://api.github.com/repos/actions/runner/releases/latest" + response = requests.get(url, timeout=5) + if response.status_code == 200: + latest_release = response.json() + return latest_release["tag_name"].removeprefix("v") + else: + print(f"Failed to get the latest release: {response.status_code}") + return None + + def __init__(self, scaling_type): + self.os_name = platform.system().lower() + assert self.os_name == "linux", f"Unsupported OS [{self.os_name}]" + if platform.machine() == "x86_64": + self.arch = "x64" + elif "aarch64" in platform.machine().lower(): + self.arch = "arm64" + else: + assert False, f"Unsupported arch [{platform.machine()}]" + self.instance_id = None + self.asg_name = None + self.runner_api_endpoint = None + self.runner_type = None + self.labels = [] + self.proc = None + assert scaling_type in ScalingType + self.scaling_type = scaling_type + + def install_gh_actions_runner(self): + gh_actions_version = self.get_latest_gh_actions_release() + assert self.os_name and gh_actions_version and self.arch + Shell.check( + f"rm -rf {ExecutionSettings.GH_ACTIONS_DIRECTORY}", + strict=True, + verbose=True, + ) + Shell.check( + f"mkdir {ExecutionSettings.GH_ACTIONS_DIRECTORY}", strict=True, verbose=True + ) + with ContextManager.cd(ExecutionSettings.GH_ACTIONS_DIRECTORY): + Shell.check( + f"curl -O -L https://github.com/actions/runner/releases/download/v{gh_actions_version}/actions-runner-{self.os_name}-{self.arch}-{gh_actions_version}.tar.gz", + strict=True, + verbose=True, + ) + Shell.check(f"tar xzf *tar.gz", strict=True, verbose=True) + Shell.check(f"rm -f *tar.gz", strict=True, verbose=True) + Shell.check(f"sudo ./bin/installdependencies.sh", strict=True, verbose=True) + Shell.check( + f"chown -R ubuntu:ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}", + strict=True, + verbose=True, + ) + + def 
_get_gh_token_from_ssm(self): + gh_token = Shell.get_output_or_raise( + "/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value" + ) + return gh_token + + def update_instance_info(self): + self.instance_id = Shell.get_output_or_raise("ec2metadata --instance-id") + assert self.instance_id + self.asg_name = Shell.get_output( + f"aws ec2 describe-instances --instance-id {self.instance_id} --query \"Reservations[].Instances[].Tags[?Key=='aws:autoscaling:groupName'].Value\" --output text" + ) + # self.runner_type = Shell.get_output_or_raise( + # f'/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values={self.instance_id}" --query "Tags[?Key==\'github:runner-type\'].Value" --output text' + # ) + self.runner_type = self.asg_name + if ( + self.scaling_type != ScalingType.DISABLED + and not ExecutionSettings.LOCAL_EXECUTION + ): + assert ( + self.asg_name and self.runner_type + ), f"Failed to retrieve ASG name, which is required for scaling_type [{self.scaling_type}]" + org = os.getenv("MY_ORG", "") + assert ( + org + ), "MY_ORG env variable myst be set to use init script for runner machine" + self.runner_api_endpoint = f"https://github.com/{org}" + + self.labels = ["self-hosted", self.runner_type] + return self + + @classmethod + def check_job_assigned(cls): + runner_pid = Shell.get_output_or_raise("pgrep Runner.Listener") + if not runner_pid: + print("check_job_assigned: No runner pid") + return False + log_file = Shell.get_output_or_raise( + f"lsof -p {runner_pid} | grep -o {ExecutionSettings.GH_ACTIONS_DIRECTORY}/_diag/Runner.*log" + ) + if not log_file: + print("check_job_assigned: No log file") + return False + return Shell.check(f"grep -q 'Terminal] .* Running job:' {log_file}") + + def check_job_running(self): + if self.proc is None: + print(f"WARNING: No job started") + return False + exit_code = self.proc.poll() + if exit_code is None: + return True + else: + print(f"Job runner finished with exit code [{exit_code}]") + self.proc = None + return False + + def config_actions(self): + if not self.instance_id: + self.update_instance_info() + token = self._get_gh_token_from_ssm() + assert token and self.instance_id and self.runner_api_endpoint and self.labels + command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh --token {token} \ + --url {self.runner_api_endpoint} --ephemeral --unattended --replace \ + --runnergroup Default --labels {','.join(self.labels)} --work wd --name {self.instance_id}" + res = 1 + i = 0 + while i < 10 and res != 0: + res = Shell.run(command) + i += 1 + if res != 0: + print( + f"ERROR: failed to configure GH actions runner after [{i}] attempts, exit code [{res}], retry after 10s" + ) + time.sleep(10) + self._get_gh_token_from_ssm() + if res == 0: + print("GH action runner has been configured") + else: + assert False, "GH actions runner configuration failed" + return self + + def unconfig_actions(self): + token = self._get_gh_token_from_ssm() + command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh remove --token {token}" + Shell.check(command, strict=True) + return self + + def run_actions_async(self): + command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/run.sh" + self.proc = Shell.run_async(command) + assert self.proc is not None + return self + + def is_actions_process_healthy(self): + try: + if self.proc.poll() is None: + return True + + stdout, stderr = self.proc.communicate() + + if self.proc.returncode != 0: 
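+                # A non-zero return code means the ephemeral GH Actions runner exited
+                # abnormally: poll() above returned its exit status (it returns None
+                # while the child is still alive) and communicate() collected its output.
+                # The same liveness check in isolation (a rough, self-contained sketch,
+                # not part of this class):
+                #
+                #     import subprocess
+                #     proc = subprocess.Popen(
+                #         ["sleep", "1"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+                #     )
+                #     if proc.poll() is None:
+                #         print("still running")
+                #     else:
+                #         stdout, stderr = proc.communicate()
+                #         print(f"exited with code {proc.returncode}, stderr: {stderr}")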
+ # Handle failure + print( + f"GH Action process failed with return code {self.proc.returncode}" + ) + print(f"Error output: {stderr}") + return False + else: + print(f"GH Action process is not running") + return False + except Exception as e: + print(f"GH Action process exception: {e}") + return False + + def self_terminate(self, decrease_capacity): + print( + f"WARNING: Self terminate is called, decrease_capacity [{decrease_capacity}]" + ) + traceback.print_stack() + if not self.instance_id: + self.update_instance_info() + assert self.instance_id + command = f"aws autoscaling terminate-instance-in-auto-scaling-group --instance-id {self.instance_id}" + if decrease_capacity: + command += " --should-decrement-desired-capacity" + else: + command += " --no-should-decrement-desired-capacity" + Shell.check( + command=command, + verbose=True, + ) + + def self_fork(self): + current_capacity = Shell.get_output( + f'aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name {self.asg_name} \ + --query "AutoScalingGroups[0].DesiredCapacity" --output text' + ) + current_capacity = int(current_capacity) + if not current_capacity: + print("ERROR: failed to get current capacity - cannot scale up") + return + desired_capacity = current_capacity + 1 + command = f"aws autoscaling set-desired-capacity --auto-scaling-group-name {self.asg_name} --desired-capacity {desired_capacity}" + print(f"Increase capacity [{current_capacity} -> {desired_capacity}]") + res = Shell.check( + command=command, + verbose=True, + ) + if not res: + print("ERROR: failed to increase capacity - cannot scale up") + + +def handle_signal(signum, _frame): + print(f"FATAL: Received signal {signum}") + raise RuntimeError(f"killed by signal {signum}") + + +def run(): + signal.signal(signal.SIGINT, handle_signal) + signal.signal(signal.SIGTERM, handle_signal) + m = None + try: + m = StateMachine() + m.run() + except Exception as e: + print(f"FATAL: Exception [{e}] - terminate instance") + time.sleep(10) + if m: + m.terminate() + raise e + + +if __name__ == "__main__": + run() diff --git a/praktika/favicon/lambda_function.py b/praktika/favicon/lambda_function.py new file mode 100644 index 000000000000..7d89566de8c7 --- /dev/null +++ b/praktika/favicon/lambda_function.py @@ -0,0 +1,102 @@ +import base64 +import random +import struct +import zlib + + +def create_favicon(): + # Image dimensions + width = 32 + height = 32 + + # Initialize a transparent background image (RGBA: 4 bytes per pixel) + image_data = bytearray( + [0, 0, 0, 0] * width * height + ) # Set alpha to 0 for transparency + + # Draw 4 vertical lines with color #FAFF68 (RGB: 250, 255, 104) + line_color = [250, 255, 104, 255] # RGBA for #FAFF68 with full opacity + line_width = 4 + space_width = 3 + x_start = space_width + line_number = 4 + + line_height = height - space_width + + for i in range(line_number): + # Randomly pick a starting y position for each line + y_start = random.randint(0, height - 1) + + # Draw the line with random shift along Y-axis + for y in range(line_height): + y_pos = (y + y_start) % height + for x in range(line_width): + pixel_index = (y_pos * width + x_start + x) * 4 + image_data[pixel_index : pixel_index + 4] = line_color + + x_start += line_width + space_width + + # Convert the RGBA image to PNG format + png_data = create_png(width, height, image_data) + + # Convert PNG to ICO format + ico_data = create_ico(png_data) + + return ico_data + + +def create_png(width, height, image_data): + def write_chunk(chunk_type, data): + chunk_len = 
struct.pack(">I", len(data)) + chunk_crc = struct.pack(">I", zlib.crc32(chunk_type + data) & 0xFFFFFFFF) + return chunk_len + chunk_type + data + chunk_crc + + png_signature = b"\x89PNG\r\n\x1a\n" + ihdr_chunk = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0) + idat_data = zlib.compress( + b"".join( + b"\x00" + image_data[y * width * 4 : (y + 1) * width * 4] + for y in range(height) + ), + 9, + ) + idat_chunk = write_chunk(b"IDAT", idat_data) + iend_chunk = write_chunk(b"IEND", b"") + + return png_signature + write_chunk(b"IHDR", ihdr_chunk) + idat_chunk + iend_chunk + + +def create_ico(png_data): + # ICO header: reserved (2 bytes), type (2 bytes), image count (2 bytes) + ico_header = struct.pack(" None: + wf = _get_workflows(workflow_name) # type: List[Workflow.Config] + pem = wf[0].get_secret(Settings.SECRET_GH_APP_PEM_KEY).get_value() + assert pem + app_id = wf[0].get_secret(Settings.SECRET_GH_APP_ID).get_value() + # Generate JWT + jwt_token = cls._generate_jwt(app_id, pem) + # Get Installation ID + installation_id = cls._get_installation_id(jwt_token) + # Get Installation Access Token + access_token = cls._get_access_token(jwt_token, installation_id) + Shell.check(f"echo {access_token} | gh auth login --with-token", strict=True) + + +if __name__ == "__main__": + GHAuth.auth(sys.argv[1]) diff --git a/praktika/hook_cache.py b/praktika/hook_cache.py new file mode 100644 index 000000000000..b1b5c654f20c --- /dev/null +++ b/praktika/hook_cache.py @@ -0,0 +1,124 @@ +from praktika._environment import _Environment +from praktika.cache import Cache +from praktika.mangle import _get_workflows +from praktika.runtime import RunConfig +from praktika.settings import Settings +from praktika.utils import Utils + + +class CacheRunnerHooks: + @classmethod + def configure(cls, _workflow): + workflow_config = RunConfig.from_fs(_workflow.name) + cache = Cache() + assert _Environment.get().WORKFLOW_NAME + workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0] + print(f"Workflow Configure, workflow [{workflow.name}]") + assert ( + workflow.enable_cache + ), f"Outdated yaml pipelines or BUG. 
Configuration must be run only for workflow with enabled cache, workflow [{workflow.name}]" + artifact_digest_map = {} + job_digest_map = {} + for job in workflow.jobs: + if not job.digest_config: + print( + f"NOTE: job [{job.name}] has no Config.digest_config - skip cache check, always run" + ) + digest = cache.digest.calc_job_digest(job_config=job) + job_digest_map[job.name] = digest + if job.provides: + # assign the job digest also to the artifacts it provides + for artifact in job.provides: + artifact_digest_map[artifact] = digest + for job in workflow.jobs: + digests_combined_list = [] + if job.requires: + # include digest of required artifact to the job digest, so that they affect job state + for artifact_name in job.requires: + if artifact_name not in [ + artifact.name for artifact in workflow.artifacts + ]: + # phony artifact assumed to be not affecting jobs that depend on it + continue + digests_combined_list.append(artifact_digest_map[artifact_name]) + digests_combined_list.append(job_digest_map[job.name]) + final_digest = "-".join(digests_combined_list) + workflow_config.digest_jobs[job.name] = final_digest + + assert ( + workflow_config.digest_jobs + ), f"BUG, Workflow with enabled cache must have job digests after configuration, wf [{workflow.name}]" + + print("Check remote cache") + job_to_cache_record = {} + for job_name, job_digest in workflow_config.digest_jobs.items(): + record = cache.fetch_success(job_name=job_name, job_digest=job_digest) + if record: + assert ( + Utils.normalize_string(job_name) + not in workflow_config.cache_success + ) + workflow_config.cache_success.append(job_name) + workflow_config.cache_success_base64.append(Utils.to_base64(job_name)) + job_to_cache_record[job_name] = record + + print("Check artifacts to reuse") + for job in workflow.jobs: + if job.name in workflow_config.cache_success: + if job.provides: + for artifact_name in job.provides: + workflow_config.cache_artifacts[artifact_name] = ( + job_to_cache_record[job.name] + ) + + print(f"Write config to GH's job output") + with open(_Environment.get().JOB_OUTPUT_STREAM, "a", encoding="utf8") as f: + print( + f"DATA={workflow_config.to_json()}", + file=f, + ) + print(f"WorkflowRuntimeConfig: [{workflow_config.to_json(pretty=True)}]") + print( + "Dump WorkflowConfig to fs, the next hooks in this job might want to see it" + ) + workflow_config.dump() + + return workflow_config + + @classmethod + def pre_run(cls, _workflow, _job, _required_artifacts=None): + path_prefixes = [] + if _job.name == Settings.CI_CONFIG_JOB_NAME: + # SPECIAL handling + return path_prefixes + env = _Environment.get() + runtime_config = RunConfig.from_fs(_workflow.name) + required_artifacts = [] + if _required_artifacts: + required_artifacts = _required_artifacts + for artifact in required_artifacts: + if artifact.name in runtime_config.cache_artifacts: + record = runtime_config.cache_artifacts[artifact.name] + print(f"Reuse artifact [{artifact.name}] from [{record}]") + path_prefixes.append( + env.get_s3_prefix_static( + record.pr_number, record.branch, record.sha + ) + ) + else: + path_prefixes.append(env.get_s3_prefix()) + return path_prefixes + + @classmethod + def run(cls, workflow, job): + pass + + @classmethod + def post_run(cls, workflow, job): + if job.name == Settings.CI_CONFIG_JOB_NAME: + return + if job.digest_config: + # cache is enabled, and it's a job that supposed to be cached (has defined digest config) + workflow_runtime = RunConfig.from_fs(workflow.name) + job_digest = 
workflow_runtime.digest_jobs[job.name] + Cache.push_success_record(job.name, job_digest, workflow_runtime.sha) diff --git a/praktika/hook_html.py b/praktika/hook_html.py new file mode 100644 index 000000000000..c998e817fe71 --- /dev/null +++ b/praktika/hook_html.py @@ -0,0 +1,153 @@ +import urllib.parse +from pathlib import Path + +from praktika._environment import _Environment +from praktika.gh import GH +from praktika.parser import WorkflowConfigParser +from praktika.result import Result, ResultInfo +from praktika.runtime import RunConfig +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Utils + + +class HtmlRunnerHooks: + @classmethod + def configure(cls, _workflow): + # generate pending Results for all jobs in the workflow + if _workflow.enable_cache: + skip_jobs = RunConfig.from_fs(_workflow.name).cache_success + else: + skip_jobs = [] + + env = _Environment.get() + results = [] + for job in _workflow.jobs: + if job.name not in skip_jobs: + result = Result.generate_pending(job.name) + else: + result = Result.generate_skipped(job.name) + results.append(result) + summary_result = Result.generate_pending(_workflow.name, results=results) + summary_result.aux_links.append(env.CHANGE_URL) + summary_result.aux_links.append(env.RUN_URL) + summary_result.start_time = Utils.timestamp() + page_url = "/".join( + ["https:/", Settings.HTML_S3_PATH, str(Path(Settings.HTML_PAGE_FILE).name)] + ) + for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items(): + page_url = page_url.replace(bucket, endpoint) + # TODO: add support for non-PRs (use branch?) + page_url += f"?PR={env.PR_NUMBER}&sha=latest&name_0={urllib.parse.quote(env.WORKFLOW_NAME, safe='')}" + summary_result.html_link = page_url + + # clean the previous latest results in PR if any + if env.PR_NUMBER: + S3.clean_latest_result() + S3.copy_result_to_s3( + summary_result, + unlock=False, + ) + + print(f"CI Status page url [{page_url}]") + + res1 = GH.post_commit_status( + name=_workflow.name, + status=Result.Status.PENDING, + description="", + url=page_url, + ) + res2 = GH.post_pr_comment( + comment_body=f"Workflow [[{_workflow.name}]({page_url})], commit [{_Environment.get().SHA[:8]}]", + or_update_comment_with_substring=f"Workflow [", + ) + if not (res1 or res2): + print( + "ERROR: Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed" + ) + raise + + @classmethod + def pre_run(cls, _workflow, _job): + result = Result.from_fs(_job.name) + S3.copy_result_from_s3( + Result.file_name_static(_workflow.name), + ) + workflow_result = Result.from_fs(_workflow.name) + workflow_result.update_sub_result(result) + S3.copy_result_to_s3( + workflow_result, + unlock=True, + ) + + @classmethod + def run(cls, _workflow, _job): + pass + + @classmethod + def post_run(cls, _workflow, _job, info_errors): + result = Result.from_fs(_job.name) + env = _Environment.get() + S3.copy_result_from_s3( + Result.file_name_static(_workflow.name), + lock=True, + ) + workflow_result = Result.from_fs(_workflow.name) + print(f"Workflow info [{workflow_result.info}], info_errors [{info_errors}]") + + env_info = env.REPORT_INFO + if env_info: + print( + f"WARNING: some info lines are set in Environment - append to report [{env_info}]" + ) + info_errors += env_info + if info_errors: + info_errors = [f" | {error}" for error in info_errors] + info_str = f"{_job.name}:\n" + info_str += "\n".join(info_errors) + print("Update workflow results with new info") + workflow_result.set_info(info_str) + + 
old_status = workflow_result.status + + S3.upload_result_files_to_s3(result) + workflow_result.update_sub_result(result) + + skipped_job_results = [] + if not result.is_ok(): + print( + "Current job failed - find dependee jobs in the workflow and set their statuses to skipped" + ) + workflow_config_parsed = WorkflowConfigParser(_workflow).parse() + for dependee_job in workflow_config_parsed.workflow_yaml_config.jobs: + if _job.name in dependee_job.needs: + if _workflow.get_job(dependee_job.name).run_unless_cancelled: + continue + print( + f"NOTE: Set job [{dependee_job.name}] status to [{Result.Status.SKIPPED}] due to current failure" + ) + skipped_job_results.append( + Result( + name=dependee_job.name, + status=Result.Status.SKIPPED, + info=ResultInfo.SKIPPED_DUE_TO_PREVIOUS_FAILURE + + f" [{_job.name}]", + ) + ) + for skipped_job_result in skipped_job_results: + workflow_result.update_sub_result(skipped_job_result) + + S3.copy_result_to_s3( + workflow_result, + unlock=True, + ) + if workflow_result.status != old_status: + print( + f"Update GH commit status [{result.name}]: [{old_status} -> {workflow_result.status}], link [{workflow_result.html_link}]" + ) + GH.post_commit_status( + name=workflow_result.name, + status=GH.convert_to_gh_status(workflow_result.status), + description="", + url=workflow_result.html_link, + ) diff --git a/praktika/hook_interface.py b/praktika/hook_interface.py new file mode 100644 index 000000000000..762ee62eeb17 --- /dev/null +++ b/praktika/hook_interface.py @@ -0,0 +1,43 @@ +from abc import ABC, abstractmethod + +from praktika import Workflow + + +class HookInterface(ABC): + @abstractmethod + def pre_run(self, _workflow, _job): + """ + runs in pre-run step + :param _workflow: + :param _job: + :return: + """ + pass + + @abstractmethod + def run(self, _workflow, _job): + """ + runs in run step + :param _workflow: + :param _job: + :return: + """ + pass + + @abstractmethod + def post_run(self, _workflow, _job): + """ + runs in post-run step + :param _workflow: + :param _job: + :return: + """ + pass + + @abstractmethod + def configure(self, _workflow: Workflow.Config): + """ + runs in initial WorkflowConfig job in run step + :return: + """ + pass diff --git a/praktika/html_prepare.py b/praktika/html_prepare.py new file mode 100644 index 000000000000..54bee2f6bbf0 --- /dev/null +++ b/praktika/html_prepare.py @@ -0,0 +1,10 @@ +from praktika.s3 import S3 +from praktika.settings import Settings + + +class Html: + @classmethod + def prepare(cls): + S3.copy_file_to_s3( + s3_path=Settings.HTML_S3_PATH, local_path=Settings.HTML_PAGE_FILE + ) diff --git a/praktika/job.py b/praktika/job.py new file mode 100644 index 000000000000..d0d4232cfa2d --- /dev/null +++ b/praktika/job.py @@ -0,0 +1,102 @@ +import copy +import json +from dataclasses import dataclass, field +from typing import Any, List, Optional + + +class Job: + @dataclass + class Requirements: + python: bool = False + python_requirements_txt: str = "" + + @dataclass + class CacheDigestConfig: + include_paths: List[str] = field(default_factory=list) + exclude_paths: List[str] = field(default_factory=list) + + @dataclass + class Config: + # Job Name + name: str + + # Machine's label to run job on. 
For instance [ubuntu-latest] for free gh runner + runs_on: List[str] + + # Job Run Command + command: str + + # What job requires + # May be phony or physical names + requires: List[str] = field(default_factory=list) + + # What job provides + # May be phony or physical names + provides: List[str] = field(default_factory=list) + + job_requirements: Optional["Job.Requirements"] = None + + timeout: int = 1 * 3600 + + digest_config: Optional["Job.CacheDigestConfig"] = None + + run_in_docker: str = "" + + run_unless_cancelled: bool = False + + allow_merge_on_failure: bool = False + + parameter: Any = None + + def parametrize( + self, + parameter: Optional[List[Any]] = None, + runs_on: Optional[List[List[str]]] = None, + timeout: Optional[List[int]] = None, + ): + assert ( + parameter or runs_on + ), "Either :parameter or :runs_on must be non empty list for parametrisation" + if not parameter: + parameter = [None] * len(runs_on) + if not runs_on: + runs_on = [None] * len(parameter) + if not timeout: + timeout = [None] * len(parameter) + assert ( + len(parameter) == len(runs_on) == len(timeout) + ), "Parametrization lists must be of the same size" + + res = [] + for parameter_, runs_on_, timeout_ in zip(parameter, runs_on, timeout): + obj = copy.deepcopy(self) + if parameter_: + obj.parameter = parameter_ + if runs_on_: + obj.runs_on = runs_on_ + if timeout_: + obj.timeout = timeout_ + obj.name = obj.get_job_name_with_parameter() + res.append(obj) + return res + + def get_job_name_with_parameter(self): + name, parameter, runs_on = self.name, self.parameter, self.runs_on + res = name + name_params = [] + if isinstance(parameter, list) or isinstance(parameter, dict): + name_params.append(json.dumps(parameter)) + elif parameter is not None: + name_params.append(parameter) + if runs_on: + assert isinstance(runs_on, list) + name_params.append(json.dumps(runs_on)) + if name_params: + name_params = [str(param) for param in name_params] + res += f" ({', '.join(name_params)})" + + self.name = res + return res + + def __repr__(self): + return self.name diff --git a/praktika/json.html b/praktika/json.html new file mode 100644 index 000000000000..fe7b65a5ec5b --- /dev/null +++ b/praktika/json.html @@ -0,0 +1,727 @@ + + + + + + praktika report + + + + +
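# praktika/json.html (body not shown above) is the report page that praktika uploads
# to S3 (see html_prepare.py) and links to from commit statuses; the link is built in
# _Environment.get_report_url() and HtmlRunnerHooks.configure(). A self-contained
# sketch of that URL construction, using placeholder bucket/endpoint values rather
# than the real Settings:

import urllib.parse
from pathlib import Path

HTML_S3_PATH = "my-bucket/html"  # placeholder
S3_BUCKET_TO_HTTP_ENDPOINT = {"my-bucket": "my-bucket.s3.amazonaws.com"}  # placeholder
HTML_PAGE_FILE = "./praktika/json.html"

# replace the bucket name with its public HTTP endpoint
path = HTML_S3_PATH
for bucket, endpoint in S3_BUCKET_TO_HTTP_ENDPOINT.items():
    if bucket in path:
        path = path.replace(bucket, endpoint)
        break

page_url = (
    f"https://{path}/{Path(HTML_PAGE_FILE).name}"
    f"?PR=123&sha=latest&name_0={urllib.parse.quote('MyWorkflow', safe='')}"
)
print(page_url)
# https://my-bucket.s3.amazonaws.com/html/json.html?PR=123&sha=latest&name_0=MyWorkflow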
+ + + + + + diff --git a/praktika/mangle.py b/praktika/mangle.py new file mode 100644 index 000000000000..89fc52cf8497 --- /dev/null +++ b/praktika/mangle.py @@ -0,0 +1,137 @@ +import copy +import importlib.util +from pathlib import Path +from typing import Any, Dict + +from praktika import Job +from praktika._settings import _USER_DEFINED_SETTINGS, _Settings +from praktika.utils import ContextManager, Utils + + +def _get_workflows(name=None, file=None): + """ + Gets user's workflow configs + """ + res = [] + + with ContextManager.cd(): + directory = Path(_Settings.WORKFLOWS_DIRECTORY) + for py_file in directory.glob("*.py"): + if file and file not in str(py_file): + continue + module_name = py_file.name.removeprefix(".py") + spec = importlib.util.spec_from_file_location( + module_name, f"{_Settings.WORKFLOWS_DIRECTORY}/{module_name}" + ) + assert spec + foo = importlib.util.module_from_spec(spec) + assert spec.loader + spec.loader.exec_module(foo) + try: + for workflow in foo.WORKFLOWS: + if name: + if name == workflow.name: + print(f"Read workflow [{name}] config from [{module_name}]") + res = [workflow] + break + else: + continue + else: + res += foo.WORKFLOWS + print(f"Read workflow configs from [{module_name}]") + except Exception as e: + print( + f"WARNING: Failed to add WORKFLOWS config from [{module_name}], exception [{e}]" + ) + if not res: + Utils.raise_with_error(f"Failed to find workflow [{name or file}]") + + for workflow in res: + # add native jobs + _update_workflow_with_native_jobs(workflow) + # fill in artifact properties, e.g. _provided_by + _update_workflow_artifacts(workflow) + return res + + +def _update_workflow_artifacts(workflow): + artifact_job = {} + for job in workflow.jobs: + for artifact_name in job.provides: + assert artifact_name not in artifact_job + artifact_job[artifact_name] = job.name + for artifact in workflow.artifacts: + artifact._provided_by = artifact_job[artifact.name] + + +def _update_workflow_with_native_jobs(workflow): + if workflow.dockers: + from praktika.native_jobs import _docker_build_job + + print(f"Enable native job [{_docker_build_job.name}] for [{workflow.name}]") + aux_job = copy.deepcopy(_docker_build_job) + if workflow.enable_cache: + print( + f"Add automatic digest config for [{aux_job.name}] job since cache is enabled" + ) + docker_digest_config = Job.CacheDigestConfig() + for docker_config in workflow.dockers: + docker_digest_config.include_paths.append(docker_config.path) + aux_job.digest_config = docker_digest_config + + workflow.jobs.insert(0, aux_job) + for job in workflow.jobs[1:]: + if not job.requires: + job.requires = [] + job.requires.append(aux_job.name) + + if ( + workflow.enable_cache + or workflow.enable_report + or workflow.enable_merge_ready_status + ): + from praktika.native_jobs import _workflow_config_job + + print(f"Enable native job [{_workflow_config_job.name}] for [{workflow.name}]") + aux_job = copy.deepcopy(_workflow_config_job) + workflow.jobs.insert(0, aux_job) + for job in workflow.jobs[1:]: + if not job.requires: + job.requires = [] + job.requires.append(aux_job.name) + + if workflow.enable_merge_ready_status: + from praktika.native_jobs import _final_job + + print(f"Enable native job [{_final_job.name}] for [{workflow.name}]") + aux_job = copy.deepcopy(_final_job) + for job in workflow.jobs: + aux_job.requires.append(job.name) + workflow.jobs.append(aux_job) + + +def _get_user_settings() -> Dict[str, Any]: + """ + Gets user's settings + """ + res = {} # type: Dict[str, Any] + + directory = 
Path(_Settings.SETTINGS_DIRECTORY) + for py_file in directory.glob("*.py"): + module_name = py_file.name.removeprefix(".py") + spec = importlib.util.spec_from_file_location( + module_name, f"{_Settings.SETTINGS_DIRECTORY}/{module_name}" + ) + assert spec + foo = importlib.util.module_from_spec(spec) + assert spec.loader + spec.loader.exec_module(foo) + for setting in _USER_DEFINED_SETTINGS: + try: + value = getattr(foo, setting) + res[setting] = value + print(f"Apply user defined setting [{setting} = {value}]") + except Exception as e: + pass + + return res diff --git a/praktika/native_jobs.py b/praktika/native_jobs.py new file mode 100644 index 000000000000..f7fd4ca190b0 --- /dev/null +++ b/praktika/native_jobs.py @@ -0,0 +1,378 @@ +import sys +from typing import Dict + +from praktika import Job, Workflow +from praktika._environment import _Environment +from praktika.cidb import CIDB +from praktika.digest import Digest +from praktika.docker import Docker +from praktika.gh import GH +from praktika.hook_cache import CacheRunnerHooks +from praktika.hook_html import HtmlRunnerHooks +from praktika.mangle import _get_workflows +from praktika.result import Result, ResultInfo +from praktika.runtime import RunConfig +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Shell, Utils + +assert Settings.CI_CONFIG_RUNS_ON + +_workflow_config_job = Job.Config( + name=Settings.CI_CONFIG_JOB_NAME, + runs_on=Settings.CI_CONFIG_RUNS_ON, + job_requirements=( + Job.Requirements( + python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt=Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS, + ) + if Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS + else None + ), + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.CI_CONFIG_JOB_NAME}'", +) + +_docker_build_job = Job.Config( + name=Settings.DOCKER_BUILD_JOB_NAME, + runs_on=Settings.DOCKER_BUILD_RUNS_ON, + job_requirements=Job.Requirements( + python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt="", + ), + timeout=4 * 3600, + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.DOCKER_BUILD_JOB_NAME}'", +) + +_final_job = Job.Config( + name=Settings.FINISH_WORKFLOW_JOB_NAME, + runs_on=Settings.CI_CONFIG_RUNS_ON, + job_requirements=Job.Requirements( + python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt="", + ), + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.FINISH_WORKFLOW_JOB_NAME}'", + run_unless_cancelled=True, +) + + +def _build_dockers(workflow, job_name): + print(f"Start [{job_name}], workflow [{workflow.name}]") + dockers = workflow.dockers + ready = [] + results = [] + job_status = Result.Status.SUCCESS + job_info = "" + dockers = Docker.sort_in_build_order(dockers) + docker_digests = {} # type: Dict[str, str] + for docker in dockers: + docker_digests[docker.name] = Digest().calc_docker_digest(docker, dockers) + + if not Shell.check( + "docker buildx inspect --bootstrap | grep -q docker-container", verbose=True + ): + print("Install docker container driver") + if not Shell.check( + "docker buildx create --use --name mybuilder --driver docker-container", + verbose=True, + ): + job_status = Result.Status.FAILED + job_info = "Failed to install docker buildx driver" + + if job_status == Result.Status.SUCCESS: + if not Docker.login( + Settings.DOCKERHUB_USERNAME, + user_password=workflow.get_secret(Settings.DOCKERHUB_SECRET).get_value(), + ): + job_status = Result.Status.FAILED + job_info = 
"Failed to login to dockerhub" + + if job_status == Result.Status.SUCCESS: + for docker in dockers: + assert ( + docker.name not in ready + ), f"All docker names must be uniq [{dockers}]" + stopwatch = Utils.Stopwatch() + info = f"{docker.name}:{docker_digests[docker.name]}" + log_file = f"{Settings.OUTPUT_DIR}/docker_{Utils.normalize_string(docker.name)}.log" + files = [] + + code, out, err = Shell.get_res_stdout_stderr( + f"docker manifest inspect {docker.name}:{docker_digests[docker.name]}" + ) + print( + f"Docker inspect results for {docker.name}:{docker_digests[docker.name]}: exit code [{code}], out [{out}], err [{err}]" + ) + if "no such manifest" in err: + ret_code = Docker.build( + docker, log_file=log_file, digests=docker_digests, add_latest=False + ) + if ret_code == 0: + status = Result.Status.SUCCESS + else: + status = Result.Status.FAILED + job_status = Result.Status.FAILED + info += f", failed with exit code: {ret_code}, see log" + files.append(log_file) + else: + print( + f"Docker image [{docker.name}:{docker_digests[docker.name]} exists - skip build" + ) + status = Result.Status.SKIPPED + ready.append(docker.name) + results.append( + Result( + name=docker.name, + status=status, + info=info, + duration=stopwatch.duration, + start_time=stopwatch.start_time, + files=files, + ) + ) + Result.from_fs(job_name).set_status(job_status).set_results(results).set_info( + job_info + ) + + if job_status != Result.Status.SUCCESS: + sys.exit(1) + + +def _config_workflow(workflow: Workflow.Config, job_name): + def _check_yaml_up_to_date(): + print("Check workflows are up to date") + stop_watch = Utils.Stopwatch() + exit_code, output, err = Shell.get_res_stdout_stderr( + f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}" + ) + info = "" + status = Result.Status.SUCCESS + if exit_code != 0: + info = f"workspace has uncommitted files unexpectedly [{output}]" + status = Result.Status.ERROR + print("ERROR: ", info) + else: + Shell.check(f"{Settings.PYTHON_INTERPRETER} -m praktika --generate") + exit_code, output, err = Shell.get_res_stdout_stderr( + f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}" + ) + if exit_code != 0: + info = f"workspace has outdated workflows [{output}] - regenerate with [python -m praktika --generate]" + status = Result.Status.ERROR + print("ERROR: ", info) + + return ( + Result( + name="Check Workflows updated", + status=status, + start_time=stop_watch.start_time, + duration=stop_watch.duration, + info=info, + ), + info, + ) + + def _check_secrets(secrets): + print("Check Secrets") + stop_watch = Utils.Stopwatch() + infos = [] + for secret_config in secrets: + value = secret_config.get_value() + if not value: + info = f"ERROR: Failed to read secret [{secret_config.name}]" + infos.append(info) + print(info) + + info = "\n".join(infos) + return ( + Result( + name="Check Secrets", + status=(Result.Status.FAILED if infos else Result.Status.SUCCESS), + start_time=stop_watch.start_time, + duration=stop_watch.duration, + info=info, + ), + info, + ) + + def _check_db(workflow): + stop_watch = Utils.Stopwatch() + res, info = CIDB( + workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(), + workflow.get_secret(Settings.SECRET_CI_DB_PASSWORD).get_value(), + ).check() + return ( + Result( + name="Check CI DB", + status=(Result.Status.FAILED if not res else Result.Status.SUCCESS), + start_time=stop_watch.start_time, + duration=stop_watch.duration, + info=info, + ), + info, + ) + + print(f"Start [{job_name}], workflow [{workflow.name}]") + results = [] + files = [] 
+ info_lines = [] + job_status = Result.Status.SUCCESS + + workflow_config = RunConfig( + name=workflow.name, + digest_jobs={}, + digest_dockers={}, + sha=_Environment.get().SHA, + cache_success=[], + cache_success_base64=[], + cache_artifacts={}, + ).dump() + + # checks: + result_, info = _check_yaml_up_to_date() + if result_.status != Result.Status.SUCCESS: + print("ERROR: yaml files are outdated - regenerate, commit and push") + job_status = Result.Status.ERROR + info_lines.append(job_name + ": " + info) + results.append(result_) + + if workflow.secrets: + result_, info = _check_secrets(workflow.secrets) + if result_.status != Result.Status.SUCCESS: + print(f"ERROR: Invalid secrets in workflow [{workflow.name}]") + job_status = Result.Status.ERROR + info_lines.append(job_name + ": " + info) + results.append(result_) + + if workflow.enable_cidb: + result_, info = _check_db(workflow) + if result_.status != Result.Status.SUCCESS: + job_status = Result.Status.ERROR + info_lines.append(job_name + ": " + info) + results.append(result_) + + # config: + if workflow.dockers: + print("Calculate docker's digests") + dockers = workflow.dockers + dockers = Docker.sort_in_build_order(dockers) + for docker in dockers: + workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest( + docker, dockers + ) + workflow_config.dump() + + if workflow.enable_cache: + print("Cache Lookup") + stop_watch = Utils.Stopwatch() + workflow_config = CacheRunnerHooks.configure(workflow) + results.append( + Result( + name="Cache Lookup", + status=Result.Status.SUCCESS, + start_time=stop_watch.start_time, + duration=stop_watch.duration, + ) + ) + files.append(RunConfig.file_name_static(workflow.name)) + + workflow_config.dump() + + if workflow.enable_report: + print("Init report") + stop_watch = Utils.Stopwatch() + HtmlRunnerHooks.configure(workflow) + results.append( + Result( + name="Init Report", + status=Result.Status.SUCCESS, + start_time=stop_watch.start_time, + duration=stop_watch.duration, + ) + ) + files.append(Result.file_name_static(workflow.name)) + + Result.from_fs(job_name).set_status(job_status).set_results(results).set_files( + files + ).set_info("\n".join(info_lines)) + + if job_status != Result.Status.SUCCESS: + sys.exit(1) + + +def _finish_workflow(workflow, job_name): + print(f"Start [{job_name}], workflow [{workflow.name}]") + env = _Environment.get() + + print("Check Actions statuses") + print(env.get_needs_statuses()) + + print("Check Workflow results") + S3.copy_result_from_s3( + Result.file_name_static(workflow.name), + lock=False, + ) + workflow_result = Result.from_fs(workflow.name) + + ready_for_merge_status = Result.Status.SUCCESS + ready_for_merge_description = "" + failed_results = [] + update_final_report = False + for result in workflow_result.results: + if result.name == job_name or result.status in ( + Result.Status.SUCCESS, + Result.Status.SKIPPED, + ): + continue + if not result.is_completed(): + print( + f"ERROR: not finished job [{result.name}] in the workflow - set status to error" + ) + result.status = Result.Status.ERROR + # dump workflow result after update - to have an updated result in post + workflow_result.dump() + # add error into env - should apper in the report + env.add_info(ResultInfo.NOT_FINALIZED + f" [{result.name}]") + update_final_report = True + job = workflow.get_job(result.name) + if not job or not job.allow_merge_on_failure: + print( + f"NOTE: Result for [{result.name}] has not ok status [{result.status}]" + ) + ready_for_merge_status = 
Result.Status.FAILED + failed_results.append(result.name.split("(", maxsplit=1)[0]) # cut name + + if failed_results: + ready_for_merge_description = f"failed: {', '.join(failed_results)}" + + if not GH.post_commit_status( + name=Settings.READY_FOR_MERGE_STATUS_NAME + f" [{workflow.name}]", + status=ready_for_merge_status, + description=ready_for_merge_description, + url="", + ): + print(f"ERROR: failed to set status [{Settings.READY_FOR_MERGE_STATUS_NAME}]") + env.add_info(ResultInfo.GH_STATUS_ERROR) + + if update_final_report: + S3.copy_result_to_s3( + workflow_result, + unlock=False, + ) # no lock - no unlock + + Result.from_fs(job_name).set_status(Result.Status.SUCCESS).set_info( + ready_for_merge_description + ) + + +if __name__ == "__main__": + job_name = sys.argv[1] + assert job_name, "Job name must be provided as input argument" + workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0] + if job_name == Settings.DOCKER_BUILD_JOB_NAME: + _build_dockers(workflow, job_name) + elif job_name == Settings.CI_CONFIG_JOB_NAME: + _config_workflow(workflow, job_name) + elif job_name == Settings.FINISH_WORKFLOW_JOB_NAME: + _finish_workflow(workflow, job_name) + else: + assert False, f"BUG, job name [{job_name}]" diff --git a/praktika/parser.py b/praktika/parser.py new file mode 100644 index 000000000000..95aa27c4576b --- /dev/null +++ b/praktika/parser.py @@ -0,0 +1,258 @@ +import dataclasses +from typing import Any, Dict, List + +from praktika import Artifact, Workflow +from praktika.mangle import _get_workflows + + +class AddonType: + PY = "py" + + +@dataclasses.dataclass +class WorkflowYaml: + @dataclasses.dataclass + class JobYaml: + name: str + needs: List[str] + runs_on: List[str] + artifacts_gh_requires: List["WorkflowYaml.ArtifactYaml"] + artifacts_gh_provides: List["WorkflowYaml.ArtifactYaml"] + addons: List["WorkflowYaml.JobAddonYaml"] + gh_app_auth: bool + run_unless_cancelled: bool + parameter: Any + + def __repr__(self): + return self.name + + @dataclasses.dataclass + class ArtifactYaml: + name: str + provided_by: str + required_by: List[str] + path: str + type: str + + def __repr__(self): + return self.name + + @dataclasses.dataclass + class JobAddonYaml: + install_python: bool + requirements_txt_path: str + + name: str + event: str + branches: List[str] + jobs: List[JobYaml] + job_to_config: Dict[str, JobYaml] + artifact_to_config: Dict[str, ArtifactYaml] + secret_names_gh: List[str] + enable_cache: bool + + +class WorkflowConfigParser: + def __init__(self, config: Workflow.Config): + self.workflow_name = config.name + self.config = config + self.requires_all = [] # type: List[str] + self.provides_all = [] # type: List[str] + self.job_names_all = [] # type: List[str] + self.artifact_to_providing_job_map = {} # type: Dict[str, List[str]] + self.artifact_to_job_requires_map = {} # type: Dict[str, List[str]] + self.artifact_map = {} # type: Dict[str, List[Artifact.Config]] + + self.job_to_provides_artifacts = {} # type: Dict[str, List[Artifact.Config]] + self.job_to_requires_artifacts = {} # type: Dict[str, List[Artifact.Config]] + + self.workflow_yaml_config = WorkflowYaml( + name=self.workflow_name, + event=config.event, + branches=[], + jobs=[], + secret_names_gh=[], + job_to_config={}, + artifact_to_config={}, + enable_cache=False, + ) + + def parse(self): + self.workflow_yaml_config.enable_cache = self.config.enable_cache + + # populate WorkflowYaml.branches + if self.config.event in (Workflow.Event.PUSH,): + assert ( + self.config.branches + ), 
f'Workflow.Config.branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert ( + not self.config.base_branches + ), f'Workflow.Config.base_branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert isinstance( + self.config.branches, list + ), f'Workflow.Config.branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]' + self.workflow_yaml_config.branches = self.config.branches + elif self.config.event in (Workflow.Event.PULL_REQUEST,): + assert ( + self.config.base_branches + ), f'Workflow.Config.base_branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert ( + not self.config.branches + ), f'Workflow.Config.branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert isinstance( + self.config.base_branches, list + ), f'Workflow.Config.base_branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]' + self.workflow_yaml_config.branches = self.config.base_branches + + # populate WorkflowYaml.artifact_to_config with phony artifacts + for job in self.config.jobs: + assert ( + job.name not in self.workflow_yaml_config.artifact_to_config + ), f"Not uniq Job name [{job.name}], workflow [{self.workflow_name}]" + self.workflow_yaml_config.artifact_to_config[job.name] = ( + WorkflowYaml.ArtifactYaml( + name=job.name, + provided_by=job.name, + required_by=[], + path="", + type=Artifact.Type.PHONY, + ) + ) + + # populate jobs + for job in self.config.jobs: + job_yaml_config = WorkflowYaml.JobYaml( + name=job.name, + addons=[], + artifacts_gh_requires=[], + artifacts_gh_provides=[], + needs=[], + runs_on=[], + gh_app_auth=False, + run_unless_cancelled=job.run_unless_cancelled, + parameter=None, + ) + self.workflow_yaml_config.jobs.append(job_yaml_config) + assert ( + job.name not in self.workflow_yaml_config.job_to_config + ), f"Job name [{job.name}] is not uniq, workflow [{self.workflow_name}]" + self.workflow_yaml_config.job_to_config[job.name] = job_yaml_config + + # populate WorkflowYaml.artifact_to_config + if self.config.artifacts: + for artifact in self.config.artifacts: + assert ( + artifact.name not in self.workflow_yaml_config.artifact_to_config + ), f"Artifact name [{artifact.name}] is not uniq, workflow [{self.workflow_name}]" + artifact_yaml_config = WorkflowYaml.ArtifactYaml( + name=artifact.name, + provided_by="", + required_by=[], + path=artifact.path, + type=artifact.type, + ) + self.workflow_yaml_config.artifact_to_config[artifact.name] = ( + artifact_yaml_config + ) + + # populate ArtifactYaml.provided_by + for job in self.config.jobs: + if job.provides: + for artifact_name in job.provides: + assert ( + artifact_name in self.workflow_yaml_config.artifact_to_config + ), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]" + assert not self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by, f"Artifact [{artifact_name}] provided by multiple jobs [{self.workflow_yaml_config.artifact_to_config[artifact_name].provided_by}] and [{job.name}]" + self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by = job.name + + # populate ArtifactYaml.required_by + for job in self.config.jobs: + if job.requires: + for artifact_name in job.requires: + assert ( + artifact_name in 
self.workflow_yaml_config.artifact_to_config + ), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]" + assert self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by, f"Artifact [{artifact_name}] has no job providing it, required by job [{job.name}], workflow [{self.workflow_name}]" + self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].required_by.append(job.name) + + # populate JobYaml.addons + for job in self.config.jobs: + if job.job_requirements: + addon_yaml = WorkflowYaml.JobAddonYaml( + requirements_txt_path=job.job_requirements.python_requirements_txt, + install_python=job.job_requirements.python, + ) + self.workflow_yaml_config.job_to_config[job.name].addons.append( + addon_yaml + ) + + if self.config.enable_report: + for job in self.config.jobs: + # auth required for every job with enabled HTML, so that workflow summary status can be updated + self.workflow_yaml_config.job_to_config[job.name].gh_app_auth = True + + # populate JobYaml.runs_on + for job in self.config.jobs: + self.workflow_yaml_config.job_to_config[job.name].runs_on = job.runs_on + + # populate JobYaml.artifacts_gh_requires, JobYaml.artifacts_gh_provides and JobYaml.needs + for ( + artifact_name, + artifact, + ) in self.workflow_yaml_config.artifact_to_config.items(): + # assert ( + # artifact.provided_by + # and artifact.provided_by in self.workflow_yaml_config.job_to_config + # ), f"Artifact [{artifact_name}] has no valid job providing it [{artifact.provided_by}]" + for job_name in artifact.required_by: + if ( + artifact.provided_by + not in self.workflow_yaml_config.job_to_config[job_name].needs + ): + self.workflow_yaml_config.job_to_config[job_name].needs.append( + artifact.provided_by + ) + if artifact.type in (Artifact.Type.GH,): + self.workflow_yaml_config.job_to_config[ + job_name + ].artifacts_gh_requires.append(artifact) + elif artifact.type in (Artifact.Type.PHONY, Artifact.Type.S3): + pass + else: + assert ( + False + ), f"Artifact [{artifact_name}] has unsupported type [{artifact.type}]" + if not artifact.required_by and artifact.type != Artifact.Type.PHONY: + print( + f"WARNING: Artifact [{artifact_name}] provided by job [{artifact.provided_by}] not required by any job in workflow [{self.workflow_name}]" + ) + if artifact.type == Artifact.Type.GH: + self.workflow_yaml_config.job_to_config[ + artifact.provided_by + ].artifacts_gh_provides.append(artifact) + + # populate JobYaml.parametrize + for job in self.config.jobs: + self.workflow_yaml_config.job_to_config[job.name].parameter = job.parameter + + # populate secrets + for secret_config in self.config.secrets: + if secret_config.is_gh(): + self.workflow_yaml_config.secret_names_gh.append(secret_config.name) + + return self + + +if __name__ == "__main__": + # test + workflows = _get_workflows() + for workflow in workflows: + WorkflowConfigParser(workflow).parse() diff --git a/praktika/result.py b/praktika/result.py new file mode 100644 index 000000000000..3d3c986d5f93 --- /dev/null +++ b/praktika/result.py @@ -0,0 +1,354 @@ +import dataclasses +import datetime +import sys +from collections.abc import Container +from pathlib import Path +from typing import Any, Dict, List, Optional + +from praktika._environment import _Environment +from praktika._settings import _Settings +from praktika.utils import ContextManager, MetaClasses, Shell, Utils + + +@dataclasses.dataclass +class Result(MetaClasses.Serializable): + """ + Represents the outcome of a workflow/job/task or any 
operation, along with associated metadata. + + This class supports nesting of results to represent tasks with sub-tasks, and includes + various attributes to track status, timing, files, and links. + + Attributes: + name (str): The name of the task. + status (str): The current status of the task. Should be one of the values defined in the Status class. + start_time (Optional[float]): The start time of the task in Unix timestamp format. None if not started. + duration (Optional[float]): The duration of the task in seconds. None if not completed. + results (List[Result]): A list of sub-results representing nested tasks. + files (List[str]): A list of file paths or names related to the result. + links (List[str]): A list of URLs related to the result (e.g., links to reports or resources). + info (str): Additional information about the result. Free-form text. + # TODO: rename + aux_links (List[str]): A list of auxiliary links that provide additional context for the result. + # TODO: remove + html_link (str): A direct link to an HTML representation of the result (e.g., a detailed report page). + + Inner Class: + Status: Defines possible statuses for the task, such as "success", "failure", etc. + """ + + class Status: + SKIPPED = "skipped" + SUCCESS = "success" + FAILED = "failure" + PENDING = "pending" + RUNNING = "running" + ERROR = "error" + + name: str + status: str + start_time: Optional[float] = None + duration: Optional[float] = None + results: List["Result"] = dataclasses.field(default_factory=list) + files: List[str] = dataclasses.field(default_factory=list) + links: List[str] = dataclasses.field(default_factory=list) + info: str = "" + aux_links: List[str] = dataclasses.field(default_factory=list) + html_link: str = "" + + @staticmethod + def create_from( + name="", + results: List["Result"] = None, + stopwatch: Utils.Stopwatch = None, + status="", + files=None, + info="", + with_info_from_results=True, + ): + if isinstance(status, bool): + status = Result.Status.SUCCESS if status else Result.Status.FAILED + if not results and not status: + print("ERROR: Either .results or .status must be provided") + raise + if not name: + name = _Environment.get().JOB_NAME + if not name: + print("ERROR: Failed to guess the .name") + raise + result_status = status or Result.Status.SUCCESS + infos = [] + if info: + if isinstance(info, Container): + infos += info + else: + infos.append(info) + if results and not status: + for result in results: + if result.status not in (Result.Status.SUCCESS, Result.Status.FAILED): + Utils.raise_with_error( + f"Unexpected result status [{result.status}] for Result.create_from call" + ) + if result.status != Result.Status.SUCCESS: + result_status = Result.Status.FAILED + if results: + for result in results: + if result.info and with_info_from_results: + infos.append(f"{result.name}: {result.info}") + return Result( + name=name, + status=result_status, + start_time=stopwatch.start_time if stopwatch else None, + duration=stopwatch.duration if stopwatch else None, + info="\n".join(infos) if infos else "", + results=results or [], + files=files or [], + ) + + @staticmethod + def get(): + return Result.from_fs(_Environment.get().JOB_NAME) + + def is_completed(self): + return self.status not in (Result.Status.PENDING, Result.Status.RUNNING) + + def is_running(self): + return self.status not in (Result.Status.RUNNING,) + + def is_ok(self): + return self.status in (Result.Status.SKIPPED, Result.Status.SUCCESS) + + def set_status(self, status) -> "Result": + self.status = status 
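+ # every set_* mutator persists the Result back to its JSON file (dump()) and
+ # returns self, so calls can be chained, e.g.:
+ #   Result.from_fs(job_name).set_status(status).set_results(results).set_info(info)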
+ self.dump() + return self + + def set_success(self) -> "Result": + return self.set_status(Result.Status.SUCCESS) + + def set_results(self, results: List["Result"]) -> "Result": + self.results = results + self.dump() + return self + + def set_files(self, files) -> "Result": + for file in files: + assert Path( + file + ).is_file(), f"Not valid file [{file}] from file list [{files}]" + if not self.files: + self.files = [] + self.files += files + self.dump() + return self + + def set_info(self, info: str) -> "Result": + if self.info: + self.info += "\n" + self.info += info + self.dump() + return self + + def set_link(self, link) -> "Result": + self.links.append(link) + self.dump() + return self + + @classmethod + def file_name_static(cls, name): + return f"{_Settings.TEMP_DIR}/result_{Utils.normalize_string(name)}.json" + + @classmethod + def from_dict(cls, obj: Dict[str, Any]) -> "Result": + sub_results = [] + for result_dict in obj["results"] or []: + sub_res = cls.from_dict(result_dict) + sub_results.append(sub_res) + obj["results"] = sub_results + return Result(**obj) + + def update_duration(self): + if not self.duration and self.start_time: + self.duration = datetime.datetime.utcnow().timestamp() - self.start_time + else: + if not self.duration: + print( + f"NOTE: duration is set for job [{self.name}] Result - do not update by CI" + ) + else: + print( + f"NOTE: start_time is not set for job [{self.name}] Result - do not update duration" + ) + return self + + def update_sub_result(self, result: "Result"): + assert self.results, "BUG?" + for i, result_ in enumerate(self.results): + if result_.name == result.name: + self.results[i] = result + self._update_status() + return self + + def _update_status(self): + was_pending = False + was_running = False + if self.status == self.Status.PENDING: + was_pending = True + if self.status == self.Status.RUNNING: + was_running = True + + has_pending, has_running, has_failed = False, False, False + for result_ in self.results: + if result_.status in (self.Status.RUNNING,): + has_running = True + if result_.status in (self.Status.PENDING,): + has_pending = True + if result_.status in (self.Status.ERROR, self.Status.FAILED): + has_failed = True + if has_running: + self.status = self.Status.RUNNING + elif has_pending: + self.status = self.Status.PENDING + elif has_failed: + self.status = self.Status.FAILED + else: + self.status = self.Status.SUCCESS + if (was_pending or was_running) and self.status not in ( + self.Status.PENDING, + self.Status.RUNNING, + ): + print("Pipeline finished") + self.update_duration() + + @classmethod + def generate_pending(cls, name, results=None): + return Result( + name=name, + status=Result.Status.PENDING, + start_time=None, + duration=None, + results=results or [], + files=[], + links=[], + info="", + ) + + @classmethod + def generate_skipped(cls, name, results=None): + return Result( + name=name, + status=Result.Status.SKIPPED, + start_time=None, + duration=None, + results=results or [], + files=[], + links=[], + info="from cache", + ) + + @classmethod + def create_from_command_execution( + cls, + name, + command, + with_log=False, + fail_fast=True, + workdir=None, + command_args=None, + command_kwargs=None, + ): + """ + Executes shell commands or Python callables, optionally logging output, and handles errors. + + :param name: Check name + :param command: Shell command (str) or Python callable, or list of them. + :param workdir: Optional working directory. + :param with_log: Boolean flag to log output to a file. 
+ :param fail_fast: Boolean flag to stop execution if one command fails. + :param command_args: Positional arguments for the callable command. + :param command_kwargs: Keyword arguments for the callable command. + :return: Result object with status and optional log file. + """ + + # Stopwatch to track execution time + stop_watch_ = Utils.Stopwatch() + command_args = command_args or [] + command_kwargs = command_kwargs or {} + + # Set log file path if logging is enabled + log_file = ( + f"{_Settings.TEMP_DIR}/{Utils.normalize_string(name)}.log" + if with_log + else None + ) + + # Ensure the command is a list for consistent iteration + if not isinstance(command, list): + fail_fast = False + command = [command] + + print(f"> Starting execution for [{name}]") + res = True # Track success/failure status + error_infos = [] + for command_ in command: + if callable(command_): + # If command is a Python function, call it with provided arguments + result = command_(*command_args, **command_kwargs) + if isinstance(result, bool): + res = result + elif result: + error_infos.append(str(result)) + res = False + else: + # Run shell command in a specified directory with logging and verbosity + with ContextManager.cd(workdir): + exit_code = Shell.run(command_, verbose=True, log_file=log_file) + res = exit_code == 0 + + # If fail_fast is enabled, stop on first failure + if not res and fail_fast: + print(f"Execution stopped due to failure in [{command_}]") + break + + # Create and return the result object with status and log file (if any) + return Result.create_from( + name=name, + status=res, + stopwatch=stop_watch_, + info=error_infos, + files=[log_file] if log_file else None, + ) + + def finish_job_accordingly(self): + self.dump() + if not self.is_ok(): + print("ERROR: Job Failed") + for result in self.results: + if not result.is_ok(): + print("Failed checks:") + print(" | ", result) + sys.exit(1) + else: + print("ok") + + +class ResultInfo: + SETUP_ENV_JOB_FAILED = ( + "Failed to set up job env, it's praktika bug or misconfiguration" + ) + PRE_JOB_FAILED = ( + "Failed to do a job pre-run step, it's praktika bug or misconfiguration" + ) + KILLED = "Job killed or terminated, no Result provided" + NOT_FOUND_IMPOSSIBLE = ( + "No Result file (bug, or job misbehaviour, must not ever happen)" + ) + SKIPPED_DUE_TO_PREVIOUS_FAILURE = "Skipped due to previous failure" + TIMEOUT = "Timeout" + + GH_STATUS_ERROR = "Failed to set GH commit status" + + NOT_FINALIZED = ( + "Job did not not provide Result: job script bug, died CI runner or praktika bug" + ) + + S3_ERROR = "S3 call failure" diff --git a/praktika/runner.py b/praktika/runner.py new file mode 100644 index 000000000000..15e759397ec5 --- /dev/null +++ b/praktika/runner.py @@ -0,0 +1,348 @@ +import os +import re +import sys +import traceback +from pathlib import Path + +from praktika._environment import _Environment +from praktika.artifact import Artifact +from praktika.cidb import CIDB +from praktika.digest import Digest +from praktika.hook_cache import CacheRunnerHooks +from praktika.hook_html import HtmlRunnerHooks +from praktika.result import Result, ResultInfo +from praktika.runtime import RunConfig +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Shell, TeePopen, Utils + + +class Runner: + @staticmethod + def generate_dummy_environment(workflow, job): + print("WARNING: Generate dummy env for local test") + Shell.check( + f"mkdir -p {Settings.TEMP_DIR} {Settings.INPUT_DIR} {Settings.OUTPUT_DIR}" + ) + 
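+ # local test runs have no GitHub event to read from, so fabricate a minimal
+ # _Environment and RunConfig (with docker digests) plus a pending Result,
+ # mirroring what a real CI run would find on disk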
_Environment( + WORKFLOW_NAME=workflow.name, + JOB_NAME=job.name, + REPOSITORY="", + BRANCH="", + SHA="", + PR_NUMBER=-1, + EVENT_TYPE="", + JOB_OUTPUT_STREAM="", + EVENT_FILE_PATH="", + CHANGE_URL="", + COMMIT_URL="", + BASE_BRANCH="", + RUN_URL="", + RUN_ID="", + INSTANCE_ID="", + INSTANCE_TYPE="", + INSTANCE_LIFE_CYCLE="", + ).dump() + workflow_config = RunConfig( + name=workflow.name, + digest_jobs={}, + digest_dockers={}, + sha="", + cache_success=[], + cache_success_base64=[], + cache_artifacts={}, + ) + for docker in workflow.dockers: + workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest( + docker, workflow.dockers + ) + workflow_config.dump() + + Result.generate_pending(job.name).dump() + + def _setup_env(self, _workflow, job): + # source env file to write data into fs (workflow config json, workflow status json) + Shell.check(f". {Settings.ENV_SETUP_SCRIPT}", verbose=True, strict=True) + + # parse the same env script and apply envs from python so that this process sees them + with open(Settings.ENV_SETUP_SCRIPT, "r") as f: + content = f.read() + export_pattern = re.compile( + r"export (\w+)=\$\(cat<<\'EOF\'\n(.*?)EOF\n\)", re.DOTALL + ) + matches = export_pattern.findall(content) + for key, value in matches: + value = value.strip() + os.environ[key] = value + print(f"Set environment variable {key}.") + + # TODO: remove + os.environ["PYTHONPATH"] = os.getcwd() + + print("Read GH Environment") + env = _Environment.from_env() + env.JOB_NAME = job.name + env.PARAMETER = job.parameter + env.dump() + print(env) + + return 0 + + def _pre_run(self, workflow, job): + env = _Environment.get() + + result = Result( + name=job.name, + status=Result.Status.RUNNING, + start_time=Utils.timestamp(), + ) + result.dump() + + if workflow.enable_report and job.name != Settings.CI_CONFIG_JOB_NAME: + print("Update Job and Workflow Report") + HtmlRunnerHooks.pre_run(workflow, job) + + print("Download required artifacts") + required_artifacts = [] + if job.requires and workflow.artifacts: + for requires_artifact_name in job.requires: + for artifact in workflow.artifacts: + if ( + artifact.name == requires_artifact_name + and artifact.type == Artifact.Type.S3 + ): + required_artifacts.append(artifact) + print(f"--- Job requires s3 artifacts [{required_artifacts}]") + if workflow.enable_cache: + prefixes = CacheRunnerHooks.pre_run( + _job=job, _workflow=workflow, _required_artifacts=required_artifacts + ) + else: + prefixes = [env.get_s3_prefix()] * len(required_artifacts) + for artifact, prefix in zip(required_artifacts, prefixes): + s3_path = f"{Settings.S3_ARTIFACT_PATH}/{prefix}/{Utils.normalize_string(artifact._provided_by)}/{Path(artifact.path).name}" + assert S3.copy_file_from_s3(s3_path=s3_path, local_path=Settings.INPUT_DIR) + + return 0 + + def _run(self, workflow, job, docker="", no_docker=False, param=None): + if param: + if not isinstance(param, str): + Utils.raise_with_error( + f"Custom param for local tests must be of type str, got [{type(param)}]" + ) + env = _Environment.get() + env.LOCAL_RUN_PARAM = param + env.dump() + print(f"Custom param for local tests [{param}] dumped into Environment") + + if job.run_in_docker and not no_docker: + # TODO: add support for any image, including not from ci config (e.g. 
ubuntu:latest) + docker_tag = RunConfig.from_fs(workflow.name).digest_dockers[ + job.run_in_docker + ] + docker = docker or f"{job.run_in_docker}:{docker_tag}" + cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}" + else: + cmd = job.command + print(f"--- Run command [{cmd}]") + + with TeePopen(cmd, timeout=job.timeout) as process: + exit_code = process.wait() + + result = Result.from_fs(job.name) + if exit_code != 0: + if not result.is_completed(): + if process.timeout_exceeded: + print( + f"WARNING: Job timed out: [{job.name}], timeout [{job.timeout}], exit code [{exit_code}]" + ) + result.set_status(Result.Status.ERROR).set_info( + ResultInfo.TIMEOUT + ) + elif result.is_running(): + info = f"ERROR: Job terminated with an error, exit code [{exit_code}] - set status to [{Result.Status.ERROR}]" + print(info) + result.set_status(Result.Status.ERROR).set_info(info) + else: + info = f"ERROR: Invalid status [{result.status}] for exit code [{exit_code}] - switch to [{Result.Status.ERROR}]" + print(info) + result.set_status(Result.Status.ERROR).set_info(info) + result.dump() + + return exit_code + + def _post_run( + self, workflow, job, setup_env_exit_code, prerun_exit_code, run_exit_code + ): + info_errors = [] + env = _Environment.get() + result_exist = Result.exist(job.name) + + if setup_env_exit_code != 0: + info = f"ERROR: {ResultInfo.SETUP_ENV_JOB_FAILED}" + print(info) + # set Result with error and logs + Result( + name=job.name, + status=Result.Status.ERROR, + start_time=Utils.timestamp(), + duration=0.0, + info=info, + ).dump() + elif prerun_exit_code != 0: + info = f"ERROR: {ResultInfo.PRE_JOB_FAILED}" + print(info) + # set Result with error and logs + Result( + name=job.name, + status=Result.Status.ERROR, + start_time=Utils.timestamp(), + duration=0.0, + info=info, + ).dump() + elif not result_exist: + info = f"ERROR: {ResultInfo.NOT_FOUND_IMPOSSIBLE}" + print(info) + Result( + name=job.name, + start_time=Utils.timestamp(), + duration=None, + status=Result.Status.ERROR, + info=ResultInfo.NOT_FOUND_IMPOSSIBLE, + ).dump() + + result = Result.from_fs(job.name) + + if not result.is_completed(): + info = f"ERROR: {ResultInfo.KILLED}" + print(info) + result.set_info(info).set_status(Result.Status.ERROR).dump() + + result.set_files(files=[Settings.RUN_LOG]) + result.update_duration().dump() + + if result.info and result.status != Result.Status.SUCCESS: + # provide job info to workflow level + info_errors.append(result.info) + + if run_exit_code == 0: + providing_artifacts = [] + if job.provides and workflow.artifacts: + for provides_artifact_name in job.provides: + for artifact in workflow.artifacts: + if ( + artifact.name == provides_artifact_name + and artifact.type == Artifact.Type.S3 + ): + providing_artifacts.append(artifact) + if providing_artifacts: + print(f"Job provides s3 artifacts [{providing_artifacts}]") + for artifact in providing_artifacts: + try: + assert Shell.check( + f"ls -l {artifact.path}", verbose=True + ), f"Artifact {artifact.path} not found" + s3_path = f"{Settings.S3_ARTIFACT_PATH}/{env.get_s3_prefix()}/{Utils.normalize_string(env.JOB_NAME)}" + link = S3.copy_file_to_s3( + s3_path=s3_path, local_path=artifact.path + ) + result.set_link(link) + except Exception as e: + error = ( + f"ERROR: Failed to upload artifact [{artifact}], ex [{e}]" + ) + print(error) + info_errors.append(error) + 
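+ # a failed artifact upload downgrades the whole job Result to ERROR so the
+ # problem is visible in the report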
result.set_status(Result.Status.ERROR) + + if workflow.enable_cidb: + print("Insert results to CIDB") + try: + CIDB( + url=workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(), + passwd=workflow.get_secret( + Settings.SECRET_CI_DB_PASSWORD + ).get_value(), + ).insert(result) + except Exception as ex: + error = f"ERROR: Failed to insert data into CI DB, exception [{ex}]" + print(error) + info_errors.append(error) + + result.dump() + + # always in the end + if workflow.enable_cache: + print(f"Run CI cache hook") + if result.is_ok(): + CacheRunnerHooks.post_run(workflow, job) + + if workflow.enable_report: + print(f"Run html report hook") + HtmlRunnerHooks.post_run(workflow, job, info_errors) + + return True + + def run( + self, workflow, job, docker="", dummy_env=False, no_docker=False, param=None + ): + res = True + setup_env_code = -10 + prerun_code = -10 + run_code = -10 + + if res and not dummy_env: + print( + f"\n\n=== Setup env script [{job.name}], workflow [{workflow.name}] ===" + ) + try: + setup_env_code = self._setup_env(workflow, job) + # Source the bash script and capture the environment variables + res = setup_env_code == 0 + if not res: + print( + f"ERROR: Setup env script failed with exit code [{setup_env_code}]" + ) + except Exception as e: + print(f"ERROR: Setup env script failed with exception [{e}]") + traceback.print_exc() + print(f"=== Setup env finished ===\n\n") + else: + self.generate_dummy_environment(workflow, job) + + if res and not dummy_env: + res = False + print(f"=== Pre run script [{job.name}], workflow [{workflow.name}] ===") + try: + prerun_code = self._pre_run(workflow, job) + res = prerun_code == 0 + if not res: + print(f"ERROR: Pre-run failed with exit code [{prerun_code}]") + except Exception as e: + print(f"ERROR: Pre-run script failed with exception [{e}]") + traceback.print_exc() + print(f"=== Pre run finished ===\n\n") + + if res: + res = False + print(f"=== Run script [{job.name}], workflow [{workflow.name}] ===") + try: + run_code = self._run( + workflow, job, docker=docker, no_docker=no_docker, param=param + ) + res = run_code == 0 + if not res: + print(f"ERROR: Run failed with exit code [{run_code}]") + except Exception as e: + print(f"ERROR: Run script failed with exception [{e}]") + traceback.print_exc() + print(f"=== Run scrip finished ===\n\n") + + if not dummy_env: + print(f"=== Post run script [{job.name}], workflow [{workflow.name}] ===") + self._post_run(workflow, job, setup_env_code, prerun_code, run_code) + print(f"=== Post run scrip finished ===") + + if not res: + sys.exit(1) diff --git a/praktika/runtime.py b/praktika/runtime.py new file mode 100644 index 000000000000..a87b67c2c796 --- /dev/null +++ b/praktika/runtime.py @@ -0,0 +1,35 @@ +from dataclasses import dataclass +from typing import Dict, List + +from praktika.cache import Cache +from praktika.settings import Settings +from praktika.utils import MetaClasses, Utils + + +@dataclass +class RunConfig(MetaClasses.Serializable): + name: str + digest_jobs: Dict[str, str] + digest_dockers: Dict[str, str] + cache_success: List[str] + # there are might be issue with special characters in job names if used directly in yaml syntax - create base64 encoded list to avoid this + cache_success_base64: List[str] + cache_artifacts: Dict[str, Cache.CacheRecord] + sha: str + + @classmethod + def from_dict(cls, obj): + cache_artifacts = obj["cache_artifacts"] + cache_artifacts_deserialized = {} + for artifact_name, cache_artifact in cache_artifacts.items(): + 
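+ # cache_artifacts are stored as plain dicts in the workflow config json;
+ # rebuild Cache.CacheRecord objects before constructing RunConfig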
cache_artifacts_deserialized[artifact_name] = Cache.CacheRecord.from_dict( + cache_artifact + ) + obj["cache_artifacts"] = cache_artifacts_deserialized + return RunConfig(**obj) + + @classmethod + def file_name_static(cls, name): + return ( + f"{Settings.TEMP_DIR}/workflow_config_{Utils.normalize_string(name)}.json" + ) diff --git a/praktika/s3.py b/praktika/s3.py new file mode 100644 index 000000000000..8cfb70a90760 --- /dev/null +++ b/praktika/s3.py @@ -0,0 +1,295 @@ +import dataclasses +import json +import time +from pathlib import Path +from typing import Dict + +from praktika._environment import _Environment +from praktika.settings import Settings +from praktika.utils import Shell, Utils + + +class S3: + @dataclasses.dataclass + class Object: + AcceptRanges: str + Expiration: str + LastModified: str + ContentLength: int + ETag: str + ContentType: str + ServerSideEncryption: str + Metadata: Dict + + def has_tags(self, tags): + meta = self.Metadata + for k, v in tags.items(): + if k not in meta or meta[k] != v: + print(f"tag [{k}={v}] does not match meta [{meta}]") + return False + return True + + @classmethod + def clean_s3_directory(cls, s3_path): + assert len(s3_path.split("/")) > 2, "check to not delete too much" + cmd = f"aws s3 rm s3://{s3_path} --recursive" + cls.run_command_with_retries(cmd, retries=1) + return + + @classmethod + def copy_file_to_s3(cls, s3_path, local_path, text=False): + assert Path(local_path).exists(), f"Path [{local_path}] does not exist" + assert Path(s3_path), f"Invalid S3 Path [{s3_path}]" + assert Path( + local_path + ).is_file(), f"Path [{local_path}] is not file. Only files are supported" + file_name = Path(local_path).name + s3_full_path = s3_path + if not s3_full_path.endswith(file_name): + s3_full_path = f"{s3_path}/{Path(local_path).name}" + cmd = f"aws s3 cp {local_path} s3://{s3_full_path}" + if text: + cmd += " --content-type text/plain" + res = cls.run_command_with_retries(cmd) + if not res: + raise + bucket = s3_path.split("/")[0] + endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket] + assert endpoint + return f"https://{s3_full_path}".replace(bucket, endpoint) + + @classmethod + def put(cls, s3_path, local_path, text=False, metadata=None): + assert Path(local_path).exists(), f"Path [{local_path}] does not exist" + assert Path(s3_path), f"Invalid S3 Path [{s3_path}]" + assert Path( + local_path + ).is_file(), f"Path [{local_path}] is not file. 
Only files are supported" + file_name = Path(local_path).name + s3_full_path = s3_path + if not s3_full_path.endswith(file_name): + s3_full_path = f"{s3_path}/{Path(local_path).name}" + + s3_full_path = str(s3_full_path).removeprefix("s3://") + bucket, key = s3_full_path.split("/", maxsplit=1) + + command = ( + f"aws s3api put-object --bucket {bucket} --key {key} --body {local_path}" + ) + if metadata: + for k, v in metadata.items(): + command += f" --metadata {k}={v}" + + cmd = f"aws s3 cp {local_path} s3://{s3_full_path}" + if text: + cmd += " --content-type text/plain" + res = cls.run_command_with_retries(command) + assert res + + @classmethod + def run_command_with_retries(cls, command, retries=Settings.MAX_RETRIES_S3): + i = 0 + res = False + while not res and i < retries: + i += 1 + ret_code, stdout, stderr = Shell.get_res_stdout_stderr( + command, verbose=True + ) + if "aws sso login" in stderr: + print("ERROR: aws login expired") + break + elif "does not exist" in stderr: + print("ERROR: requested file does not exist") + break + if ret_code != 0: + print( + f"ERROR: aws s3 cp failed, stdout/stderr err: [{stderr}], out [{stdout}]" + ) + res = ret_code == 0 + return res + + @classmethod + def get_link(cls, s3_path, local_path): + s3_full_path = f"{s3_path}/{Path(local_path).name}" + bucket = s3_path.split("/")[0] + endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket] + return f"https://{s3_full_path}".replace(bucket, endpoint) + + @classmethod + def copy_file_from_s3(cls, s3_path, local_path): + assert Path(s3_path), f"Invalid S3 Path [{s3_path}]" + if Path(local_path).is_dir(): + local_path = Path(local_path) / Path(s3_path).name + else: + assert Path( + local_path + ).parent.is_dir(), f"Parent path for [{local_path}] does not exist" + cmd = f"aws s3 cp s3://{s3_path} {local_path}" + res = cls.run_command_with_retries(cmd) + return res + + @classmethod + def head_object(cls, s3_path): + s3_path = str(s3_path).removeprefix("s3://") + bucket, key = s3_path.split("/", maxsplit=1) + output = Shell.get_output( + f"aws s3api head-object --bucket {bucket} --key {key}", verbose=True + ) + if not output: + return None + else: + return cls.Object(**json.loads(output)) + + @classmethod + def delete(cls, s3_path): + assert Path(s3_path), f"Invalid S3 Path [{s3_path}]" + return Shell.check( + f"aws s3 rm s3://{s3_path}", + verbose=True, + ) + + # TODO: apparently should be placed into separate file to be used only inside praktika + # keeping this module clean from importing Settings, Environment and etc, making it easy for use externally + @classmethod + def copy_result_to_s3(cls, result, unlock=True): + result.dump() + env = _Environment.get() + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}" + s3_path_full = f"{s3_path}/{Path(result.file_name()).name}" + url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name()) + if env.PR_NUMBER: + print("Duplicate Result for latest commit alias in PR") + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True)}" + url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name()) + if unlock: + if not cls.unlock(s3_path_full): + print(f"ERROR: File [{s3_path_full}] unlock failure") + assert False # TODO: investigate + return url + + @classmethod + def copy_result_from_s3(cls, local_path, lock=True): + env = _Environment.get() + file_name = Path(local_path).name + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}/{file_name}" + if lock: + cls.lock(s3_path) + if not S3.copy_file_from_s3(s3_path=s3_path, 
local_path=local_path): + print(f"ERROR: failed to cp file [{s3_path}] from s3") + raise + + @classmethod + def lock(cls, s3_path, level=0): + assert level < 3, "Never" + env = _Environment.get() + s3_path_lock = s3_path + f".lock" + file_path_lock = f"{Settings.TEMP_DIR}/{Path(s3_path_lock).name}" + assert Shell.check( + f"echo '''{env.JOB_NAME}''' > {file_path_lock}", verbose=True + ), "Never" + + i = 20 + meta = S3.head_object(s3_path_lock) + while meta: + print(f"WARNING: Failed to acquire lock, meta [{meta}] - wait") + i -= 5 + if i < 0: + info = f"ERROR: lock acquire failure - unlock forcefully" + print(info) + env.add_info(info) + break + time.sleep(5) + + metadata = {"job": Utils.to_base64(env.JOB_NAME)} + S3.put( + s3_path=s3_path_lock, + local_path=file_path_lock, + metadata=metadata, + ) + time.sleep(1) + obj = S3.head_object(s3_path_lock) + if not obj or not obj.has_tags(tags=metadata): + print(f"WARNING: locked by another job [{obj}]") + env.add_info("S3 lock file failure") + cls.lock(s3_path, level=level + 1) + print("INFO: lock acquired") + + @classmethod + def unlock(cls, s3_path): + s3_path_lock = s3_path + ".lock" + env = _Environment.get() + obj = S3.head_object(s3_path_lock) + if not obj: + print("ERROR: lock file is removed") + assert False # investigate + elif not obj.has_tags({"job": Utils.to_base64(env.JOB_NAME)}): + print("ERROR: lock file was acquired by another job") + assert False # investigate + + if not S3.delete(s3_path_lock): + print(f"ERROR: File [{s3_path_lock}] delete failure") + print("INFO: lock released") + return True + + @classmethod + def get_result_link(cls, result): + env = _Environment.get() + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True if env.PR_NUMBER else False)}" + return S3.get_link(s3_path=s3_path, local_path=result.file_name()) + + @classmethod + def clean_latest_result(cls): + env = _Environment.get() + env.SHA = "latest" + assert env.PR_NUMBER + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}" + S3.clean_s3_directory(s3_path=s3_path) + + @classmethod + def _upload_file_to_s3( + cls, local_file_path, upload_to_s3: bool, text: bool = False, s3_subprefix="" + ) -> str: + if upload_to_s3: + env = _Environment.get() + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}" + if s3_subprefix: + s3_subprefix.removeprefix("/").removesuffix("/") + s3_path += f"/{s3_subprefix}" + html_link = S3.copy_file_to_s3( + s3_path=s3_path, local_path=local_file_path, text=text + ) + return html_link + return f"file://{Path(local_file_path).absolute()}" + + @classmethod + def upload_result_files_to_s3(cls, result): + if result.results: + for result_ in result.results: + cls.upload_result_files_to_s3(result_) + for file in result.files: + if not Path(file).is_file(): + print(f"ERROR: Invalid file [{file}] in [{result.name}] - skip upload") + result.info += f"\nWARNING: Result file [{file}] was not found" + file_link = cls._upload_file_to_s3(file, upload_to_s3=False) + else: + is_text = False + for text_file_suffix in Settings.TEXT_CONTENT_EXTENSIONS: + if file.endswith(text_file_suffix): + print( + f"File [{file}] matches Settings.TEXT_CONTENT_EXTENSIONS [{Settings.TEXT_CONTENT_EXTENSIONS}] - add text attribute for s3 object" + ) + is_text = True + break + file_link = cls._upload_file_to_s3( + file, + upload_to_s3=True, + text=is_text, + s3_subprefix=Utils.normalize_string(result.name), + ) + result.links.append(file_link) + if result.files: + print( + f"Result files [{result.files}] uploaded to s3 
[{result.links[-len(result.files):]}] - clean files list" + ) + result.files = [] + result.dump() diff --git a/praktika/secret.py b/praktika/secret.py new file mode 100644 index 000000000000..9c033d767080 --- /dev/null +++ b/praktika/secret.py @@ -0,0 +1,61 @@ +import dataclasses +import os + +from praktika.utils import Shell + + +class Secret: + class Type: + AWS_SSM_VAR = "aws parameter" + AWS_SSM_SECRET = "aws secret" + GH_SECRET = "gh secret" + + @dataclasses.dataclass + class Config: + name: str + type: str + + def is_gh(self): + return self.type == Secret.Type.GH_SECRET + + def get_value(self): + if self.type == Secret.Type.AWS_SSM_VAR: + return self.get_aws_ssm_var() + if self.type == Secret.Type.AWS_SSM_SECRET: + return self.get_aws_ssm_secret() + elif self.type == Secret.Type.GH_SECRET: + return self.get_gh_secret() + else: + assert False, f"Not supported secret type, secret [{self}]" + + def get_aws_ssm_var(self): + res = Shell.get_output( + f"aws ssm get-parameter --name {self.name} --with-decryption --output text --query Parameter.Value", + ) + if not res: + print(f"ERROR: Failed to get secret [{self.name}]") + raise RuntimeError() + return res + + def get_aws_ssm_secret(self): + name, secret_key_name = self.name, "" + if "." in self.name: + name, secret_key_name = self.name.split(".") + cmd = f"aws secretsmanager get-secret-value --secret-id {name} --query SecretString --output text" + if secret_key_name: + cmd += f" | jq -r '.[\"{secret_key_name}\"]'" + res = Shell.get_output(cmd, verbose=True) + if not res: + print(f"ERROR: Failed to get secret [{self.name}]") + raise RuntimeError() + return res + + def get_gh_secret(self): + res = os.getenv(f"{self.name}") + if not res: + print(f"ERROR: Failed to get secret [{self.name}]") + raise RuntimeError() + return res + + def __repr__(self): + return self.name diff --git a/praktika/settings.py b/praktika/settings.py new file mode 100644 index 000000000000..1a4068d9398b --- /dev/null +++ b/praktika/settings.py @@ -0,0 +1,8 @@ +from praktika._settings import _Settings +from praktika.mangle import _get_user_settings + +Settings = _Settings() + +user_settings = _get_user_settings() +for setting, value in user_settings.items(): + Settings.__setattr__(setting, value) diff --git a/praktika/utils.py b/praktika/utils.py new file mode 100644 index 000000000000..1983ce274a3c --- /dev/null +++ b/praktika/utils.py @@ -0,0 +1,597 @@ +import base64 +import dataclasses +import glob +import json +import multiprocessing +import os +import re +import signal +import subprocess +import sys +import time +from abc import ABC, abstractmethod +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path +from threading import Thread +from types import SimpleNamespace +from typing import Any, Dict, Iterator, List, Optional, Type, TypeVar, Union + +from praktika._settings import _Settings + +T = TypeVar("T", bound="Serializable") + + +class MetaClasses: + class WithIter(type): + def __iter__(cls): + return (v for k, v in cls.__dict__.items() if not k.startswith("_")) + + @dataclasses.dataclass + class Serializable(ABC): + @classmethod + def to_dict(cls, obj): + if dataclasses.is_dataclass(obj): + return {k: cls.to_dict(v) for k, v in dataclasses.asdict(obj).items()} + elif isinstance(obj, SimpleNamespace): + return {k: cls.to_dict(v) for k, v in vars(obj).items()} + elif isinstance(obj, list): + return [cls.to_dict(i) for i in obj] + elif isinstance(obj, dict): + return {k: cls.to_dict(v) for k, v in obj.items()} + else: + 
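+ # plain values (str, int, float, bool, None) are JSON-serializable as-is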
return obj + + @classmethod + def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T: + return cls(**obj) + + @classmethod + def from_fs(cls: Type[T], name) -> T: + with open(cls.file_name_static(name), "r", encoding="utf8") as f: + try: + return cls.from_dict(json.load(f)) + except json.decoder.JSONDecodeError as ex: + print(f"ERROR: failed to parse json, ex [{ex}]") + print(f"JSON content [{cls.file_name_static(name)}]") + Shell.check(f"cat {cls.file_name_static(name)}") + raise ex + + @classmethod + @abstractmethod + def file_name_static(cls, name): + pass + + def file_name(self): + return self.file_name_static(self.name) + + def dump(self): + with open(self.file_name(), "w", encoding="utf8") as f: + json.dump(self.to_dict(self), f, indent=4) + return self + + @classmethod + def exist(cls, name): + return Path(cls.file_name_static(name)).is_file() + + def to_json(self, pretty=False): + return json.dumps(dataclasses.asdict(self), indent=4 if pretty else None) + + +class ContextManager: + @staticmethod + @contextmanager + def cd(to: Optional[Union[Path, str]] = None) -> Iterator[None]: + """ + changes current working directory to @path or `git root` if @path is None + :param to: + :return: + """ + if not to: + try: + to = Shell.get_output_or_raise("git rev-parse --show-toplevel") + except: + pass + if not to: + if Path(_Settings.DOCKER_WD).is_dir(): + to = _Settings.DOCKER_WD + if not to: + assert False, "FIX IT" + assert to + old_pwd = os.getcwd() + os.chdir(to) + try: + yield + finally: + os.chdir(old_pwd) + + +class Shell: + @classmethod + def get_output_or_raise(cls, command, verbose=False): + return cls.get_output(command, verbose=verbose, strict=True).strip() + + @classmethod + def get_output(cls, command, strict=False, verbose=False): + if verbose: + print(f"Run command [{command}]") + res = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if res.stderr: + print(f"WARNING: stderr: {res.stderr.strip()}") + if strict and res.returncode != 0: + raise RuntimeError(f"command failed with {res.returncode}") + return res.stdout.strip() + + @classmethod + def get_res_stdout_stderr(cls, command, verbose=True): + if verbose: + print(f"Run command [{command}]") + res = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + return res.returncode, res.stdout.strip(), res.stderr.strip() + + @classmethod + def check( + cls, + command, + log_file=None, + strict=False, + verbose=False, + dry_run=False, + stdin_str=None, + timeout=None, + retries=0, + **kwargs, + ): + return ( + cls.run( + command, + log_file, + strict, + verbose, + dry_run, + stdin_str, + retries=retries, + timeout=timeout, + **kwargs, + ) + == 0 + ) + + @classmethod + def run( + cls, + command, + log_file=None, + strict=False, + verbose=False, + dry_run=False, + stdin_str=None, + timeout=None, + retries=0, + **kwargs, + ): + def _check_timeout(timeout, process) -> None: + if not timeout: + return + time.sleep(timeout) + print( + f"WARNING: Timeout exceeded [{timeout}], sending SIGTERM to process group [{process.pid}]" + ) + try: + os.killpg(process.pid, signal.SIGTERM) + except ProcessLookupError: + print("Process already terminated.") + return + + time_wait = 0 + wait_interval = 5 + + # Wait for process to terminate + while process.poll() is None and time_wait < 100: + print("Waiting for process to exit...") + time.sleep(wait_interval) + time_wait += wait_interval + + # Force kill if still running + if 
process.poll() is None: + print(f"WARNING: Process still running after SIGTERM, sending SIGKILL") + try: + os.killpg(process.pid, signal.SIGKILL) + except ProcessLookupError: + print("Process already terminated.") + + # Dry-run + if dry_run: + print(f"Dry-run. Would run command [{command}]") + return 0 # Return success for dry-run + + if verbose: + print(f"Run command: [{command}]") + + log_file = log_file or "/dev/null" + proc = None + for retry in range(retries + 1): + try: + with open(log_file, "w") as log_fp: + proc = subprocess.Popen( + command, + shell=True, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE if stdin_str else None, + universal_newlines=True, + start_new_session=True, # Start a new process group for signal handling + bufsize=1, # Line-buffered + errors="backslashreplace", + **kwargs, + ) + + # Start the timeout thread if specified + if timeout: + t = Thread(target=_check_timeout, args=(timeout, proc)) + t.daemon = True + t.start() + + # Write stdin if provided + if stdin_str: + proc.stdin.write(stdin_str) + proc.stdin.close() + + # Process output in real-time + if proc.stdout: + for line in proc.stdout: + sys.stdout.write(line) + log_fp.write(line) + + proc.wait() # Wait for the process to finish + + if proc.returncode == 0: + break # Exit retry loop if success + else: + if verbose: + print( + f"ERROR: command [{command}] failed, exit code: {proc.returncode}, retry: {retry}/{retries}" + ) + except Exception as e: + if verbose: + print( + f"ERROR: command failed, exception: {e}, retry: {retry}/{retries}" + ) + if proc: + proc.kill() + + # Handle strict mode (ensure process success or fail) + if strict: + assert ( + proc and proc.returncode == 0 + ), f"Command failed with return code {proc.returncode}" + + return proc.returncode if proc else 1 # Return 1 if process never started + + @classmethod + def run_async( + cls, + command, + stdin_str=None, + verbose=False, + suppress_output=False, + **kwargs, + ): + if verbose: + print(f"Run command in background [{command}]") + proc = subprocess.Popen( + command, + shell=True, + stderr=subprocess.STDOUT if not suppress_output else subprocess.DEVNULL, + stdout=subprocess.PIPE if not suppress_output else subprocess.DEVNULL, + stdin=subprocess.PIPE if stdin_str else None, + universal_newlines=True, + start_new_session=True, + bufsize=1, + errors="backslashreplace", + **kwargs, + ) + if proc.stdout: + for line in proc.stdout: + print(line, end="") + return proc + + +class Utils: + @staticmethod + def terminate_process_group(pid, force=False): + if not force: + os.killpg(os.getpgid(pid), signal.SIGTERM) + else: + os.killpg(os.getpgid(pid), signal.SIGKILL) + + @staticmethod + def set_env(key, val): + os.environ[key] = val + + @staticmethod + def print_formatted_error(error_message, stdout="", stderr=""): + stdout_lines = stdout.splitlines() if stdout else [] + stderr_lines = stderr.splitlines() if stderr else [] + print(f"ERROR: {error_message}") + if stdout_lines: + print(" Out:") + for line in stdout_lines: + print(f" | {line}") + if stderr_lines: + print(" Err:") + for line in stderr_lines: + print(f" | {line}") + + @staticmethod + def sleep(seconds): + time.sleep(seconds) + + @staticmethod + def cwd(): + return Path.cwd() + + @staticmethod + def cpu_count(): + return multiprocessing.cpu_count() + + @staticmethod + def raise_with_error(error_message, stdout="", stderr=""): + Utils.print_formatted_error(error_message, stdout, stderr) + raise + + @staticmethod + def timestamp(): + return 
datetime.utcnow().timestamp() + + @staticmethod + def timestamp_to_str(timestamp): + return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S") + + @staticmethod + def get_failed_tests_number(description: str) -> Optional[int]: + description = description.lower() + + pattern = r"fail:\s*(\d+)\s*(?=,|$)" + match = re.search(pattern, description) + if match: + return int(match.group(1)) + return None + + @staticmethod + def is_killed_with_oom(): + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + return True + return False + + @staticmethod + def clear_dmesg(): + Shell.check("sudo dmesg --clear", verbose=True) + + @staticmethod + def to_base64(value): + assert isinstance(value, str), f"TODO: not supported for {type(value)}" + string_bytes = value.encode("utf-8") + base64_bytes = base64.b64encode(string_bytes) + base64_string = base64_bytes.decode("utf-8") + return base64_string + + @staticmethod + def is_hex(s): + try: + int(s, 16) + return True + except ValueError: + return False + + @staticmethod + def normalize_string(string: str) -> str: + res = string.lower() + for r in ( + (" ", "_"), + ("(", ""), + (")", ""), + ("{", ""), + ("}", ""), + ("'", ""), + ("[", ""), + ("]", ""), + (",", ""), + ("/", "_"), + ("-", "_"), + (":", ""), + ('"', ""), + ): + res = res.replace(*r) + return res + + @staticmethod + def traverse_path(path, file_suffixes=None, sorted=False, not_exists_ok=False): + res = [] + + def is_valid_file(file): + if file_suffixes is None: + return True + return any(file.endswith(suffix) for suffix in file_suffixes) + + if os.path.isfile(path): + if is_valid_file(path): + res.append(path) + elif os.path.isdir(path): + for root, dirs, files in os.walk(path): + for file in files: + full_path = os.path.join(root, file) + if is_valid_file(full_path): + res.append(full_path) + elif "*" in str(path): + res.extend( + [ + f + for f in glob.glob(path, recursive=True) + if os.path.isfile(f) and is_valid_file(f) + ] + ) + else: + if not_exists_ok: + pass + else: + assert False, f"File does not exist or not valid [{path}]" + + if sorted: + res.sort(reverse=True) + + return res + + @classmethod + def traverse_paths( + cls, + include_paths, + exclude_paths, + file_suffixes=None, + sorted=False, + not_exists_ok=False, + ) -> List["str"]: + included_files_ = set() + for path in include_paths: + included_files_.update(cls.traverse_path(path, file_suffixes=file_suffixes)) + + excluded_files = set() + for path in exclude_paths: + res = cls.traverse_path(path, not_exists_ok=not_exists_ok) + if not res: + print( + f"WARNING: Utils.traverse_paths excluded 0 files by path [{path}] in exclude_paths" + ) + else: + excluded_files.update(res) + res = [f for f in included_files_ if f not in excluded_files] + if sorted: + res.sort(reverse=True) + return res + + @classmethod + def add_to_PATH(cls, path): + path_cur = os.getenv("PATH", "") + if path_cur: + path += ":" + path_cur + os.environ["PATH"] = path + + class Stopwatch: + def __init__(self): + self.start_time = datetime.utcnow().timestamp() + + @property + def duration(self) -> float: + return datetime.utcnow().timestamp() - self.start_time + + +class TeePopen: + def __init__( + self, + command: str, + log_file: Union[str, Path] = "", + env: Optional[dict] = None, + timeout: Optional[int] = None, + ): + self.command = command + self.log_file_name = log_file + self.log_file = None + self.env = env or os.environ.copy() + self.process 
= None # type: Optional[subprocess.Popen] + self.timeout = timeout + self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False + + def _check_timeout(self) -> None: + if self.timeout is None: + return + time.sleep(self.timeout) + print( + f"WARNING: Timeout exceeded [{self.timeout}], send SIGTERM to [{self.process.pid}] and give a chance for graceful termination" + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True + self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + time.sleep(wait) + time_wait += wait + while self.process.poll() is None: + print(f"WARNING: Still running, send SIGKILL to [{self.process.pid}]") + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + time.sleep(2) + + def __enter__(self) -> "TeePopen": + if self.log_file_name: + self.log_file = open(self.log_file_name, "w", encoding="utf-8") + self.process = subprocess.Popen( + self.command, + shell=True, + universal_newlines=True, + env=self.env, + start_new_session=True, # signall will be sent to all children + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + bufsize=1, + errors="backslashreplace", + ) + time.sleep(1) + print(f"Subprocess started, pid [{self.process.pid}]") + if self.timeout is not None and self.timeout > 0: + t = Thread(target=self._check_timeout) + t.daemon = True # does not block the program from exit + t.start() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.wait() + if self.log_file: + self.log_file.close() + + def wait(self) -> int: + if self.process.stdout is not None: + for line in self.process.stdout: + sys.stdout.write(line) + if self.log_file: + self.log_file.write(line) + + return self.process.wait() + + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + os.killpg(self.process.pid, signal_num) + + +if __name__ == "__main__": + + @dataclasses.dataclass + class Test(MetaClasses.Serializable): + name: str + + @staticmethod + def file_name_static(name): + return f"/tmp/{Utils.normalize_string(name)}.json" + + Test(name="dsada").dump() + t = Test.from_fs("dsada") + print(t) diff --git a/praktika/validator.py b/praktika/validator.py new file mode 100644 index 000000000000..29edc0a27ed9 --- /dev/null +++ b/praktika/validator.py @@ -0,0 +1,208 @@ +import glob +import sys +from itertools import chain +from pathlib import Path + +from praktika import Workflow +from praktika._settings import GHRunners +from praktika.mangle import _get_workflows +from praktika.settings import Settings +from praktika.utils import ContextManager + + +class Validator: + @classmethod + def validate(cls): + print("---Start validating Pipeline and settings---") + workflows = _get_workflows() + for workflow in workflows: + print(f"Validating workflow [{workflow.name}]") + + cls.validate_file_paths_in_run_command(workflow) + cls.validate_file_paths_in_digest_configs(workflow) + cls.validate_requirements_txt_files(workflow) + cls.validate_dockers(workflow) + + if workflow.artifacts: + for artifact in workflow.artifacts: + if artifact.is_s3_artifact(): + assert ( + Settings.S3_ARTIFACT_PATH + ), "Provide S3_ARTIFACT_PATH setting in any .py file in ./ci/settings/* to be able to use s3 for artifacts" + + for job in workflow.jobs: + if job.requires and workflow.artifacts: + for require in job.requires: + if ( + require in workflow.artifacts + and workflow.artifacts[require].is_s3_artifact() + ): + 
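+ # jobs that exchange S3 artifacts must run on self-hosted runners rather than
+ # the default GitHub-hosted ones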
assert not any( + [r in GHRunners for r in job.runs_on] + ), f"GH runners [{job.name}:{job.runs_on}] must not be used with S3 as artifact storage" + + if job.allow_merge_on_failure: + assert ( + workflow.enable_merge_ready_status + ), f"Job property allow_merge_on_failure must be used only with enabled workflow.enable_merge_ready_status, workflow [{workflow.name}], job [{job.name}]" + + if workflow.enable_cache: + assert ( + Settings.CI_CONFIG_RUNS_ON + ), f"Runner label to run workflow config job must be provided via CACHE_CONFIG_RUNS_ON setting if enable_cache=True, workflow [{workflow.name}]" + + assert ( + Settings.CACHE_S3_PATH + ), f"CACHE_S3_PATH Setting must be defined if enable_cache=True, workflow [{workflow.name}]" + + if workflow.dockers: + cls.evaluate_check( + Settings.DOCKER_BUILD_RUNS_ON, + f"DOCKER_BUILD_RUNS_ON settings must be defined if workflow has dockers", + workflow_name=workflow.name, + ) + + if workflow.enable_report: + assert ( + Settings.HTML_S3_PATH + ), f"HTML_S3_PATH Setting must be defined if enable_html=True, workflow [{workflow.name}]" + assert ( + Settings.S3_BUCKET_TO_HTTP_ENDPOINT + ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must be defined if enable_html=True, workflow [{workflow.name}]" + assert ( + Settings.HTML_S3_PATH.split("/")[0] + in Settings.S3_BUCKET_TO_HTTP_ENDPOINT + ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must include bucket name [{Settings.HTML_S3_PATH}] from HTML_S3_PATH, workflow [{workflow.name}]" + + if workflow.enable_cache: + for artifact in workflow.artifacts or []: + assert ( + artifact.is_s3_artifact() + ), f"All artifacts must be of S3 type if enable_cache|enable_html=True, artifact [{artifact.name}], type [{artifact.type}], workflow [{workflow.name}]" + + if workflow.dockers: + assert ( + Settings.DOCKERHUB_USERNAME + ), f"Settings.DOCKERHUB_USERNAME must be provided if workflow has dockers, workflow [{workflow.name}]" + assert ( + Settings.DOCKERHUB_SECRET + ), f"Settings.DOCKERHUB_SECRET must be provided if workflow has dockers, workflow [{workflow.name}]" + assert workflow.get_secret( + Settings.DOCKERHUB_SECRET + ), f"Secret [{Settings.DOCKERHUB_SECRET}] must have configuration in workflow.secrets, workflow [{workflow.name}]" + + if ( + workflow.enable_cache + or workflow.enable_report + or workflow.enable_merge_ready_status + ): + for job in workflow.jobs: + assert not any( + job in ("ubuntu-latest",) for job in job.runs_on + ), f"GitHub Runners must not be used for workflow with enabled: workflow.enable_cache, workflow.enable_html or workflow.enable_merge_ready_status as s3 access is required, workflow [{workflow.name}], job [{job.name}]" + + if workflow.enable_cidb: + assert ( + Settings.SECRET_CI_DB_URL + ), f"Settings.CI_DB_URL_SECRET must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]" + assert ( + Settings.SECRET_CI_DB_PASSWORD + ), f"Settings.CI_DB_PASSWORD_SECRET must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]" + assert ( + Settings.CI_DB_DB_NAME + ), f"Settings.CI_DB_DB_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]" + assert ( + Settings.CI_DB_TABLE_NAME + ), f"Settings.CI_DB_TABLE_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]" + + @classmethod + def validate_file_paths_in_run_command(cls, workflow: Workflow.Config) -> None: + if not Settings.VALIDATE_FILE_PATHS: + return + with ContextManager.cd(): + for job in workflow.jobs: + run_command = job.command + command_parts = run_command.split(" ") + for part 
in command_parts: + if ">" in part: + return + if "/" in part: + assert ( + Path(part).is_file() or Path(part).is_dir() + ), f"Apparently run command [{run_command}] for job [{job}] has invalid path [{part}]. Setting to disable check: VALIDATE_FILE_PATHS" + + @classmethod + def validate_file_paths_in_digest_configs(cls, workflow: Workflow.Config) -> None: + if not Settings.VALIDATE_FILE_PATHS: + return + with ContextManager.cd(): + for job in workflow.jobs: + if not job.digest_config: + continue + for include_path in chain( + job.digest_config.include_paths, job.digest_config.exclude_paths + ): + if "*" in include_path: + assert glob.glob( + include_path, recursive=True + ), f"Apparently file glob [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS" + else: + assert ( + Path(include_path).is_file() or Path(include_path).is_dir() + ), f"Apparently file path [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS" + + @classmethod + def validate_requirements_txt_files(cls, workflow: Workflow.Config) -> None: + with ContextManager.cd(): + for job in workflow.jobs: + if job.job_requirements: + if job.job_requirements.python_requirements_txt: + path = Path(job.job_requirements.python_requirements_txt) + message = f"File with py requirement [{path}] does not exist" + if job.name in ( + Settings.DOCKER_BUILD_JOB_NAME, + Settings.CI_CONFIG_JOB_NAME, + Settings.FINISH_WORKFLOW_JOB_NAME, + ): + message += '\n If all requirements already installed on your runners - add setting INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS""' + message += "\n If requirements needs to be installed - add requirements file (Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS):" + message += "\n echo jwt==1.3.1 > ./ci/requirements.txt" + message += ( + "\n echo requests==2.32.3 >> ./ci/requirements.txt" + ) + message += "\n echo https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl >> ./ci/requirements.txt" + cls.evaluate_check( + path.is_file(), message, job.name, workflow.name + ) + + @classmethod + def validate_dockers(cls, workflow: Workflow.Config): + names = [] + for docker in workflow.dockers: + cls.evaluate_check( + docker.name not in names, + f"Non uniq docker name [{docker.name}]", + workflow_name=workflow.name, + ) + names.append(docker.name) + for docker in workflow.dockers: + for docker_dep in docker.depends_on: + cls.evaluate_check( + docker_dep in names, + f"Docker [{docker.name}] has invalid dependency [{docker_dep}]", + workflow_name=workflow.name, + ) + + @classmethod + def evaluate_check(cls, check_ok, message, workflow_name, job_name=""): + message = message.split("\n") + messages = [message] if not isinstance(message, list) else message + if check_ok: + return + else: + print( + f"ERROR: Config validation failed: workflow [{workflow_name}], job [{job_name}]:" + ) + for message in messages: + print(" || " + message) + sys.exit(1) diff --git a/praktika/version.py b/praktika/version.py new file mode 100644 index 000000000000..b71dad9b794b --- /dev/null +++ b/praktika/version.py @@ -0,0 +1 @@ +VERSION = 1 diff --git a/praktika/workflow.py b/praktika/workflow.py new file mode 100644 index 000000000000..41e8056f9ef6 --- /dev/null +++ b/praktika/workflow.py @@ -0,0 +1,68 @@ +from dataclasses import dataclass, field +from typing import List, Optional + +from praktika import Artifact, Job +from 
praktika.docker import Docker +from praktika.secret import Secret +from praktika.utils import Utils + + +class Workflow: + class Event: + PULL_REQUEST = "pull_request" + PUSH = "push" + + @dataclass + class Config: + """ + branches - List of branch names or patterns, for push trigger only + base_branches - List of base branches (target branch), for pull_request trigger only + """ + + name: str + event: str + jobs: List[Job.Config] + branches: List[str] = field(default_factory=list) + base_branches: List[str] = field(default_factory=list) + artifacts: List[Artifact.Config] = field(default_factory=list) + dockers: List[Docker.Config] = field(default_factory=list) + secrets: List[Secret.Config] = field(default_factory=list) + enable_cache: bool = False + enable_report: bool = False + enable_merge_ready_status: bool = False + enable_cidb: bool = False + + def is_event_pull_request(self): + return self.event == Workflow.Event.PULL_REQUEST + + def is_event_push(self): + return self.event == Workflow.Event.PUSH + + def get_job(self, name): + job = self.find_job(name) + if not job: + Utils.raise_with_error( + f"Failed to find job [{name}], workflow [{self.name}]" + ) + return job + + def find_job(self, name, lazy=False): + name = str(name) + for job in self.jobs: + if lazy: + if name.lower() in job.name.lower(): + return job + else: + if job.name == name: + return job + return None + + def get_secret(self, name) -> Optional[Secret.Config]: + name = str(name) + names = [] + for secret in self.secrets: + if secret.name == name: + return secret + names.append(secret.name) + print(f"ERROR: Failed to find secret [{name}], workflow secrets [{names}]") + raise diff --git a/praktika/yaml_generator.py b/praktika/yaml_generator.py new file mode 100644 index 000000000000..9c61b5e2f79e --- /dev/null +++ b/praktika/yaml_generator.py @@ -0,0 +1,349 @@ +import dataclasses +from typing import List + +from praktika import Artifact, Job, Workflow +from praktika.mangle import _get_workflows +from praktika.parser import WorkflowConfigParser +from praktika.runtime import RunConfig +from praktika.settings import Settings +from praktika.utils import ContextManager, Shell, Utils + + +class YamlGenerator: + class Templates: + TEMPLATE_PULL_REQUEST_0 = """\ +# generated by praktika + +name: {NAME} + +on: + {EVENT}: + branches: [{BRANCHES}] + +# Cancel the previous wf run in PRs. 
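+# The group key combines the workflow name and git ref, so only the latest run per PR branch is kept.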
+concurrency: + group: ${{{{{{{{ github.workflow }}}}}}}}-${{{{{{{{ github.ref }}}}}}}} + cancel-in-progress: true + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + GH_TOKEN: ${{{{{{{{ github.token }}}}}}}} + +# Allow updating GH commit statuses and PR comments to post an actual job reports link +permissions: write-all + +jobs: +{JOBS}\ +""" + + TEMPLATE_CALLABLE_WORKFLOW = """\ +# generated by praktika + +name: {NAME} +on: + workflow_call: + inputs: + config: + type: string + required: false + default: '' + secrets: +{SECRETS} + +env: + PYTHONUNBUFFERED: 1 + +jobs: +{JOBS}\ +""" + + TEMPLATE_SECRET_CONFIG = """\ + {SECRET_NAME}: + required: true +""" + + TEMPLATE_MATRIX = """ + strategy: + fail-fast: false + matrix: + params: {PARAMS_LIST}\ +""" + + TEMPLATE_JOB_0 = """ + {JOB_NAME_NORMALIZED}: + runs-on: [{RUNS_ON}] + needs: [{NEEDS}]{IF_EXPRESSION} + name: "{JOB_NAME_GH}" + outputs: + data: ${{{{ steps.run.outputs.DATA }}}} + steps: + - name: Checkout code + uses: actions/checkout@v4 +{JOB_ADDONS} + - name: Prepare env script + run: | + export PYTHONPATH=.:$PYTHONPATH + cat > {ENV_SETUP_SCRIPT} << 'ENV_SETUP_SCRIPT_EOF' +{SETUP_ENVS} + cat > {WORKFLOW_CONFIG_FILE} << 'EOF' + ${{{{ needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data }}}} + EOF + cat > {WORKFLOW_STATUS_FILE} << 'EOF' + ${{{{ toJson(needs) }}}} + EOF + ENV_SETUP_SCRIPT_EOF + + rm -rf {INPUT_DIR} {OUTPUT_DIR} {TEMP_DIR} + mkdir -p {TEMP_DIR} {INPUT_DIR} {OUTPUT_DIR} +{DOWNLOADS_GITHUB} + - name: Run + id: run + run: | + set -o pipefail + {PYTHON} -m praktika run --job '''{JOB_NAME}''' --workflow "{WORKFLOW_NAME}" --ci |& tee {RUN_LOG} +{UPLOADS_GITHUB}\ +""" + + TEMPLATE_SETUP_ENV_SECRETS = """\ + export {SECRET_NAME}=$(cat<<'EOF' + ${{{{ secrets.{SECRET_NAME} }}}} + EOF + )\ +""" + + TEMPLATE_PY_INSTALL = """ + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: {PYTHON_VERSION} +""" + + TEMPLATE_PY_WITH_REQUIREMENTS = """ + - name: Install dependencies + run: | + sudo apt-get update && sudo apt install -y python3-pip + # TODO: --break-system-packages? 
otherwise ubuntu's apt/apt-get complains + {PYTHON} -m pip install --upgrade pip --break-system-packages + {PIP} install -r {REQUIREMENT_PATH} --break-system-packages +""" + + TEMPLATE_GH_UPLOAD = """ + - name: Upload artifact {NAME} + uses: actions/upload-artifact@v4 + with: + name: {NAME} + path: {PATH} +""" + + TEMPLATE_GH_DOWNLOAD = """ + - name: Download artifact {NAME} + uses: actions/download-artifact@v4 + with: + name: {NAME} + path: {PATH} +""" + + TEMPLATE_IF_EXPRESSION = """ + if: ${{{{ !failure() && !cancelled() && !contains(fromJson(needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data).cache_success_base64, '{JOB_NAME_BASE64}') }}}}\ +""" + + TEMPLATE_IF_EXPRESSION_SKIPPED_OR_SUCCESS = """ + if: ${{ !failure() && !cancelled() }}\ +""" + + TEMPLATE_IF_EXPRESSION_NOT_CANCELLED = """ + if: ${{ !cancelled() }}\ +""" + + def __init__(self): + self.py_workflows = [] # type: List[Workflow.Config] + + @classmethod + def _get_workflow_file_name(cls, workflow_name): + return f"{Settings.WORKFLOW_PATH_PREFIX}/{Utils.normalize_string(workflow_name)}.yaml" + + def generate(self, workflow_file="", workflow_config=None): + print("---Start generating yaml pipelines---") + if workflow_config: + self.py_workflows = [workflow_config] + else: + self.py_workflows = _get_workflows(file=workflow_file) + assert self.py_workflows + for workflow_config in self.py_workflows: + print(f"Generate workflow [{workflow_config.name}]") + parser = WorkflowConfigParser(workflow_config).parse() + if ( + workflow_config.is_event_pull_request() + or workflow_config.is_event_push() + ): + yaml_workflow_str = PullRequestPushYamlGen(parser).generate() + else: + assert ( + False + ), f"Workflow event not yet supported [{workflow_config.event}]" + + with ContextManager.cd(): + with open(self._get_workflow_file_name(workflow_config.name), "w") as f: + f.write(yaml_workflow_str) + + with ContextManager.cd(): + Shell.check("git add ./.github/workflows/*.yaml") + + +class PullRequestPushYamlGen: + def __init__(self, parser: WorkflowConfigParser): + self.workflow_config = parser.workflow_yaml_config + self.parser = parser + + def generate(self): + job_items = [] + for i, job in enumerate(self.workflow_config.jobs): + job_name_normalized = Utils.normalize_string(job.name) + needs = ", ".join(map(Utils.normalize_string, job.needs)) + job_name = job.name + job_addons = [] + for addon in job.addons: + if addon.install_python: + job_addons.append( + YamlGenerator.Templates.TEMPLATE_PY_INSTALL.format( + PYTHON_VERSION=Settings.PYTHON_VERSION + ) + ) + if addon.requirements_txt_path: + job_addons.append( + YamlGenerator.Templates.TEMPLATE_PY_WITH_REQUIREMENTS.format( + PYTHON=Settings.PYTHON_INTERPRETER, + PIP=Settings.PYTHON_PACKET_MANAGER, + PYTHON_VERSION=Settings.PYTHON_VERSION, + REQUIREMENT_PATH=addon.requirements_txt_path, + ) + ) + uploads_github = [] + for artifact in job.artifacts_gh_provides: + uploads_github.append( + YamlGenerator.Templates.TEMPLATE_GH_UPLOAD.format( + NAME=artifact.name, PATH=artifact.path + ) + ) + downloads_github = [] + for artifact in job.artifacts_gh_requires: + downloads_github.append( + YamlGenerator.Templates.TEMPLATE_GH_DOWNLOAD.format( + NAME=artifact.name, PATH=Settings.INPUT_DIR + ) + ) + + config_job_name_normalized = Utils.normalize_string( + Settings.CI_CONFIG_JOB_NAME + ) + + if_expression = "" + if ( + self.workflow_config.enable_cache + and job_name_normalized != config_job_name_normalized + ): + if_expression = YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION.format( + 
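+                    # The config job publishes the set of cache hits as base64-encoded
+                    # job names; the generated if: expression skips this job when its
+                    # encoded name is already present in cache_success_base64.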
WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized, + JOB_NAME_BASE64=Utils.to_base64(job_name), + ) + if job.run_unless_cancelled: + if_expression = ( + YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION_NOT_CANCELLED + ) + + secrets_envs = [] + for secret in self.workflow_config.secret_names_gh: + secrets_envs.append( + YamlGenerator.Templates.TEMPLATE_SETUP_ENV_SECRETS.format( + SECRET_NAME=secret + ) + ) + + job_item = YamlGenerator.Templates.TEMPLATE_JOB_0.format( + JOB_NAME_NORMALIZED=job_name_normalized, + WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized, + IF_EXPRESSION=if_expression, + RUNS_ON=", ".join(job.runs_on), + NEEDS=needs, + JOB_NAME_GH=job_name.replace('"', '\\"'), + JOB_NAME=job_name.replace( + "'", "'\\''" + ), # ' must be escaped so that yaml commands are properly parsed + WORKFLOW_NAME=self.workflow_config.name, + ENV_SETUP_SCRIPT=Settings.ENV_SETUP_SCRIPT, + SETUP_ENVS="\n".join(secrets_envs), + WORKFLOW_CONFIG_FILE=RunConfig.file_name_static( + self.workflow_config.name + ), + JOB_ADDONS="".join(job_addons), + DOWNLOADS_GITHUB="\n".join(downloads_github), + UPLOADS_GITHUB="\n".join(uploads_github), + RUN_LOG=Settings.RUN_LOG, + PYTHON=Settings.PYTHON_INTERPRETER, + WORKFLOW_STATUS_FILE=Settings.WORKFLOW_STATUS_FILE, + TEMP_DIR=Settings.TEMP_DIR, + INPUT_DIR=Settings.INPUT_DIR, + OUTPUT_DIR=Settings.OUTPUT_DIR, + ) + job_items.append(job_item) + + base_template = YamlGenerator.Templates.TEMPLATE_PULL_REQUEST_0 + template_1 = base_template.strip().format( + NAME=self.workflow_config.name, + BRANCHES=", ".join( + [f"'{branch}'" for branch in self.workflow_config.branches] + ), + EVENT=self.workflow_config.event, + JOBS="{}" * len(job_items), + ) + res = template_1.format(*job_items) + + return res + + +@dataclasses.dataclass +class AuxConfig: + # defines aux step to install dependencies + addon: Job.Requirements + # defines aux step(s) to upload GH artifacts + uploads_gh: List[Artifact.Config] + # defines aux step(s) to download GH artifacts + downloads_gh: List[Artifact.Config] + + def get_aux_workflow_name(self): + suffix = "" + if self.addon.python_requirements_txt: + suffix += "_py" + for _ in self.uploads_gh: + suffix += "_uplgh" + for _ in self.downloads_gh: + suffix += "_dnlgh" + return f"{Settings.WORKFLOW_PATH_PREFIX}/aux_job{suffix}.yaml" + + def get_aux_workflow_input(self): + res = "" + if self.addon.python_requirements_txt: + res += f" requirements_txt: {self.addon.python_requirements_txt}" + return res + + +if __name__ == "__main__": + WFS = [ + Workflow.Config( + name="PR", + event=Workflow.Event.PULL_REQUEST, + jobs=[ + Job.Config( + name="Hello World", + runs_on=["foo"], + command="bar", + job_requirements=Job.Requirements( + python_requirements_txt="./requirement.txt" + ), + ) + ], + enable_cache=True, + ) + ] + YamlGenerator().generate(workflow_config=WFS) diff --git a/tests/integration/test_keeper_broken_logs/test.py b/tests/integration/test_keeper_broken_logs/test.py index f75e2ae4f201..be891f9b6c8a 100644 --- a/tests/integration/test_keeper_broken_logs/test.py +++ b/tests/integration/test_keeper_broken_logs/test.py @@ -1,4 +1,5 @@ import time +from multiprocessing.dummy import Pool import pytest @@ -52,15 +53,34 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def start_clickhouse(node): + node.start_clickhouse() + + +def clean_start(): + nodes = [node1, node2, node3] + for node in nodes: + node.stop_clickhouse() + + p = Pool(3) + waiters = [] + for node in nodes: + node.exec_in_container(["rm", "-rf", 
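+            # Wipe the Keeper changelog (and snapshots just below) so every run
+            # starts from a clean coordination state before the parallel restart.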
"/var/lib/clickhouse/coordination/log"]) + node.exec_in_container( + ["rm", "-rf", "/var/lib/clickhouse/coordination/snapshots"] + ) + waiters.append(p.apply_async(start_clickhouse, (node,))) + + for waiter in waiters: + waiter.wait() + + def test_single_node_broken_log(started_cluster): + clean_start() try: wait_nodes() node1_conn = get_fake_zk("node1") - # Cleanup - if node1_conn.exists("/test_broken_log") != None: - node1_conn.delete("/test_broken_log") - node1_conn.create("/test_broken_log") for _ in range(10): node1_conn.create(f"/test_broken_log/node", b"somedata1", sequence=True) @@ -110,10 +130,12 @@ def verify_nodes(zk_conn): verify_nodes(node3_conn) assert node3_conn.get("/test_broken_log_final_node")[0] == b"somedata1" - assert ( + node1_logs = ( node1.exec_in_container(["ls", "/var/lib/clickhouse/coordination/log"]) - == "changelog_1_100000.bin\nchangelog_14_100013.bin\n" + .strip() + .split("\n") ) + assert len(node1_logs) == 2 and node1_logs[0] == "changelog_1_100000.bin" assert ( node2.exec_in_container(["ls", "/var/lib/clickhouse/coordination/log"]) == "changelog_1_100000.bin\n"