diff --git a/integration_tests/README.md b/integration_tests/README.md
index 929d242e946..9493fbb07d1 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -409,6 +409,19 @@ the SHS supported values for the config key
 With `zstd` it's easy to view / decompress event logs using the CLI `zstd -d [--stdout] `
 even without the SHS webUI.
 
+### Worker Logs
+
+NOTE: Available only in local mode, i.e. when the master URL is of the form `local[K, F]`
+
+By default, when using xdist the integration tests write the test output to the console and to a per-worker
+text file that appears under the run directory, of the form
+`integration_tests/target/run_dir--xxxx/WORKERID_worker_logs.log`. The output format of the log and the log level
+can be changed by modifying the file `integration_tests/src/test/resources/pytest_log4j.properties`.
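+
+As an example (based on the default `pytest_log4j.properties` added here), raising the level of the root logger
+from `INFO` to `DEBUG` makes the worker logs more verbose; note that the worker log files can grow quickly at
+that level:
+
+```properties
+# pytest_log4j.properties: raise the root logger level; RollingAppender and myConsoleAppender are defined in this file
+log4j.rootLogger=DEBUG, RollingAppender, myConsoleAppender
+```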
+
+If xdist is not used (e.g. `TEST_PARALLEL=1`), the worker log will be
+`integration_tests/target/run_dir--xxxx/gw0_worker_logs.log`, as if the tests were executed by worker 0 under xdist.
+
 ### Enabling cudf_udf Tests
 
 The cudf_udf tests in this framework are testing Pandas UDF(user-defined function) with cuDF. They are disabled by default not only because of the complicated environment setup, but also because GPU resources scheduling for Pandas UDF is an experimental feature now, the performance may not always be better.
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 713c06c31e3..dec93e6f22a 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -205,10 +205,11 @@ else
     fi
 
     REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror
+    STD_INPUT_PATH="$INPUT_PATH"/src/test/resources
     TEST_COMMON_OPTS=(-v
          -r"$REPORT_CHARS"
          "$TEST_TAGS"
-         --std_input_path="$INPUT_PATH"/src/test/resources
+         --std_input_path="$STD_INPUT_PATH"
          --color=yes
          $TEST_TYPE_PARAM
          "$TEST_ARGS"
@@ -241,7 +242,8 @@ else
     # Set the Delta log cache size to prevent the driver from caching every Delta log indefinitely
     export PYSP_TEST_spark_databricks_delta_delta_log_cacheSize=${PYSP_TEST_spark_databricks_delta_delta_log_cacheSize:-10}
     deltaCacheSize=$PYSP_TEST_spark_databricks_delta_delta_log_cacheSize
-    export PYSP_TEST_spark_driver_extraJavaOptions="-ea -Duser.timezone=$TZ -Ddelta.log.cacheSize=$deltaCacheSize $COVERAGE_SUBMIT_FLAGS"
+    DRIVER_EXTRA_JAVA_OPTIONS="-ea -Duser.timezone=$TZ -Ddelta.log.cacheSize=$deltaCacheSize"
+    export PYSP_TEST_spark_driver_extraJavaOptions="$DRIVER_EXTRA_JAVA_OPTIONS $COVERAGE_SUBMIT_FLAGS"
     export PYSP_TEST_spark_executor_extraJavaOptions="-ea -Duser.timezone=$TZ"
     export PYSP_TEST_spark_ui_showConsoleProgress='false'
     export PYSP_TEST_spark_sql_session_timeZone=$TZ
@@ -313,7 +315,11 @@ EOF
             export PYSP_TEST_spark_master="local[$LOCAL_PARALLEL,$SPARK_TASK_MAXFAILURES]"
         fi
     fi
-
+    if [[ "$SPARK_SUBMIT_FLAGS" == *"--master local"* || "$PYSP_TEST_spark_master" == "local"* ]]; then
+        # The only case where we want worker logs is local mode, so we set the value here explicitly.
+        # We can't rely on PYSP_TEST_spark_master alone because it's not always set, e.g. when using --master.
+        export USE_WORKER_LOGS=1
+    fi
     # Set a seed to be used in the tests, for datagen
     export SPARK_RAPIDS_TEST_DATAGEN_SEED=${SPARK_RAPIDS_TEST_DATAGEN_SEED:-${DATAGEN_SEED:-`date +%s`}}
     echo "SPARK_RAPIDS_TEST_DATAGEN_SEED used: $SPARK_RAPIDS_TEST_DATAGEN_SEED"
@@ -370,6 +376,15 @@ EOF
     then
         exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}"
     else
+        if [[ "$USE_WORKER_LOGS" == "1" ]]; then
+            # Set the extraJavaOptions again to add the log4j configuration needed to write logs to the expected location.
+            # We export the variable again, rather than appending to it, so that the user can still override these confs
+            # on the command line via COVERAGE_SUBMIT_FLAGS, which would not be possible if we simply did
+            # export PYSP_TEST_spark_driver_extraJavaOptions="$PYSP_TEST_spark_driver_extraJavaOptions $LOG4J_CONF"
+            LOG4J_CONF="-Dlog4j.configuration=file://$STD_INPUT_PATH/pytest_log4j.properties -Dlogfile=$RUN_DIR/gw0_worker_logs.log"
+            export PYSP_TEST_spark_driver_extraJavaOptions="$DRIVER_EXTRA_JAVA_OPTIONS $LOG4J_CONF $COVERAGE_SUBMIT_FLAGS"
+        fi
+
         # We set the GPU memory size to be a constant value even if only running with a parallelism of 1
         # because it helps us have consistent test runs.
         jarOpts=()
diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py
index 7be5291e471..6cb5c49fc71 100644
--- a/integration_tests/src/main/python/spark_init_internal.py
+++ b/integration_tests/src/main/python/spark_init_internal.py
@@ -14,9 +14,9 @@
 
 import logging
 import os
+import pytest
 import re
 import stat
-import sys
 
 logging.basicConfig(
     format="%(asctime)s %(levelname)-8s %(message)s",
@@ -96,6 +96,7 @@ def create_tmp_hive():
     except Exception as e:
         logging.warn(f"Failed to setup the hive scratch dir {path}. Error {e}")
 
+# Entry point into this file
 def pytest_sessionstart(session):
     # initializations that must happen globally once before tests start
     # if xdist in the coordinator, if not xdist in the pytest process
@@ -131,10 +132,12 @@ def pytest_sessionstart(session):
     if ('PYTEST_XDIST_WORKER' in os.environ):
         wid = os.environ['PYTEST_XDIST_WORKER']
-        _handle_derby_dir(_sb, driver_opts, wid)
         _handle_event_log_dir(_sb, wid)
+        driver_opts += _get_driver_opts_for_worker_logs(_sb, wid)
+        _handle_derby_dir(_sb, driver_opts, wid)
         _handle_ivy_cache_dir(_sb, wid)
     else:
+        driver_opts += _get_driver_opts_for_worker_logs(_sb, 'gw0')
         _sb.config('spark.driver.extraJavaOptions', driver_opts)
         _handle_event_log_dir(_sb, 'gw0')
@@ -154,6 +157,45 @@ def _handle_derby_dir(sb, driver_opts, wid):
         os.makedirs(d)
     sb.config('spark.driver.extraJavaOptions', driver_opts + ' -Dderby.system.home={}'.format(d))
 
+def _use_worker_logs():
+    return os.environ.get('USE_WORKER_LOGS') == '1'
+
+# Create a named logger that is used only for logging the test name in `log_test_name`
+logger = logging.getLogger('__pytest_worker_logger__')
+def _get_driver_opts_for_worker_logs(_sb, wid):
+    if not _use_worker_logs():
+        logging.info("Not setting up worker logs. In non-local mode, worker logs are sent to the location "
+                     "pre-configured by the user")
+        return ""
+
+    current_directory = os.path.abspath(os.path.curdir)
+    log_file = '{}/{}_worker_logs.log'.format(current_directory, wid)
+
+    from conftest import get_std_input_path
+    std_input_path = get_std_input_path()
+    # This will not take effect when TEST_PARALLEL=1, as the conf is then set when calling spark-submit
+    driver_opts = ' -Dlog4j.configuration=file://{}/pytest_log4j.properties '.format(std_input_path) + \
+                  ' -Dlogfile={}'.format(log_file)
+
+    # Set up logging to the WORKERID_worker_logs file
+    # Note: This logger is only used for logging the test name in the method `log_test_name`.
+    global logger
+    logger.setLevel(logging.INFO)
+    # Create a file handler that writes into the corresponding worker log file.
+    # It appends to the same worker log file that the log4j configuration above points to.
+    # The reason for doing this is to get all test logs in one place, from where we can do further analysis
+    # that might be needed in the future, e.g. looking at the execs that were used in our integration tests.
+    file_handler = logging.FileHandler(log_file)
+    # Set the formatter for the file handler; we match the formatter from basicConfig for consistency in the logs
+    formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s",
+                                  datefmt="%Y-%m-%d %H:%M:%S")
+
+    file_handler.setFormatter(formatter)
+
+    # Add the file handler to the logger
+    logger.addHandler(file_handler)
+
+    return driver_opts
 
 def _handle_event_log_dir(sb, wid):
     if os.environ.get('SPARK_EVENTLOG_ENABLED', str(True)).lower() in [
@@ -208,3 +250,7 @@ def get_spark_i_know_what_i_am_doing():
 
 def spark_version():
     return _spark.version
+
+@pytest.fixture(scope='function', autouse=_use_worker_logs())
+def log_test_name(request):
+    logger.info("Running test '{}'".format(request.node.nodeid))
diff --git a/integration_tests/src/test/resources/log4j.properties b/integration_tests/src/test/resources/log4j.properties
index e50b3a70971..6cc51f4894f 100644
--- a/integration_tests/src/test/resources/log4j.properties
+++ b/integration_tests/src/test/resources/log4j.properties
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,9 @@
 # limitations under the License.
 #
 
+## This properties file configures the logs generated by ScalaTest while running the Scala tests
+## TODO: This file may not be needed as there is a log4j2.properties file which might take precedence
+
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=true
@@ -25,4 +28,4 @@ log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{
 log4j.appender.console=org.apache.log4j.ConsoleAppender
 log4j.appender.console.target=System.err
 log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
\ No newline at end of file
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
diff --git a/integration_tests/src/test/resources/log4j2.properties b/integration_tests/src/test/resources/log4j2.properties
index 90d7dd3d469..778f2e7e8e9 100644
--- a/integration_tests/src/test/resources/log4j2.properties
+++ b/integration_tests/src/test/resources/log4j2.properties
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
 # limitations under the License.
 #
 
+## This properties file configures the logs generated by ScalaTest while running the Scala tests
+
 # log level of log4j itself
 status=warn
diff --git a/integration_tests/src/test/resources/pytest_log4j.properties b/integration_tests/src/test/resources/pytest_log4j.properties
new file mode 100644
index 00000000000..6927ab2c1a7
--- /dev/null
+++ b/integration_tests/src/test/resources/pytest_log4j.properties
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+## This properties file is used to configure the logs generated when running the integration tests under xdist
+
+log4j.appender.myConsoleAppender=org.apache.log4j.ConsoleAppender
+log4j.appender.myConsoleAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.myConsoleAppender.layout.ConversionPattern=%d [%t] %-5p %c - %m%n
+
+log4j.appender.RollingAppender=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.RollingAppender.File=${logfile}
+log4j.appender.RollingAppender.DatePattern='.'yyyy-MM-dd
+log4j.appender.RollingAppender.layout=org.apache.log4j.PatternLayout
+log4j.appender.RollingAppender.layout.ConversionPattern=[%p] %d %c %M - %m%n
+
+log4j.rootLogger=INFO, RollingAppender, myConsoleAppender
+
+log4j.logger.spark.storage=INFO, RollingAppender
+log4j.additivity.spark.storage=false
+log4j.logger.spark.scheduler=INFO, RollingAppender
+log4j.additivity.spark.scheduler=false
+log4j.logger.spark.CacheTracker=INFO, RollingAppender
+log4j.additivity.spark.CacheTracker=false
+log4j.logger.spark.CacheTrackerActor=INFO, RollingAppender
+log4j.additivity.spark.CacheTrackerActor=false
+log4j.logger.spark.MapOutputTrackerActor=INFO, RollingAppender
+log4j.additivity.spark.MapOutputTrackerActor=false
+log4j.logger.spark.MapOutputTracker=INFO, RollingAppender
+log4j.additivity.spark.MapOutputTracker=false