Skip to content

Commit

Permalink
Add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Chong Gao committed Dec 25, 2023
1 parent ffbd87e commit 80bdc20
Showing 1 changed file with 15 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,19 @@ import org.apache.spark.sql.types._
/**
* A simple test performance framework for non-UTC features.
* Usage:
* update enablePerfTest = true in this code
* update enablePerfTest = true in this code
* mvn test -Dbuildver=311 -DwildcardSuites=com.nvidia.spark.rapids.timezone.TimeZonePerfSuite
* Note:
* Generate a Parquet file with 6 columns:
* - c_ts: timestamp column
* - c_long_of_ts: long value which is microseconds
* - c_date: date column
* - c_int_of_date: int value which is the number of days since 1970-01-01
* - c_str_for_cast: strings for cast to timestamp, formats are yyyy, yyyy-mm, ...
* - c_str_of_ts: strings with format: yyyy-MM-dd HH:mm:ss
* Each column is highly duplicated.
* The generated file is highly compressed so that both the CPU and the GPU can scan it quickly.
* When testing operators, we need to add a max/count aggregation to reduce the result data.
*/
class TimeZonePerfSuite extends SparkQueryCompareTestSuite with BeforeAndAfterAll {
// The perf test is disabled by default because it takes a long time to run as a unit test.
Expand Down Expand Up @@ -145,11 +157,12 @@ class TimeZonePerfSuite extends SparkQueryCompareTestSuite with BeforeAndAfterAl
}

test("test from_utc_timestamp") {
// cache time zone DB in advance
GpuTimeZoneDB.cacheDatabase()
Thread.sleep(5L)

// Builds the query under test for the given time zone: reads the Parquet data at `path`
// (defined outside this snippet), applies from_utc_timestamp to column "c_ts" with `zone`,
// and wraps the result in max so that only a reduced result is returned.
// NOTE(review): the two consecutive `spark.read.parquet(path).select(functions.max(` lines
// below look like the before/after sides of a rendered diff; presumably only the second one
// exists in the real source -- confirm against the repository.
def perfTest(spark: SparkSession, zone: String): DataFrame = {
spark.read.parquet(path).select(functions.max( // max
spark.read.parquet(path).select(functions.max( // use max to reduce the result data
functions.from_utc_timestamp(functions.col("c_ts"), zone)
))
}
Expand Down

0 comments on commit 80bdc20

Please sign in to comment.