From 2a3026dec9dca553c2be7d49f2d0e6c09a9f4589 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 18 Sep 2024 10:04:31 -0700 Subject: [PATCH 1/2] Change the Parquet writer's `default_row_group_size_bytes` from 128MB to inf (#16750) Closes #16733. This PR changes the default value of Parquet writer's default max row group size from 128MB to 1Million rows. This allows avoiding thin row group strips when writing wide (> 512 cols) tables resulting in a significantly improved read throughput for wide tables (especially when low cardinality) with virtually no impact to narrow-tables read performance. Benchmarked using: #16751 ## Results ### Hardware ``` GPU: NVIDIA RTX 5880 Ada Generation SM Version: 890 (PTX Version: 860) Number of SMs: 110 SM Default Clock Rate: 18446744071874 MHz Global Memory: 23879 MiB Free / 48632 MiB Total Global Memory Bus Peak: 960 GB/sec (384-bit DDR @10001MHz) Max Shared Memory: 100 KiB/SM, 48 KiB/Block L2 Cache Size: 98304 KiB Maximum Active Blocks: 24/SM Maximum Active Threads: 1536/SM, 1024/Block Available Registers: 65536/SM, 65536/Block ECC Enabled: No ``` ### Read Throughput ``` ## parquet_read_wide_tables_mixed | T | num_rows | num_cols | GPU Time_old | GPU Time_new | bytes_per_second_old | bytes_per_second_new | peak_memory_usage_old | peak_memory_usage_new | encoded_file_size_old | encoded_file_size_new | |-----------|----------|----------|----------------|----------------|----------------------|----------------------|-----------------------|-----------------------|-----------------------|-----------------------| | INTEGRAL | 10000 | 64 | 940.690 us | 928.387 us | 570720378014 | 578283256754 | 3.405 MiB | 3.405 MiB | 748.248 KiB | 748.248 KiB | | INTEGRAL | 100000 | 64 | 2.053 ms | 2.037 ms | 261541794543 | 263500220325 | 28.308 MiB | 28.308 MiB | 5.164 MiB | 5.164 MiB | | INTEGRAL | 500000 | 64 | 5.783 ms | 5.693 ms | 92838553328 | 94296134644 | 139.928 MiB | 139.042 MiB | 24.698 MiB | 24.325 MiB | | INTEGRAL | 1000000 | 64 | 11.400 ms | 10.775 ms | 47092763803 | 49824643807 | 279.254 MiB | 277.470 MiB | 49.042 MiB | 48.284 MiB | | INTEGRAL | 10000 | 256 | 1.718 ms | 1.732 ms | 312407306091 | 309935794547 | 13.752 MiB | 13.752 MiB | 2.956 MiB | 2.956 MiB | | INTEGRAL | 100000 | 256 | 5.726 ms | 5.818 ms | 93765292338 | 92275580643 | 114.366 MiB | 114.366 MiB | 20.743 MiB | 20.743 MiB | | INTEGRAL | 500000 | 256 | 25.179 ms | 22.159 ms | 21322289603 | 24228371776 | 572.905 MiB | 561.786 MiB | 103.796 MiB | 97.677 MiB | | INTEGRAL | 1000000 | 256 | 48.259 ms | 42.428 ms | 11124725758 | 12653746472 | 1.117 GiB | 1.095 GiB | 206.155 MiB | 193.886 MiB | | INTEGRAL | 10000 | 512 | 2.741 ms | 2.758 ms | 195853280055 | 194632437549 | 27.508 MiB | 27.508 MiB | 5.918 MiB | 5.918 MiB | | INTEGRAL | 100000 | 512 | 11.197 ms | 10.600 ms | 47945685016 | 50646524148 | 235.910 MiB | 228.755 MiB | 44.559 MiB | 41.510 MiB | | INTEGRAL | 500000 | 512 | 54.929 ms | 43.554 ms | 9773962645 | 12326557981 | 1.146 GiB | 1.097 GiB | 221.266 MiB | 195.384 MiB | | INTEGRAL | 1000000 | 512 | 103.779 ms | 82.403 ms | 5173195193 | 6515218035 | 2.288 GiB | 2.190 GiB | 442.101 MiB | 387.861 MiB | | INTEGRAL | 10000 | 1024 | 5.210 ms | 5.405 ms | 103040438112 | 99319591295 | 54.937 MiB | 54.937 MiB | 11.829 MiB | 11.829 MiB | | INTEGRAL | 100000 | 1024 | 26.891 ms | 20.194 ms | 19964357393 | 26585391032 | 498.410 MiB | 456.756 MiB | 99.962 MiB | 82.939 MiB | | INTEGRAL | 500000 | 1024 | 135.404 ms | 84.676 ms | 3964957208 | 6340314329 | 2.434 GiB | 2.191 GiB | 500.554 MiB | 390.418 MiB | | INTEGRAL | 1000000 | 1024 | 256.033 ms | 162.217 ms | 2096879057 | 3309593393 | 4.869 GiB | 4.372 GiB | 1001.573 MiB | 775.040 MiB | | FLOAT | 10000 | 64 | 962.219 us | 951.565 us | 557950915640 | 564197923891 | 5.275 MiB | 5.275 MiB | 1012.101 KiB | 1012.101 KiB | | FLOAT | 100000 | 64 | 2.032 ms | 2.032 ms | 264218700681 | 264250413360 | 45.321 MiB | 45.321 MiB | 6.316 MiB | 6.316 MiB | | FLOAT | 500000 | 64 | 6.660 ms | 6.693 ms | 80611279094 | 80219014175 | 224.129 MiB | 222.946 MiB | 29.685 MiB | 29.044 MiB | | FLOAT | 1000000 | 64 | 13.560 ms | 13.758 ms | 39591771965 | 39023315442 | 447.103 MiB | 445.007 MiB | 58.762 MiB | 57.482 MiB | | FLOAT | 10000 | 256 | 1.808 ms | 1.825 ms | 297020886609 | 294226222306 | 21.109 MiB | 21.109 MiB | 3.968 MiB | 3.968 MiB | | FLOAT | 100000 | 256 | 6.921 ms | 6.307 ms | 77571490752 | 85116522574 | 185.578 MiB | 181.271 MiB | 27.393 MiB | 25.256 MiB | | FLOAT | 500000 | 256 | 30.064 ms | 25.955 ms | 17857874786 | 20684696586 | 914.366 MiB | 891.787 MiB | 128.981 MiB | 116.186 MiB | | FLOAT | 1000000 | 256 | 59.189 ms | 48.592 ms | 9070460126 | 11048464794 | 1.787 GiB | 1.738 GiB | 258.075 MiB | 229.920 MiB | | FLOAT | 10000 | 512 | 2.998 ms | 3.006 ms | 179078195058 | 178594968077 | 42.222 MiB | 42.222 MiB | 7.941 MiB | 7.941 MiB | | FLOAT | 100000 | 512 | 14.160 ms | 12.314 ms | 37915291403 | 43597041127 | 376.553 MiB | 362.567 MiB | 60.136 MiB | 50.537 MiB | | FLOAT | 500000 | 512 | 69.524 ms | 50.251 ms | 7722076774 | 10683715204 | 1.826 GiB | 1.742 GiB | 292.552 MiB | 232.393 MiB | | FLOAT | 1000000 | 512 | 130.729 ms | 95.458 ms | 4106742786 | 5624164002 | 3.647 GiB | 3.477 GiB | 581.180 MiB | 459.927 MiB | | FLOAT | 10000 | 1024 | 6.351 ms | 6.492 ms | 84532884515 | 82693769317 | 84.452 MiB | 84.452 MiB | 15.893 MiB | 15.893 MiB | | FLOAT | 100000 | 1024 | 36.898 ms | 26.302 ms | 14550146722 | 20411596018 | 778.441 MiB | 725.125 MiB | 136.809 MiB | 101.066 MiB | | FLOAT | 500000 | 1024 | 166.699 ms | 98.340 ms | 3220600409 | 5459311820 | 3.802 GiB | 3.484 GiB | 685.702 MiB | 464.775 MiB | | FLOAT | 1000000 | 1024 | 339.687 ms | 188.463 ms | 1580487011 | 2848673918 | 7.606 GiB | 6.953 GiB | 1.340 GiB | 919.840 MiB | | DECIMAL | 10000 | 64 | 1.076 ms | 1.092 ms | 498752693210 | 491676757508 | 7.485 MiB | 7.485 MiB | 1.216 MiB | 1.216 MiB | | DECIMAL | 100000 | 64 | 2.166 ms | 2.172 ms | 247840684988 | 247198078197 | 65.498 MiB | 65.498 MiB | 6.658 MiB | 6.658 MiB | | DECIMAL | 500000 | 64 | 7.421 ms | 7.058 ms | 72343289850 | 76066836305 | 325.515 MiB | 322.466 MiB | 31.349 MiB | 29.384 MiB | | DECIMAL | 1000000 | 64 | 15.239 ms | 14.020 ms | 35230516583 | 38291860266 | 649.547 MiB | 643.714 MiB | 61.759 MiB | 57.826 MiB | | DECIMAL | 10000 | 256 | 1.989 ms | 1.989 ms | 269930562597 | 269886680781 | 30.119 MiB | 30.119 MiB | 4.896 MiB | 4.896 MiB | | DECIMAL | 100000 | 256 | 7.839 ms | 6.966 ms | 68483613468 | 77073587059 | 269.638 MiB | 263.547 MiB | 30.588 MiB | 26.664 MiB | | DECIMAL | 500000 | 256 | 35.199 ms | 26.893 ms | 15252335676 | 19963411264 | 1.312 GiB | 1.267 GiB | 150.948 MiB | 117.601 MiB | | DECIMAL | 1000000 | 256 | 72.584 ms | 50.944 ms | 7396511691 | 10538553316 | 2.622 GiB | 2.529 GiB | 301.231 MiB | 231.353 MiB | | DECIMAL | 10000 | 512 | 3.612 ms | 3.595 ms | 148642296188 | 149335059500 | 60.283 MiB | 60.283 MiB | 9.801 MiB | 9.801 MiB | | DECIMAL | 100000 | 512 | 19.820 ms | 14.084 ms | 27087819156 | 38119174003 | 562.417 MiB | 527.494 MiB | 75.263 MiB | 53.349 MiB | | DECIMAL | 500000 | 512 | 94.913 ms | 51.910 ms | 5656452419 | 10342308581 | 2.747 GiB | 2.536 GiB | 377.112 MiB | 235.187 MiB | | DECIMAL | 1000000 | 512 | 180.513 ms | 98.562 ms | 2974131976 | 5447057883 | 5.494 GiB | 5.063 GiB | 754.738 MiB | 462.785 MiB | | DECIMAL | 10000 | 1024 | 7.667 ms | 6.777 ms | 70025338013 | 79218913933 | 120.656 MiB | 120.656 MiB | 19.616 MiB | 19.616 MiB | | DECIMAL | 100000 | 1024 | 61.182 ms | 26.946 ms | 8775038947 | 19923803470 | 1.184 GiB | 1.031 GiB | 201.928 MiB | 106.705 MiB | | DECIMAL | 500000 | 1024 | 261.218 ms | 102.314 ms | 2055261558 | 5247292283 | 5.921 GiB | 5.076 GiB | 1012.826 MiB | 470.402 MiB | | DECIMAL | 1000000 | 1024 | 513.386 ms | 196.347 ms | 1045744543 | 2734301880 | 11.843 GiB | 10.133 GiB | 1.980 GiB | 925.576 MiB | | TIMESTAMP | 10000 | 64 | 1.014 ms | 1.016 ms | 529606978079 | 528414399822 | 6.079 MiB | 6.079 MiB | 1.068 MiB | 1.068 MiB | | TIMESTAMP | 100000 | 64 | 2.057 ms | 2.053 ms | 261019684779 | 261455248599 | 52.688 MiB | 52.688 MiB | 6.436 MiB | 6.436 MiB | | TIMESTAMP | 500000 | 64 | 6.950 ms | 6.761 ms | 77245644716 | 79410211533 | 260.606 MiB | 259.304 MiB | 29.924 MiB | 29.164 MiB | | TIMESTAMP | 1000000 | 64 | 14.506 ms | 13.832 ms | 37010291008 | 38813599633 | 521.240 MiB | 517.604 MiB | 59.878 MiB | 57.601 MiB | | TIMESTAMP | 10000 | 256 | 1.878 ms | 1.889 ms | 285887176743 | 284275145551 | 24.328 MiB | 24.328 MiB | 4.290 MiB | 4.290 MiB | | TIMESTAMP | 100000 | 256 | 7.198 ms | 6.458 ms | 74586920018 | 83128450019 | 215.854 MiB | 210.739 MiB | 28.681 MiB | 25.734 MiB | | TIMESTAMP | 500000 | 256 | 34.185 ms | 26.654 ms | 15705060785 | 20142331826 | 1.044 GiB | 1.013 GiB | 137.016 MiB | 116.663 MiB | | TIMESTAMP | 1000000 | 256 | 66.420 ms | 49.599 ms | 8083007343 | 10824295857 | 2.085 GiB | 2.022 GiB | 272.580 MiB | 230.395 MiB | | TIMESTAMP | 10000 | 512 | 3.143 ms | 3.150 ms | 170821086658 | 170446277893 | 48.702 MiB | 48.702 MiB | 8.591 MiB | 8.591 MiB | | TIMESTAMP | 100000 | 512 | 17.652 ms | 12.615 ms | 30413872283 | 42557024194 | 440.115 MiB | 421.891 MiB | 63.197 MiB | 51.502 MiB | | TIMESTAMP | 500000 | 512 | 75.454 ms | 50.955 ms | 7115233856 | 10536117334 | 2.146 GiB | 2.028 GiB | 315.073 MiB | 233.355 MiB | | TIMESTAMP | 1000000 | 512 | 140.692 ms | 95.964 ms | 3815935506 | 5594485106 | 4.285 GiB | 4.048 GiB | 627.348 MiB | 460.885 MiB | | TIMESTAMP | 10000 | 1024 | 6.436 ms | 6.975 ms | 83411903593 | 76971777095 | 97.454 MiB | 97.454 MiB | 17.196 MiB | 17.196 MiB | | TIMESTAMP | 100000 | 1024 | 45.659 ms | 26.728 ms | 11758159876 | 20086145129 | 936.005 MiB | 844.159 MiB | 159.908 MiB | 103.000 MiB | | TIMESTAMP | 500000 | 1024 | 199.636 ms | 99.231 ms | 2689242353 | 5410303529 | 4.557 GiB | 4.057 GiB | 794.728 MiB | 466.703 MiB | | TIMESTAMP | 1000000 | 1024 | 372.691 ms | 192.598 ms | 1440523696 | 2787517681 | 9.104 GiB | 8.099 GiB | 1.551 GiB | 921.760 MiB | | DURATION | 10000 | 64 | 986.208 us | 989.153 us | 544379023579 | 542758221495 | 6.417 MiB | 6.417 MiB | 932.501 KiB | 932.501 KiB | | DURATION | 100000 | 64 | 2.222 ms | 2.018 ms | 241594183626 | 266034888500 | 57.291 MiB | 57.291 MiB | 6.079 MiB | 6.079 MiB | | DURATION | 500000 | 64 | 6.642 ms | 6.673 ms | 80830328889 | 80453377113 | 284.029 MiB | 283.224 MiB | 28.819 MiB | 28.288 MiB | | DURATION | 1000000 | 64 | 13.150 ms | 13.488 ms | 40828039129 | 39804805295 | 567.280 MiB | 565.669 MiB | 57.137 MiB | 56.075 MiB | | DURATION | 10000 | 256 | 1.805 ms | 1.815 ms | 297459887040 | 295856879191 | 25.686 MiB | 25.686 MiB | 3.665 MiB | 3.665 MiB | | DURATION | 100000 | 256 | 6.839 ms | 6.270 ms | 78502421937 | 85630914910 | 232.874 MiB | 229.165 MiB | 25.863 MiB | 24.323 MiB | | DURATION | 500000 | 256 | 29.886 ms | 26.234 ms | 17964080662 | 20464503730 | 1.125 GiB | 1.106 GiB | 123.885 MiB | 113.179 MiB | | DURATION | 1000000 | 256 | 58.290 ms | 48.418 ms | 9210348188 | 11088351436 | 2.250 GiB | 2.210 GiB | 247.272 MiB | 224.312 MiB | | DURATION | 10000 | 512 | 3.035 ms | 2.964 ms | 176885037888 | 181108374773 | 51.383 MiB | 51.383 MiB | 7.342 MiB | 7.342 MiB | | DURATION | 100000 | 512 | 14.492 ms | 12.136 ms | 37044853523 | 44237579412 | 474.355 MiB | 458.371 MiB | 55.996 MiB | 48.689 MiB | | DURATION | 500000 | 512 | 70.131 ms | 51.095 ms | 7655286246 | 10507294503 | 2.299 GiB | 2.213 GiB | 271.064 MiB | 226.438 MiB | | DURATION | 1000000 | 512 | 132.495 ms | 95.019 ms | 4051999205 | 5650150759 | 4.593 GiB | 4.419 GiB | 541.495 MiB | 448.815 MiB | | DURATION | 10000 | 1024 | 6.576 ms | 6.318 ms | 81638807422 | 84977253627 | 102.782 MiB | 102.782 MiB | 14.701 MiB | 14.701 MiB | | DURATION | 100000 | 1024 | 38.001 ms | 26.011 ms | 14127627316 | 20640219375 | 964.471 MiB | 916.755 MiB | 127.532 MiB | 97.394 MiB | | DURATION | 500000 | 1024 | 159.928 ms | 98.126 ms | 3356945213 | 5471258270 | 4.711 GiB | 4.426 GiB | 639.050 MiB | 452.925 MiB | | DURATION | 1000000 | 1024 | 305.818 ms | 188.647 ms | 1755524869 | 2845895428 | 9.422 GiB | 8.839 GiB | 1.249 GiB | 897.737 MiB | | STRING | 10000 | 64 | 2.241 ms | 2.244 ms | 239611491431 | 239240518530 | 15.926 MiB | 15.926 MiB | 2.075 MiB | 2.075 MiB | | STRING | 100000 | 64 | 4.862 ms | 4.822 ms | 110419679907 | 111346705245 | 132.646 MiB | 132.646 MiB | 8.087 MiB | 8.087 MiB | | STRING | 500000 | 64 | 20.498 ms | 17.812 ms | 26191957819 | 30140554720 | 664.294 MiB | 645.028 MiB | 40.456 MiB | 30.817 MiB | | STRING | 1000000 | 64 | 37.773 ms | 34.985 ms | 14213079575 | 15345709268 | 1.298 GiB | 1.255 GiB | 80.941 MiB | 59.259 MiB | | STRING | 10000 | 256 | 4.125 ms | 4.171 ms | 130163506067 | 128706550148 | 63.789 MiB | 63.789 MiB | 8.319 MiB | 8.319 MiB | | STRING | 100000 | 256 | 22.074 ms | 17.799 ms | 24321103825 | 30162947098 | 584.754 MiB | 530.912 MiB | 58.602 MiB | 32.330 MiB | | STRING | 500000 | 256 | 93.278 ms | 66.770 ms | 5755572906 | 8040584271 | 2.857 GiB | 2.521 GiB | 294.130 MiB | 123.271 MiB | | STRING | 1000000 | 256 | 190.999 ms | 122.359 ms | 2810851154 | 4387682165 | 5.715 GiB | 5.023 GiB | 588.586 MiB | 237.018 MiB | | STRING | 10000 | 512 | 7.520 ms | 8.010 ms | 71390390607 | 67021971176 | 127.538 MiB | 127.538 MiB | 16.634 MiB | 16.634 MiB | | STRING | 100000 | 512 | 51.666 ms | 32.251 ms | 10391219810 | 16646741143 | 1.259 GiB | 1.037 GiB | 173.940 MiB | 64.682 MiB | | STRING | 500000 | 512 | 251.723 ms | 125.963 ms | 2132782858 | 4262141577 | 6.300 GiB | 5.040 GiB | 873.437 MiB | 246.559 MiB | | STRING | 1000000 | 512 | 477.668 ms | 244.912 ms | 1123940871 | 2192101011 | 12.602 GiB | 10.044 GiB | 1.707 GiB | 474.121 MiB | | STRING | 10000 | 1024 | 17.184 ms | 16.128 ms | 31242201518 | 33288874029 | 276.395 MiB | 254.971 MiB | 40.126 MiB | 33.243 MiB | | STRING | 100000 | 1024 | 132.094 ms | 63.304 ms | 4064323158 | 8480799642 | 2.721 GiB | 2.073 GiB | 414.092 MiB | 129.316 MiB | | STRING | 500000 | 1024 | 608.283 ms | 251.026 ms | 882600977 | 2138709222 | 13.618 GiB | 10.076 GiB | 2.028 GiB | 493.067 MiB | | STRING | 1000000 | 1024 | 1.249 s | 485.734 ms | 429750505 | 1105276473 | 27.239 GiB | 20.079 GiB | 4.059 GiB | 948.185 MiB | ``` Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) - Bradley Dice (https://github.com/bdice) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/cudf/pull/16750 --- cpp/include/cudf/io/parquet.hpp | 5 +++-- cpp/src/io/parquet/writer_impl.cu | 10 ++++++++-- python/cudf/cudf/_lib/parquet.pyx | 16 ++++++++-------- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/io/parquet.py | 8 ++++---- python/cudf/cudf/utils/ioutils.py | 12 ++++-------- python/dask_cudf/dask_cudf/io/parquet.py | 7 ++----- 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index ed7b2ac0850..ee03a382bec 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -39,8 +39,9 @@ namespace io { * @file */ -constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; ///< 128MB per row group -constexpr size_type default_row_group_size_rows = 1000000; ///< 1 million rows per row group +constexpr size_t default_row_group_size_bytes = + std::numeric_limits::max(); ///< Infinite bytes per row group +constexpr size_type default_row_group_size_rows = 1'000'000; ///< 1 million rows per row group constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page constexpr int32_t default_column_index_truncate_length = 64; ///< truncate to 64 bytes diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 81fd4ab9f82..ec05f35d405 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -1819,8 +1819,14 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto const table_size = std::reduce(column_sizes.begin(), column_sizes.end()); auto const avg_row_len = util::div_rounding_up_safe(table_size, input.num_rows()); if (avg_row_len > 0) { - auto const rg_frag_size = util::div_rounding_up_safe(max_row_group_size, avg_row_len); - max_page_fragment_size = std::min(rg_frag_size, max_page_fragment_size); + // Ensure `rg_frag_size` is not bigger than size_type::max for default max_row_group_size + // value (=uint64::max) to avoid a sign overflow when comparing + auto const rg_frag_size = + std::min(std::numeric_limits::max(), + util::div_rounding_up_safe(max_row_group_size, avg_row_len)); + // Safe comparison as rg_frag_size fits in size_type + max_page_fragment_size = + std::min(static_cast(rg_frag_size), max_page_fragment_size); } // dividing page size by average row length will tend to overshoot the desired diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index a0155671a26..e6c9d60b05b 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -438,7 +438,7 @@ def write_parquet( object statistics="ROWGROUP", object metadata_file_path=None, object int96_timestamps=False, - object row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT, + object row_group_size_bytes=None, object row_group_size_rows=None, object max_page_size_bytes=None, object max_page_size_rows=None, @@ -616,9 +616,9 @@ cdef class ParquetWriter: Name of the compression to use. Use ``None`` for no compression. statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP' Level at which column statistics should be included in file. - row_group_size_bytes: int, default 134217728 + row_group_size_bytes: int, default ``uint64 max`` Maximum size of each stripe of the output. - By default, 134217728 (128MB) will be used. + By default, a virtually infinite size equal to ``uint64 max`` will be used. row_group_size_rows: int, default 1000000 Maximum number of rows of each stripe of the output. By default, 1000000 (10^6 rows) will be used. @@ -661,11 +661,11 @@ cdef class ParquetWriter: def __cinit__(self, object filepath_or_buffer, object index=None, object compression="snappy", str statistics="ROWGROUP", - int row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT, - int row_group_size_rows=1000000, - int max_page_size_bytes=524288, - int max_page_size_rows=20000, - int max_dictionary_size=1048576, + size_t row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT, + size_type row_group_size_rows=1000000, + size_t max_page_size_bytes=524288, + size_type max_page_size_rows=20000, + size_t max_dictionary_size=1048576, bool use_dictionary=True, bool store_schema=False): filepaths_or_buffers = ( diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 58a16a6d504..d73ad8225ca 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6840,7 +6840,7 @@ def to_parquet( statistics="ROWGROUP", metadata_file_path=None, int96_timestamps=False, - row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT, + row_group_size_bytes=None, row_group_size_rows=None, max_page_size_bytes=None, max_page_size_rows=None, diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 62be7378e9e..ce99f98b559 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -64,7 +64,7 @@ def _write_parquet( statistics="ROWGROUP", metadata_file_path=None, int96_timestamps=False, - row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT, + row_group_size_bytes=None, row_group_size_rows=None, max_page_size_bytes=None, max_page_size_rows=None, @@ -149,7 +149,7 @@ def write_to_dataset( return_metadata=False, statistics="ROWGROUP", int96_timestamps=False, - row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT, + row_group_size_bytes=None, row_group_size_rows=None, max_page_size_bytes=None, max_page_size_rows=None, @@ -205,7 +205,7 @@ def write_to_dataset( If ``False``, timestamps will not be altered. row_group_size_bytes: integer or None, default None Maximum size of each stripe of the output. - If None, 134217728 (128MB) will be used. + If None, no limit on row group stripe size will be used. row_group_size_rows: integer or None, default None Maximum number of rows of each stripe of the output. If None, 1000000 will be used. @@ -980,7 +980,7 @@ def to_parquet( statistics="ROWGROUP", metadata_file_path=None, int96_timestamps=False, - row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT, + row_group_size_bytes=None, row_group_size_rows=None, max_page_size_bytes=None, max_page_size_rows=None, diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 1627107b57d..1180da321e6 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -27,7 +27,7 @@ fsspec_parquet = None _BYTES_PER_THREAD_DEFAULT = 256 * 1024 * 1024 -_ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024 +_ROW_GROUP_SIZE_BYTES_DEFAULT = np.iinfo(np.uint64).max _docstring_remote_sources = """ - cuDF supports local and remote data stores. See configuration details for @@ -275,10 +275,9 @@ timestamp[us] to the int96 format, which is the number of Julian days and the number of nanoseconds since midnight of 1970-01-01. If ``False``, timestamps will not be altered. -row_group_size_bytes: integer, default {row_group_size_bytes_val} +row_group_size_bytes: integer, default None Maximum size of each stripe of the output. - If None, {row_group_size_bytes_val} - ({row_group_size_bytes_val_in_mb} MB) will be used. + If None, no limit on row group stripe size will be used. row_group_size_rows: integer or None, default None Maximum number of rows of each stripe of the output. If None, 1000000 will be used. @@ -346,10 +345,7 @@ See Also -------- cudf.read_parquet -""".format( - row_group_size_bytes_val=_ROW_GROUP_SIZE_BYTES_DEFAULT, - row_group_size_bytes_val_in_mb=_ROW_GROUP_SIZE_BYTES_DEFAULT / 1024 / 1024, -) +""" doc_to_parquet = docfmt_partial(docstring=_docstring_to_parquet) _docstring_merge_parquet_filemetadata = """ diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index e793d4381d1..a781b8242fe 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -23,7 +23,6 @@ from cudf.io import write_to_dataset from cudf.io.parquet import _apply_post_filters, _normalize_filters from cudf.utils.dtypes import cudf_dtype_from_pa_type -from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT class CudfEngine(ArrowDatasetEngine): @@ -341,9 +340,7 @@ def write_partition( return_metadata=return_metadata, statistics=kwargs.get("statistics", "ROWGROUP"), int96_timestamps=kwargs.get("int96_timestamps", False), - row_group_size_bytes=kwargs.get( - "row_group_size_bytes", _ROW_GROUP_SIZE_BYTES_DEFAULT - ), + row_group_size_bytes=kwargs.get("row_group_size_bytes", None), row_group_size_rows=kwargs.get("row_group_size_rows", None), max_page_size_bytes=kwargs.get("max_page_size_bytes", None), max_page_size_rows=kwargs.get("max_page_size_rows", None), @@ -365,7 +362,7 @@ def write_partition( statistics=kwargs.get("statistics", "ROWGROUP"), int96_timestamps=kwargs.get("int96_timestamps", False), row_group_size_bytes=kwargs.get( - "row_group_size_bytes", _ROW_GROUP_SIZE_BYTES_DEFAULT + "row_group_size_bytes", None ), row_group_size_rows=kwargs.get( "row_group_size_rows", None From e68f55c98f257bdeedeb31e68c9737264bd0b393 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav <43375352+srinivasyadav18@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:12:23 -0500 Subject: [PATCH 2/2] Refactor mixed_semi_join using cuco::static_set (#16230) This PR refactors `mixed_semi_join` by replacing **cuco** legacy `static_map` with latest `static_set`. Contributes to #12261. Authors: - Srinivas Yadav (https://github.com/srinivasyadav18) - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16230 --- cpp/src/join/join_common_utils.hpp | 6 -- cpp/src/join/mixed_join_common_utils.cuh | 33 +++++++++ cpp/src/join/mixed_join_kernels_semi.cu | 35 ++++----- cpp/src/join/mixed_join_kernels_semi.cuh | 6 +- cpp/src/join/mixed_join_semi.cu | 90 +++++++----------------- cpp/tests/join/mixed_join_tests.cu | 30 ++++++++ 6 files changed, 109 insertions(+), 91 deletions(-) diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 86402a0e7de..573101cefd9 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -22,7 +22,6 @@ #include #include -#include #include #include @@ -51,11 +50,6 @@ using mixed_multimap_type = cudf::detail::cuco_allocator, cuco::legacy::double_hashing<1, hash_type, hash_type>>; -using semi_map_type = cuco::legacy::static_map>; - using row_hash_legacy = cudf::row_hasher; diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 19701816867..89c13285cfe 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -25,6 +25,7 @@ #include #include +#include namespace cudf { namespace detail { @@ -160,6 +161,38 @@ struct pair_expression_equality : public expression_equality { } }; +/** + * @brief Equality comparator that composes two row_equality comparators. + */ +struct double_row_equality_comparator { + row_equality const equality_comparator; + row_equality const conditional_comparator; + + __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + using experimental::row::lhs_index_type; + using experimental::row::rhs_index_type; + + return equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && + conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); + } +}; + +// A CUDA Cooperative Group of 4 threads for the hash set. +auto constexpr DEFAULT_MIXED_JOIN_CG_SIZE = 4; + +// The hash set type used by mixed_semi_join with the build_table. +using hash_set_type = cuco::static_set, + cuda::thread_scope_device, + double_row_equality_comparator, + cuco::linear_probing, + cudf::detail::cuco_allocator, + cuco::storage<1>>; + +// The hash_set_ref_type used by mixed_semi_join kerenels for probing. +using hash_set_ref_type = hash_set_type::ref_type; + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 7459ac3e99c..f2c5ff13638 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -38,12 +38,16 @@ CUDF_KERNEL void __launch_bounds__(block_size) table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data) { + auto constexpr cg_size = hash_set_ref_type::cg_size; + + auto const tile = + cooperative_groups::tiled_partition(cooperative_groups::this_thread_block()); + // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is // used to circumvent conflicts between arrays of different types between @@ -52,24 +56,24 @@ CUDF_KERNEL void __launch_bounds__(block_size) cudf::ast::detail::IntermediateDataType* intermediate_storage = reinterpret_cast*>(raw_intermediate_storage); auto thread_intermediate_storage = - &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; - - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = left_num_rows; + &intermediate_storage[tile.meta_group_rank() * device_expression_data.num_intermediates]; - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + cudf::size_type const outer_num_rows = left_table.num_rows(); + auto const outer_row_index = cudf::detail::grid_1d::global_thread_id() / cg_size; auto evaluator = cudf::ast::detail::expression_evaluator( left_table, right_table, device_expression_data); if (outer_row_index < outer_num_rows) { + // Make sure to swap_tables here as hash_set will use probe table as the left one. + auto constexpr swap_tables = true; // Figure out the number of elements for this key. auto equality = single_expression_equality{ - evaluator, thread_intermediate_storage, false, equality_probe}; + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - left_table_keep_mask[outer_row_index] = - hash_table_view.contains(outer_row_index, hash_probe, equality); + auto const set_ref_equality = set_ref.with_key_eq(equality); + auto const result = set_ref_equality.contains(tile, outer_row_index); + if (tile.thread_rank() == 0) left_table_keep_mask[outer_row_index] = result; } } @@ -78,9 +82,8 @@ void launch_mixed_join_semi(bool has_nulls, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data, detail::grid_1d const config, @@ -94,9 +97,8 @@ void launch_mixed_join_semi(bool has_nulls, right_table, probe, build, - hash_probe, equality_probe, - hash_table_view, + set_ref, left_table_keep_mask, device_expression_data); } else { @@ -106,9 +108,8 @@ void launch_mixed_join_semi(bool has_nulls, right_table, probe, build, - hash_probe, equality_probe, - hash_table_view, + set_ref, left_table_keep_mask, device_expression_data); } diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index 43714ffb36a..b08298e64e4 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -45,9 +45,8 @@ namespace detail { * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] hash_table_view The hash table built from `build`. + * @param[in] set_ref The hash table device view built from `build`. * @param[out] left_table_keep_mask The result of the join operation with "true" element indicating * the corresponding index from left table is present in output * @param[in] device_expression_data Container of device data required to evaluate the desired @@ -58,9 +57,8 @@ void launch_mixed_join_semi(bool has_nulls, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data, detail::grid_1d const config, diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index cfb785e242c..719b1d47105 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -46,45 +46,6 @@ namespace cudf { namespace detail { -namespace { -/** - * @brief Device functor to create a pair of hash value and index for a given row. - */ -struct make_pair_function_semi { - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept - { - // The value is irrelevant since we only ever use the hash map to check for - // membership of a particular row index. - return cuco::make_pair(static_cast(i), 0); - } -}; - -/** - * @brief Equality comparator that composes two row_equality comparators. - */ -class double_row_equality { - public: - double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) - : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} - { - } - - __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept - { - using experimental::row::lhs_index_type; - using experimental::row::rhs_index_type; - - return _equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && - _conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); - } - - private: - row_equality _equality_comparator; - row_equality _conditional_comparator; -}; - -} // namespace - std::unique_ptr> mixed_join_semi( table_view const& left_equality, table_view const& right_equality, @@ -96,7 +57,7 @@ std::unique_ptr> mixed_join_semi( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) and (join_type != join_kind::LEFT_JOIN) and (join_type != join_kind::FULL_JOIN), "Inner, left, and full joins should use mixed_join."); @@ -137,7 +98,7 @@ std::unique_ptr> mixed_join_semi( // output column and follow the null-supporting expression evaluation code // path. auto const has_nulls = cudf::nullate::DYNAMIC{ - cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + cudf::has_nulls(left_equality) or cudf::has_nulls(right_equality) or binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ @@ -156,27 +117,20 @@ std::unique_ptr> mixed_join_semi( auto right_conditional_view = table_device_view::create(right_conditional, stream); auto const preprocessed_build = - experimental::row::equality::preprocessed_table::create(build, stream); + cudf::experimental::row::equality::preprocessed_table::create(build, stream); auto const preprocessed_probe = - experimental::row::equality::preprocessed_table::create(probe, stream); + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); - semi_map_type hash_table{ - compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, - stream.value()}; - // Create hash table containing all keys found in right table // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; auto const row_hash_build = cudf::experimental::row::hash::row_hasher{preprocessed_build}; - auto const hash_build = row_hash_build.device_hasher(build_nulls); + // Since we may see multiple rows that are identical in the equality tables // but differ in the conditional tables, the equality comparator used for // insertion must account for both sets of tables. An alternative solution @@ -191,20 +145,28 @@ std::unique_ptr> mixed_join_semi( auto const equality_build_equality = row_comparator_build.equal_to(build_nulls, compare_nulls); auto const preprocessed_build_condtional = - experimental::row::equality::preprocessed_table::create(right_conditional, stream); + cudf::experimental::row::equality::preprocessed_table::create(right_conditional, stream); auto const row_comparator_conditional_build = cudf::experimental::row::equality::two_table_comparator{preprocessed_build_condtional, preprocessed_build_condtional}; auto const equality_build_conditional = row_comparator_conditional_build.equal_to(build_nulls, compare_nulls); - double_row_equality equality_build{equality_build_equality, equality_build_conditional}; - make_pair_function_semi pair_func_build{}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + hash_set_type row_set{ + {compute_hash_table_size(build.num_rows())}, + cuco::empty_key{JoinNoneValue}, + {equality_build_equality, equality_build_conditional}, + {row_hash_build.device_hasher(build_nulls)}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + {stream.value()}}; + + auto iter = thrust::make_counting_iterator(0); // skip rows that are null here. if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { - hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + row_set.insert(iter, iter + right_num_rows, stream.value()); } else { thrust::counting_iterator stencil(0); auto const [row_bitmask, _] = @@ -212,18 +174,19 @@ std::unique_ptr> mixed_join_semi( row_is_valid pred{static_cast(row_bitmask.data())}; // insert valid rows - hash_table.insert_if( - iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + row_set.insert_if(iter, iter + right_num_rows, stencil, pred, stream.value()); } - auto hash_table_view = hash_table.get_device_view(); - detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); - auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + auto const shmem_size_per_block = + parser.shmem_per_thread * + cuco::detail::int_div_ceil(config.num_threads_per_block, hash_set_type::cg_size); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(has_nulls); + hash_set_ref_type const row_set_ref = row_set.ref(cuco::contains).with_hash_function(hash_probe); + // Vector used to indicate indices from left/probe table which are present in output auto left_table_keep_mask = rmm::device_uvector(probe.num_rows(), stream); @@ -232,9 +195,8 @@ std::unique_ptr> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, - hash_probe, equality_probe, - hash_table_view, + row_set_ref, cudf::device_span(left_table_keep_mask), parser.device_expression_data, config, diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index 6c147c8a128..08a0136700d 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -778,6 +778,21 @@ TYPED_TEST(MixedLeftSemiJoinTest, BasicEquality) {1}); } +TYPED_TEST(MixedLeftSemiJoinTest, MixedLeftSemiJoinGatherMap) +{ + auto const col_ref_left_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::LEFT); + auto const col_ref_right_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_one_greater_right_one = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref_left_1, col_ref_right_1); + + this->test({{2, 3, 9, 0, 1, 7, 4, 6, 5, 8}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}}, + {{6, 5, 9, 8, 10, 32}, {0, 1, 2, 3, 4, 5}, {7, 8, 9, 0, 1, 2}}, + {0}, + {1}, + left_one_greater_right_one, + {2, 7, 8}); +} + TYPED_TEST(MixedLeftSemiJoinTest, BasicEqualityDuplicates) { this->test({{0, 1, 2, 1}, {3, 4, 5, 6}, {10, 20, 30, 40}}, @@ -900,3 +915,18 @@ TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricLeftLargerEquality) left_zero_eq_right_zero, {0, 1, 3}); } + +TYPED_TEST(MixedLeftAntiJoinTest, MixedLeftAntiJoinGatherMap) +{ + auto const col_ref_left_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::LEFT); + auto const col_ref_right_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_one_greater_right_one = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref_left_1, col_ref_right_1); + + this->test({{2, 3, 9, 0, 1, 7, 4, 6, 5, 8}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}}, + {{6, 5, 9, 8, 10, 32}, {0, 1, 2, 3, 4, 5}, {7, 8, 9, 0, 1, 2}}, + {0}, + {1}, + left_one_greater_right_one, + {0, 1, 3, 4, 5, 6, 9}); +}