diff --git a/.unreleased/pr_7557 b/.unreleased/pr_7557 new file mode 100644 index 00000000000..ed4781fb703 --- /dev/null +++ b/.unreleased/pr_7557 @@ -0,0 +1 @@ +Fixes: #7557 Fix NULL handling for in-memory tuple filtering diff --git a/tsl/src/compression/compression_dml.c b/tsl/src/compression/compression_dml.c index ab72d316f53..573039b3ca5 100644 --- a/tsl/src/compression/compression_dml.c +++ b/tsl/src/compression/compression_dml.c @@ -574,6 +574,21 @@ decompress_batches_scan(Relation in_rel, Relation out_rel, Relation index_rel, S return stats; } +static bool +slot_keys_test(TupleTableSlot *slot, int nkeys, ScanKey keys) +{ + int cur_nkeys = nkeys; + ScanKey cur_key = keys; + + for (; cur_nkeys--; cur_key++) + { + if (!slot_key_test(slot, cur_key)) + return false; + } + + return true; +} + static bool batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scankeys, tuple_filtering_constraints *constraints, bool *skip_current_tuple) @@ -585,12 +600,7 @@ batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scan for (int row = 0; row < num_tuples; row++) { TupleTableSlot *decompressed_slot = decompressor->decompressed_slots[row]; - HeapTuple tuple = decompressed_slot->tts_ops->get_heap_tuple(decompressed_slot); -#if PG16_LT - HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys, valid); -#else - valid = HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys); -#endif + valid = slot_keys_test(decompressed_slot, num_scankeys, scankeys); if (valid) { if (constraints) diff --git a/tsl/src/compression/compression_dml.h b/tsl/src/compression/compression_dml.h index e59fef8bc47..90b77ce73f0 100644 --- a/tsl/src/compression/compression_dml.h +++ b/tsl/src/compression/compression_dml.h @@ -30,6 +30,8 @@ typedef struct tuple_filtering_constraints bool nullsnotdistinct; } tuple_filtering_constraints; +bool slot_key_test(TupleTableSlot *slot, ScanKey skey); + ScanKeyData *build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *settings, Relation out_rel, tuple_filtering_constraints *constraints, diff --git a/tsl/src/compression/compression_scankey.c b/tsl/src/compression/compression_scankey.c index e8b8acd2b4e..be5e6ef9652 100644 --- a/tsl/src/compression/compression_scankey.c +++ b/tsl/src/compression/compression_scankey.c @@ -22,6 +22,31 @@ static int create_segment_filter_scankey(Relation in_rel, char *segment_filter_c Bitmapset **null_columns, Datum value, bool is_null_check, bool is_array_op); +/* + * Test ScanKey against a slot. + * + * Unlike HeapKeyTest, this function takes into account SK_ISNULL + * and works correctly when looking for null values. + */ +bool +slot_key_test(TupleTableSlot *compressed_slot, ScanKey key) +{ + /* No need to get the datum if we are only checking for NULLs */ + if (key->sk_flags & SK_ISNULL) + { + return slot_attisnull(compressed_slot, key->sk_attno); + } + + Datum val; + bool is_null; + val = slot_getattr(compressed_slot, key->sk_attno, &is_null); + + if (is_null) + return false; + + return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument)); +} + /* * Build scankeys for decompressed tuple to check if it is part of the batch. * diff --git a/tsl/src/compression/recompress.c b/tsl/src/compression/recompress.c index 451a2126bc6..3b8bda10435 100644 --- a/tsl/src/compression/recompress.c +++ b/tsl/src/compression/recompress.c @@ -13,6 +13,7 @@ #include #include "compression.h" +#include "compression_dml.h" #include "create.h" #include "guc.h" #include "hypercore/hypercore_handler.h" @@ -517,20 +518,6 @@ update_orderby_scankeys(TupleTableSlot *uncompressed_slot, CompressedSegmentInfo } } -static bool -slot_key_test(TupleTableSlot *compressed_slot, ScanKey key) -{ - Datum val; - bool is_null; - val = slot_getattr(compressed_slot, key->sk_attno, &is_null); - - /* NULL values only match the value is NULL */ - if (key->sk_flags & SK_ISNULL) - return is_null; - - return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument)); -} - static enum Batch_match_result handle_null_scan(int key_flags, bool nulls_first, enum Batch_match_result result) { diff --git a/tsl/test/shared/expected/compression_nulls_not_distinct.out b/tsl/test/shared/expected/compression_nulls_not_distinct.out index c0ca0d20288..1e0067c6c68 100644 --- a/tsl/test/shared/expected/compression_nulls_not_distinct.out +++ b/tsl/test/shared/expected/compression_nulls_not_distinct.out @@ -6,17 +6,19 @@ -- only exists since then. once we drop support for PG14, this test -- can be moved to non-shared compression_conflicts test set timescaledb.debug_compression_path_info to on; -CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float); -CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT; +CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float); +CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT; SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time'); table_name nulls_not_distinct (1 row) ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device'); -NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC" -INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i); -INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1); +NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC, label" +INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.10', 'd2', 'l1', 1); SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c; INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression count @@ -25,9 +27,16 @@ INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression -- shouldn't succeed because nulls are not distinct \set ON_ERROR_STOP 0 -INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1); -INFO: Using index scan with scan keys: index 1, heap 2, memory 1. -ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_idx" +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1); +INFO: Using index scan with scan keys: index 1, heap 4, memory 2. +ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx" +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1); +INFO: Using index scan with scan keys: index 1, heap 2, memory 2. +ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx" \set ON_ERROR_STOP 1 +-- should insert without error, no conflict +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', 'l1', 1); +INFO: Using index scan with scan keys: index 1, heap 4, memory 2. +INFO: Number of compressed rows fetched from index: 1. Number of compressed rows filtered by heap filters: 0. RESET timescaledb.debug_compression_path_info; DROP TABLE nulls_not_distinct; diff --git a/tsl/test/shared/sql/compression_nulls_not_distinct.sql b/tsl/test/shared/sql/compression_nulls_not_distinct.sql index 3912e71e176..8afff12128c 100644 --- a/tsl/test/shared/sql/compression_nulls_not_distinct.sql +++ b/tsl/test/shared/sql/compression_nulls_not_distinct.sql @@ -9,20 +9,26 @@ -- can be moved to non-shared compression_conflicts test set timescaledb.debug_compression_path_info to on; -CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float); -CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT; +CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float); +CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT; SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time'); ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device'); -INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i); -INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1); +INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.10', 'd2', 'l1', 1); SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c; -- shouldn't succeed because nulls are not distinct \set ON_ERROR_STOP 0 -INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1); +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1); \set ON_ERROR_STOP 1 +-- should insert without error, no conflict +INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', 'l1', 1); + RESET timescaledb.debug_compression_path_info; DROP TABLE nulls_not_distinct;