Fix NULL handling for in-memory tuple filtering
During DML decompression, we filter out batches that don't
need decompression by applying scankeys to the compressed
tuples in memory. Those scankeys did not detect NULL values
correctly for unique constraints that use the NULLS NOT
DISTINCT setting. This change reuses existing code to
properly detect those tuples via the SK_ISNULL flag.
antekresic committed Dec 23, 2024
1 parent 5c93057 commit 1983063
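
For context, a unique index declared NULLS NOT DISTINCT treats two NULL key values as equal, so the in-memory batch filter must be able to match on NULLs. Below is a minimal sketch of how a NULL-matching scankey can be initialized with PostgreSQL's ScanKeyEntryInitialize (an illustration only, not necessarily how build_mem_scankeys_from_slot builds its keys; the attribute number is a placeholder):

#include <postgres.h>
#include <access/skey.h>
#include <access/stratnum.h>

/*
 * Sketch: a scankey that matches NULL values. SK_ISNULL marks the key
 * as a NULL test, so no comparison operator or argument is supplied.
 * SK_SEARCHNULL is included because ScanKeyEntryInitialize expects it
 * when no comparison procedure is given.
 */
static void
init_null_scankey(ScanKey key, AttrNumber attno)
{
    ScanKeyEntryInitialize(key,
                           SK_ISNULL | SK_SEARCHNULL,
                           attno,
                           InvalidStrategy,
                           InvalidOid, /* subtype */
                           InvalidOid, /* collation */
                           InvalidOid, /* no comparison function */
                           (Datum) 0);
}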
Showing 7 changed files with 73 additions and 33 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_7557
@@ -0,0 +1 @@
Fixes: #7557 Fix NULL handling for in-memory tuple filtering
22 changes: 16 additions & 6 deletions tsl/src/compression/compression_dml.c
@@ -574,6 +574,21 @@ decompress_batches_scan(Relation in_rel, Relation out_rel, Relation index_rel, S
return stats;
}

static bool
slot_keys_test(TupleTableSlot *slot, int nkeys, ScanKey keys)
{
    int cur_nkeys = nkeys;
    ScanKey cur_key = keys;

    for (; cur_nkeys--; cur_key++)
    {
        if (!slot_key_test(slot, cur_key))
            return false;
    }

    return true;
}

static bool
batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scankeys,
              tuple_filtering_constraints *constraints, bool *skip_current_tuple)
@@ -585,12 +600,7 @@ batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scan
    for (int row = 0; row < num_tuples; row++)
    {
        TupleTableSlot *decompressed_slot = decompressor->decompressed_slots[row];
        HeapTuple tuple = decompressed_slot->tts_ops->get_heap_tuple(decompressed_slot);
#if PG16_LT
        HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys, valid);
#else
        valid = HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys);
#endif
        valid = slot_keys_test(decompressed_slot, num_scankeys, scankeys);
        if (valid)
        {
            if (constraints)
2 changes: 2 additions & 0 deletions tsl/src/compression/compression_dml.h
@@ -30,6 +30,8 @@ typedef struct tuple_filtering_constraints
    bool nullsnotdistinct;
} tuple_filtering_constraints;

bool slot_key_test(TupleTableSlot *slot, ScanKey skey);

ScanKeyData *build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *settings,
Relation out_rel,
tuple_filtering_constraints *constraints,
25 changes: 25 additions & 0 deletions tsl/src/compression/compression_scankey.c
@@ -22,6 +22,31 @@ static int create_segment_filter_scankey(Relation in_rel, char *segment_filter_c
Bitmapset **null_columns, Datum value, bool is_null_check,
bool is_array_op);

/*
* Test ScanKey against a slot.
*
* Unlike HeapKeyTest, this function takes into account SK_ISNULL
* and works correctly when looking for null values.
*/
bool
slot_key_test(TupleTableSlot *compressed_slot, ScanKey key)
{
    /* No need to get the datum if we are only checking for NULLs */
    if (key->sk_flags & SK_ISNULL)
    {
        return slot_attisnull(compressed_slot, key->sk_attno);
    }

    Datum val;
    bool is_null;
    val = slot_getattr(compressed_slot, key->sk_attno, &is_null);

    if (is_null)
        return false;

    return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument));
}

/*
* Build scankeys for decompressed tuple to check if it is part of the batch.
*
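For contrast, PostgreSQL's HeapKeyTest, which this commit replaces in batch_matches, returns false as soon as a scankey carries SK_ISNULL, so it can never match a tuple when the caller is searching for NULLs. Paraphrased and simplified from the per-key loop in access/valid.h (not the verbatim macro):

/* Inside HeapKeyTest's per-key loop, paraphrased and simplified: */
if (cur_key->sk_flags & SK_ISNULL)
{
    result = false; /* a key searching for NULL never matches */
    break;
}

atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
if (isnull)
{
    result = false; /* NULL attribute values never match either */
    break;
}

This is why batch_matches now calls slot_keys_test, which delegates to the slot_key_test helper above.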
15 changes: 1 addition & 14 deletions tsl/src/compression/recompress.c
@@ -13,6 +13,7 @@
#include <utils/typcache.h>

#include "compression.h"
#include "compression_dml.h"
#include "create.h"
#include "guc.h"
#include "hypercore/hypercore_handler.h"
@@ -517,20 +518,6 @@ update_orderby_scankeys(TupleTableSlot *uncompressed_slot, CompressedSegmentInfo
}
}

static bool
slot_key_test(TupleTableSlot *compressed_slot, ScanKey key)
{
    Datum val;
    bool is_null;
    val = slot_getattr(compressed_slot, key->sk_attno, &is_null);

    /* NULL values only match the value is NULL */
    if (key->sk_flags & SK_ISNULL)
        return is_null;

    return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument));
}

static enum Batch_match_result
handle_null_scan(int key_flags, bool nulls_first, enum Batch_match_result result)
{
25 changes: 17 additions & 8 deletions tsl/test/shared/expected/compression_nulls_not_distinct.out
@@ -6,17 +6,19 @@
-- only exists since then. once we drop support for PG14, this test
-- can be moved to non-shared compression_conflicts test
set timescaledb.debug_compression_path_info to on;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT;
SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time');
     table_name
--------------------
 nulls_not_distinct
(1 row)

ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device');
NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC"
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC, label"
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.10', 'd2', 'l1', 1);
SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c;
INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression
count
@@ -25,9 +27,16 @@ INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression

-- shouldn't succeed because nulls are not distinct
\set ON_ERROR_STOP 0
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INFO: Using index scan with scan keys: index 1, heap 2, memory 1.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_idx"
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INFO: Using index scan with scan keys: index 1, heap 4, memory 2.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx"
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
INFO: Using index scan with scan keys: index 1, heap 2, memory 2.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx"
\set ON_ERROR_STOP 1
-- should insert without error, no conflict
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', 'l1', 1);
INFO: Using index scan with scan keys: index 1, heap 4, memory 2.
INFO: Number of compressed rows fetched from index: 1. Number of compressed rows filtered by heap filters: 0.
RESET timescaledb.debug_compression_path_info;
DROP TABLE nulls_not_distinct;
16 changes: 11 additions & 5 deletions tsl/test/shared/sql/compression_nulls_not_distinct.sql
@@ -9,20 +9,26 @@
-- can be moved to non-shared compression_conflicts test

set timescaledb.debug_compression_path_info to on;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT;
SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time');
ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device');

INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.10', 'd2', 'l1', 1);

SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c;

-- shouldn't succeed because nulls are not distinct
\set ON_ERROR_STOP 0
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
\set ON_ERROR_STOP 1

-- should insert without error, no conflict
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', 'l1', 1);

RESET timescaledb.debug_compression_path_info;
DROP TABLE nulls_not_distinct;
