diff --git a/.unreleased/pr_7600 b/.unreleased/pr_7600
new file mode 100644
index 00000000000..bb3a4008f81
--- /dev/null
+++ b/.unreleased/pr_7600
@@ -0,0 +1 @@
+Fixes: #7600 Fix lock order when dropping index
diff --git a/src/chunk_index.c b/src/chunk_index.c
index da11d4a5658..5e85282f2fc 100644
--- a/src/chunk_index.c
+++ b/src/chunk_index.c
@@ -573,17 +573,70 @@ typedef struct ChunkIndexDeleteData
 	bool drop_index;
 } ChunkIndexDeleteData;
 
-/* Find all internal dependencies to be able to delete all the objects in one
+/*
+ * Lock an object for deletion.
+ *
+ * In particular, we need to ensure that we lock the table of an index before
+ * locking the index, or run the risk of ending up in a deadlock since the
+ * normal locking order is table first, index second. Since this is not a
+ * concurrent delete, we take a strong lock for this.
+ *
+ * It is also necessary that the parent table is locked first, but that has
+ * already been done at this stage, so it does not need to be done explicitly.
+ */
+static bool
+chunk_lock_object_for_deletion(const ObjectAddress *obj)
+{
+	/*
+	 * If we're locking an index, we need to lock the table first. See
+	 * RangeVarCallbackForDropRelation() in tablecmds.c. We can ignore
+	 * partitioned indexes since we do not use them.
+	 */
+	char relkind = get_rel_relkind(obj->objectId);
+
+	/*
+	 * If we cannot find the object, it might have been concurrently deleted
+	 * (we do not have locks on objects yet).
+	 */
+	if (relkind == '\0')
+		return false;
+	if (relkind == RELKIND_INDEX)
+	{
+		Oid heapOid = IndexGetRelation(obj->objectId, true);
+		if (OidIsValid(heapOid))
+			LockRelationOid(heapOid, AccessExclusiveLock);
+	}
+
+	LockRelationOid(obj->objectId, AccessExclusiveLock);
+	return true;
+}
+
+/*
+ * Find all internal dependencies to be able to delete all the objects in one
  * go. We do this by scanning the dependency table and keeping all the tables
- * in our internal schema. */
-static void
-chunk_collect_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses *objects)
+ * in our internal schema.
+ *
+ * We also lock the objects in the correct order (meaning table first, index
+ * second) here to make sure that we do not end up with deadlocks.
+ *
+ * We return 'true' if we added any objects, and 'false' otherwise.
+ */
+static bool
+chunk_collect_and_lock_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses *objects)
 {
 	Relation deprel = table_open(DependRelationId, RowExclusiveLock);
 	ScanKeyData scankey[2];
 	SysScanDesc scan;
 	HeapTuple tup;
 
+	/*
+	 * If the object disappeared before we managed to get a lock on it, there
+	 * is nothing more to do, so just return early and indicate that there
+	 * are no objects to delete.
+	 */
+	if (!chunk_lock_object_for_deletion(relobj))
+		return false;
+
 	add_exact_object_address(relobj, objects);
 
 	ScanKeyInit(&scankey[0],
@@ -608,18 +661,13 @@ chunk_collect_objects_for_deletion(const ObjectAddress *relobj, ObjectAddresses
 	{
 		Form_pg_depend record = (Form_pg_depend) GETSTRUCT(tup);
 		ObjectAddress refobj = { .classId = record->refclassid, .objectId = record->refobjid };
-
-		switch (record->deptype)
-		{
-			case DEPENDENCY_INTERNAL:
-				add_exact_object_address(&refobj, objects);
-				break;
-			default:
-				continue; /* Do nothing */
-		}
+		if (record->deptype == DEPENDENCY_INTERNAL && chunk_lock_object_for_deletion(&refobj))
+			add_exact_object_address(&refobj, objects);
 	}
+
 	systable_endscan(scan);
 	table_close(deprel, RowExclusiveLock);
+	return true;
 }
 
 static ScanTupleResult
@@ -642,7 +690,8 @@ chunk_index_tuple_delete(TupleInfo *ti, void *data)
 
 		if (OidIsValid(idxobj.objectId))
 		{
-			/* If we use performDeletion here it will fail if there are
+			/*
+			 * If we use performDeletion() here it will fail if there are
 			 * internal dependencies on the object since we are restricting
 			 * the cascade.
 			 *
@@ -651,15 +700,28 @@ chunk_index_tuple_delete(TupleInfo *ti, void *data)
 			 * internal dependencies and use the function
 			 * performMultipleDeletions.
 			 *
-			 * The function performMultipleDeletions accept a list of objects
-			 * and if there are dependencies between any of the objects given
-			 * to the function, it will not generate an error for that but
-			 * rather proceed with the deletion. If there are any dependencies
-			 * (internal or not) outside this set of objects, it will still
-			 * abort the deletion and print an error. */
+			 * We lock the objects to delete first to make sure that the lock
+			 * order is correct. This is done inside RemoveRelations(), and
+			 * performMultipleDeletions() expects these locks to be taken
+			 * first. If not, it will take very rudimentary locks, which will
+			 * cause deadlocks in some cases because the lock order is not
+			 * correct.
+			 *
+			 * Since we do not have any locks on any objects at this point,
+			 * the relations might have disappeared before we had a chance to
+			 * lock them. In this case, it is not necessary to do an explicit
+			 * call to performMultipleDeletions().
+			 *
+			 * The function performMultipleDeletions() accepts a list of
+			 * objects, and if there are dependencies between any of the
+			 * objects given to the function, it will not generate an error
+			 * for that but rather proceed with the deletion. If there are any
+			 * dependencies (internal or not) outside this set of objects, it
+			 * will still abort the deletion and print an error.
+			 */
 			ObjectAddresses *objects = new_object_addresses();
-			chunk_collect_objects_for_deletion(&idxobj, objects);
-			performMultipleDeletions(objects, DROP_RESTRICT, 0);
+			if (chunk_collect_and_lock_objects_for_deletion(&idxobj, objects))
+				performMultipleDeletions(objects, DROP_RESTRICT, 0);
 			free_object_addresses(objects);
 		}
 	}
diff --git a/tsl/test/isolation/expected/deadlock_drop_index_vacuum.out b/tsl/test/isolation/expected/deadlock_drop_index_vacuum.out
new file mode 100644
index 00000000000..03fa5f15125
--- /dev/null
+++ b/tsl/test/isolation/expected/deadlock_drop_index_vacuum.out
@@ -0,0 +1,24 @@
+Parsed test spec with 4 sessions
+
+starting permutation: S1_lock S3_vacuum S2_lock S1_commit S4_drop_index S2_commit
+step S1_lock:
+    LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
+
+step S3_vacuum:
+    VACUUM ANALYZE _timescaledb_internal.metrics_chunk_2;
+
+step S2_lock:
+    LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
+
+step S1_commit:
+    COMMIT;
+
+step S2_lock: <... completed>
+step S4_drop_index:
+    DROP INDEX metrics_device_time_idx;
+
+step S2_commit:
+    COMMIT;
+
+step S3_vacuum: <... completed>
+step S4_drop_index: <... completed>
diff --git a/tsl/test/isolation/specs/CMakeLists.txt b/tsl/test/isolation/specs/CMakeLists.txt
index 629f53094fb..960abb4b0d4 100644
--- a/tsl/test/isolation/specs/CMakeLists.txt
+++ b/tsl/test/isolation/specs/CMakeLists.txt
@@ -20,6 +20,7 @@ list(
   cagg_multi_iso.spec
   cagg_concurrent_refresh.spec
   deadlock_drop_chunks_compress.spec
+  deadlock_drop_index_vacuum.spec
   parallel_compression.spec
   osm_range_updates_iso.spec)
 
diff --git a/tsl/test/isolation/specs/deadlock_drop_index_vacuum.spec b/tsl/test/isolation/specs/deadlock_drop_index_vacuum.spec
new file mode 100644
index 00000000000..be11e373984
--- /dev/null
+++ b/tsl/test/isolation/specs/deadlock_drop_index_vacuum.spec
@@ -0,0 +1,130 @@
+# This file and its contents are licensed under the Timescale License.
+# Please see the included NOTICE for copyright information and
+# LICENSE-TIMESCALE for a copy of the license.
+
+# The order of locks in DROP INDEX and VACUUM ANALYZE was wrong: DROP
+# INDEX took the locks in the order index-table, while VACUUM ANALYZE
+# took them in the order table-index.
+#
+# This test creates a deadlock if the locking order is wrong. The problem
+# here is that VACUUM takes two locks: first one to read the table and then
+# one to do the actual vacuum. For this reason we need two processes that
+# end up in the deadlock (VACUUM ANALYZE and DROP INDEX respectively) and
+# then two extra sessions working in lock-step to create the deadlock.
+#
+# It can be illustrated with the sequence below, where we have a chunk
+# table and a chunk index. The sessions between the brackets ([]) form
+# the lock queue for the object, and the session in angle brackets (<>)
+# is the session that currently holds the lock on the object in question:
+#
+# S1: Lock chunk table from hypertable
+# index: []
+# table: []
+# S3: Start VACUUM ANALYZE, which will attempt to lock the chunk table but gets queued.
+# index: []
+# table: [S3]
+# S2: Lock chunk table from hypertable, which will be queued
+# index: []
+# table: [S3 S2]
+# S1: Unlock chunk table
+# index: []
+# table: [S3 S2]
+# S3: VACUUM ANALYZE continues and takes the lock on the chunk table.
+# index: []
+# table: [S2]
+# S3: VACUUM ANALYZE will release the lock on the chunk table.
+# index: []
+# table: [S2]
+# S3: VACUUM ANALYZE will attempt to lock the chunk table again
+# index: []
+# table: [S2 S3]
+# S2: The LOCK statement gets the lock and VACUUM will wait
+# index: []
+# table: [S3]
+# S4: DROP INDEX starts and takes the lock on the index first and is then
+#     queued for the chunk table
+# index: []
+# table: [S3 S4]
+# S2: Release lock on chunk table
+# index: []
+# table: [S3 S4]
+# S3: VACUUM continues, takes the table lock, and then tries the index lock
+# index: [S3]
+# table: [S4]
+# Deadlock
+
+setup {
+    CREATE TABLE metrics (time timestamptz, device_id integer, temp float);
+    SELECT create_hypertable('metrics', 'time', chunk_time_interval => interval '1 day');
+    INSERT INTO metrics
+    SELECT generate_series('2018-12-01 00:00'::timestamp,
+                           '2018-12-03 00:00',
+                           '1 hour'),
+           (random()*30)::int,
+           random()*80 - 40;
+
+    CREATE INDEX metrics_device_time_idx ON metrics(device_id, time NULLS FIRST);
+
+    -- Rename chunks so that we have known names. We cannot execute
+    -- VACUUM in a function block.
+    DO $$
+    DECLARE
+       chunk regclass;
+       count int = 1;
+    BEGIN
+       FOR chunk IN SELECT ch FROM show_chunks('metrics') ch
+       LOOP
+          EXECUTE format('ALTER TABLE %s RENAME TO metrics_chunk_%s', chunk, count);
+          count = count + 1;
+       END LOOP;
+    END
+    $$;
+}
+
+teardown {
+    DROP TABLE metrics;
+}
+
+session "S1"
+setup {
+    START TRANSACTION;
+    SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+    SET LOCAL lock_timeout = '500ms';
+    SET LOCAL deadlock_timeout = '300ms';
+}
+
+step "S1_lock" {
+    LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
+}
+
+step "S1_commit" {
+    COMMIT;
+}
+
+session "S2"
+setup {
+    START TRANSACTION;
+    SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+    SET LOCAL lock_timeout = '500ms';
+    SET LOCAL deadlock_timeout = '300ms';
+}
+
+step "S2_lock" {
+    LOCK TABLE _timescaledb_internal.metrics_chunk_2 IN ACCESS EXCLUSIVE MODE;
+}
+
+step "S2_commit" {
+    COMMIT;
+}
+
+session "S3"
+step "S3_vacuum" {
+    VACUUM ANALYZE _timescaledb_internal.metrics_chunk_2;
+}
+
+session "S4"
+step "S4_drop_index" {
+    DROP INDEX metrics_device_time_idx;
+}
+
+permutation S1_lock S3_vacuum S2_lock S1_commit S4_drop_index S2_commit
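
Note: the heart of the fix is the table-before-index lock order enforced by chunk_lock_object_for_deletion() in the diff above. For readers outside the patch context, a minimal self-contained sketch of that rule follows; it assumes only the standard PostgreSQL backend headers, and the helper name lock_relation_for_drop() is illustrative rather than part of the patch.

    #include "postgres.h"

    #include "catalog/index.h"
    #include "catalog/pg_class.h"
    #include "storage/lmgr.h"
    #include "utils/lsyscache.h"

    /*
     * Lock a relation that is about to be dropped, taking the heap lock
     * before the index lock so that we follow the same order as DROP INDEX
     * and VACUUM (table first, index second). Returns false if the relation
     * vanished concurrently and there is nothing to lock or drop.
     */
    static bool
    lock_relation_for_drop(Oid relid)
    {
        char relkind = get_rel_relkind(relid);

        if (relkind == '\0')
            return false;

        if (relkind == RELKIND_INDEX)
        {
            /* Lock the index's table first to respect the normal lock order. */
            Oid heapOid = IndexGetRelation(relid, true);

            if (OidIsValid(heapOid))
                LockRelationOid(heapOid, AccessExclusiveLock);
        }

        /* Only after the table lock do we lock the object itself. */
        LockRelationOid(relid, AccessExclusiveLock);
        return true;
    }

The important property is simply that the heap relation is always locked before its index, matching the order used by RangeVarCallbackForDropRelation() and by VACUUM, so that dropping a chunk index can no longer deadlock against a concurrent VACUUM ANALYZE.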