Skip to content

Commit

Permalink
[BUG][JNI] Trigger MemoryBuffer.onClosed after memory is freed (#15351)
Browse files Browse the repository at this point in the history
Closes #15350. This PR changes the order of the callback `MemoryBuffer.onClosed` to happen after our `MemoryCleaner` finishes. This is done so that we can accurately, and safely, reflect the state of the memory resource (be it device or host). This PR is needed to address a bug found in spark-rapids here: NVIDIA/spark-rapids#10585.

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Gera Shegalov (https://github.com/gerashegalov)

URL: #15351
  • Loading branch information
abellina authored Mar 20, 2024
1 parent bf58765 commit 79d2dba
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 30 deletions.
23 changes: 13 additions & 10 deletions java/src/main/java/ai/rapids/cudf/ColumnVector.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -260,15 +260,18 @@ public void noWarnLeakExpected() {
public synchronized void close() {
refCount--;
offHeap.delRef();
if (eventHandler != null) {
eventHandler.onClosed(this, refCount);
}
if (refCount == 0) {
super.close();
offHeap.clean(false);
} else if (refCount < 0) {
offHeap.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
try {
if (refCount == 0) {
super.close();
offHeap.clean(false);
} else if (refCount < 0) {
offHeap.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
}
} finally {
if (eventHandler != null) {
eventHandler.onClosed(this, refCount);
}
}
}

Expand Down
23 changes: 13 additions & 10 deletions java/src/main/java/ai/rapids/cudf/HostColumnVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,17 +150,20 @@ public void noWarnLeakExpected() {
public synchronized void close() {
refCount--;
offHeap.delRef();
if (eventHandler != null) {
eventHandler.onClosed(this, refCount);
}
if (refCount == 0) {
offHeap.clean(false);
for( HostColumnVectorCore child : children) {
child.close();
try {
if (refCount == 0) {
offHeap.clean(false);
for (HostColumnVectorCore child : children) {
child.close();
}
} else if (refCount < 0) {
offHeap.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
}
} finally {
if (eventHandler != null) {
eventHandler.onClosed(this, refCount);
}
} else if (refCount < 0) {
offHeap.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
}
}

Expand Down
23 changes: 13 additions & 10 deletions java/src/main/java/ai/rapids/cudf/MemoryBuffer.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -240,15 +240,18 @@ public synchronized void close() {
if (cleaner != null) {
refCount--;
cleaner.delRef();
if (eventHandler != null) {
eventHandler.onClosed(refCount);
}
if (refCount == 0) {
cleaner.clean(false);
closed = true;
} else if (refCount < 0) {
cleaner.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
try {
if (refCount == 0) {
cleaner.clean(false);
closed = true;
} else if (refCount < 0) {
cleaner.logRefCountDebug("double free " + this);
throw new IllegalStateException("Close called too many times " + this);
}
} finally {
if (eventHandler != null) {
eventHandler.onClosed(refCount);
}
}
}
}
Expand Down

0 comments on commit 79d2dba

Please sign in to comment.