Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict getting the restore process start time to CRaC API #20262

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions runtime/vm/CRIUHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,6 @@ criuCheckpointJVMImpl(JNIEnv *env,
I_32 syslogBufferSize = 0;
UDATA oldVMState = VM_VMHelpers::setVMState(currentThread, J9VMSTATE_CRIU_SUPPORT_CHECKPOINT_PHASE_START);
UDATA notSafeToCheckpoint = 0;
UDATA criuRestorePid = 0;
U_32 intGhostFileLimit = 0;
IDATA criuDumpReturnCode = 0;
bool restoreFailure = false;
Expand Down Expand Up @@ -1878,19 +1877,21 @@ criuCheckpointJVMImpl(JNIEnv *env,
}

if (hasDumpSucceeded) {
/* Calculate restore time excluding `criu restore ...` for MXBean API. */
criuRestorePid = j9sysinfo_get_ppid();
systemReturnCode = j9sysinfo_get_process_start_time(criuRestorePid, &restoreNanoUTCTime);
if (0 != systemReturnCode) {
currentExceptionClass = vm->checkpointState.criuSystemRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(
J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE,
J9NLS_VM_CRIU_J9_GET_PROCESS_START_TIME_FAILURE,
NULL);
restoreFailure = true;
/* Calculate restore time for CRaC MXBean API. */
if (J9_ARE_ALL_BITS_SET(vm->checkpointState.flags, J9VM_CRAC_IS_CHECKPOINT_ENABLED)) {
UDATA cracRestorePid = j9sysinfo_get_ppid();
systemReturnCode = j9sysinfo_get_process_start_time(cracRestorePid, &restoreNanoUTCTime);
if (0 != systemReturnCode) {
currentExceptionClass = vm->checkpointState.criuSystemRestoreExceptionClass;
nlsMsgFormat = j9nls_lookup_message(
J9NLS_DO_NOT_PRINT_MESSAGE_TAG | J9NLS_DO_NOT_APPEND_NEWLINE,
J9NLS_VM_CRIU_J9_GET_PROCESS_START_TIME_FAILURE,
NULL);
restoreFailure = true;
}
vm->checkpointState.processRestoreStartTimeInNanoseconds = (I_64)restoreNanoUTCTime;
Trc_VM_criu_process_restore_start_after_dump(currentThread, cracRestorePid, vm->checkpointState.processRestoreStartTimeInNanoseconds);
}
vm->checkpointState.processRestoreStartTimeInNanoseconds = (I_64)restoreNanoUTCTime;
Trc_VM_criu_process_restore_start_after_dump(currentThread, criuRestorePid, vm->checkpointState.processRestoreStartTimeInNanoseconds);

/* Load restore arguments from restore file or env vars. */
switch (loadRestoreArguments(currentThread, optionsFileChars, envFileChars)) {
Expand Down
16 changes: 0 additions & 16 deletions test/functional/cmdLineTests/criu/criu_nonPortable.xml
Original file line number Diff line number Diff line change
Expand Up @@ -209,22 +209,6 @@
<output type="failure" caseSensitive="yes" regex="no">User requested Java dump using</output>
</test>

<test id="Create CRIU checkpoint image and restore once - testGetProcessRestoreStartTime">
<command>bash $SCRIPPATH$ $TEST_RESROOT$ $JAVA_COMMAND$ "$JVM_OPTIONS$ $XTRACE_CRIU$ $STD_CMD_OPTS$" $MAINCLASS_TIMECHANGE$ testGetProcessRestoreStartTime 1 false false</command>
<output type="success" caseSensitive="no" regex="no">Killed</output>
<output type="success" caseSensitive="yes" regex="no">PASSED: InternalCRIUSupport.getProcessRestoreStartTime()</output>
<output type="failure" caseSensitive="yes" regex="no">CRIU is not enabled</output>
<output type="failure" caseSensitive="yes" regex="no">Operation not permitted</output>
<output type="failure" caseSensitive="yes" regex="no">FAILED: InternalCRIUSupport.getProcessRestoreStartTime()</output>
<!-- If CRIU can't acquire the original thread IDs, this test will fail. Nothing can be done about this failure. -->
<output type="success" caseSensitive="yes" regex="no">Thread pid mismatch</output>
<output type="success" caseSensitive="yes" regex="no">do not match expected</output>
<output type="success" caseSensitive="yes" regex="no">Unable to create a thread:</output>
<!-- In the past, the failure below was caused by an issue where CRIU can't be found on the PATH. -->
<output type="failure" caseSensitive="yes" regex="no">Could not dump the JVM processes, err=-70</output>
<output type="failure" caseSensitive="yes" regex="no">User requested Java dump using</output>
</test>

<test id="Create CRIU checkpoint image and restore once - testMXBeanUpTime">
<command>bash $SCRIPPATH$ $TEST_RESROOT$ $JAVA_COMMAND$ "$JVM_OPTIONS$ -Xtrace:print={j9jcl.533,j9vm.684-696,j9vm.699,j9vm.717-743} $STD_CMD_OPTS$" -Xdump:java+system+jit:events=throw+systhrow,filter=org/eclipse/openj9/criu/JVMCheckpointException $MAINCLASS_TIMECHANGE$ testMXBeanUpTime 1 false false</command>
<output type="success" caseSensitive="no" regex="no">Killed</output>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ public static void main(String args[]) throws InterruptedException {
case "testGetLastRestoreTime":
tct.testGetLastRestoreTime();
break;
case "testGetProcessRestoreStartTime":
tct.testGetProcessRestoreStartTime();
break;
case "testMXBeanUpTime":
tct.testMXBeanUpTime();
break;
Expand Down Expand Up @@ -267,33 +264,6 @@ private void testGetLastRestoreTime() {
}
}

private void testGetProcessRestoreStartTime() {
long processRestoreStartTime = InternalCRIUSupport.getProcessRestoreStartTime();
if (processRestoreStartTime != -1) {
System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime
+ " is not -1 before restore");
}
CRIUSupport criu = CRIUTestUtils.prepareCheckPointJVM(CRIUTestUtils.imagePath);
long beforeCheckpointTime = TimeUtilities.getCurrentTimeInNanoseconds();
CRIUTestUtils.checkPointJVMNoSetup(criu, CRIUTestUtils.imagePath, false);
processRestoreStartTime = InternalCRIUSupport.getProcessRestoreStartTime();
long lastRestoreTime = InternalCRIUSupport.getLastRestoreTime();
long afterRestoreTime = TimeUtilities.getCurrentTimeInNanoseconds();
if (beforeCheckpointTime >= processRestoreStartTime) {
System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime
+ " is less than beforeCheckpointTime - " + beforeCheckpointTime);
} else if (processRestoreStartTime >= lastRestoreTime) {
System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime
+ " is more than InternalCRIUSupport.getLastRestoreTime() - " + lastRestoreTime);
} else if (processRestoreStartTime >= afterRestoreTime) {
System.out.println("FAILED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime
+ " is more than afterRestoreTime - " + afterRestoreTime);
} else {
System.out.println("PASSED: InternalCRIUSupport.getProcessRestoreStartTime() - " + processRestoreStartTime
+ " is between beforeCheckpointTime - " + beforeCheckpointTime + " and afterRestoreTime - " + afterRestoreTime);
}
}

private void testMXBeanUpTime() {
CRIUSupport criu = CRIUTestUtils.prepareCheckPointJVM(CRIUTestUtils.imagePath);
RuntimeMXBean mxb = ManagementFactory.getRuntimeMXBean();
Expand Down