Describe the bug
test_part_write_round_trip[Float] failed in the nightly non-UTC integration run on branch-24.02 (Spark 3.1.1): while the test wrote its CPU reference copy of dynamically partitioned Parquet data, the write job aborted with an org.apache.hadoop.fs.FileAlreadyExistsException on the task attempt's own temporary output file.

Steps/Code to reproduce bug
Rerun the failing test with the DATAGEN_SEED from the log; a minimal sketch of what the test exercises follows the CI output below.

Errors on CI:
[2023-12-05T00:41:00.845Z] =========================== short test summary info ============================
[2023-12-05T00:41:00.845Z] FAILED ../../src/main/python/parquet_write_test.py::test_part_write_round_trip[Float][DATAGEN_SEED=1701731474, INJECT_OOM, IGNORE_ORDER, ALLOW_NON_GPU(ProjectExec,FilterExec,FileSourceScanExec,BatchScanExec,CollectLimitExec,DeserializeToObjectExec,DataWritingCommandExec,WriteFilesExec,ShuffleExchangeExec,ExecutedCommandExec)] - py4j.protocol.Py4JJavaError: An error occurred while calling o133807.parquet.
[2023-12-05T00:41:00.845Z] : org.apache.spark.SparkException: Job aborted.
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:231)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:188)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:131)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
[2023-12-05T00:41:00.845Z] at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:132)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:131)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:989)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:989)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:438)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:415)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:874)
[2023-12-05T00:41:00.845Z] at sun.reflect.GeneratedMethodAccessor107.invoke(Unknown Source)
[2023-12-05T00:41:00.845Z] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[2023-12-05T00:41:00.845Z] at java.lang.reflect.Method.invoke(Method.java:498)
[2023-12-05T00:41:00.845Z] at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
[2023-12-05T00:41:00.845Z] at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
[2023-12-05T00:41:00.845Z] at py4j.Gateway.invoke(Gateway.java:282)
[2023-12-05T00:41:00.845Z] at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
[2023-12-05T00:41:00.845Z] at py4j.commands.CallCommand.execute(CallCommand.java:79)
[2023-12-05T00:41:00.845Z] at py4j.GatewayConnection.run(GatewayConnection.java:238)
[2023-12-05T00:41:00.845Z] at java.lang.Thread.run(Thread.java:750)
[2023-12-05T00:41:00.845Z] Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1006.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1006.0 (TID 1940) (100.108.223.71 executor 0): org.apache.spark.SparkException: Task failed while writing rows.
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:296)
[2023-12-05T00:41:00.845Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
[2023-12-05T00:41:00.845Z] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
[2023-12-05T00:41:00.845Z] at org.apache.spark.scheduler.Task.run(Task.scala:131)
[2023-12-05T00:41:00.845Z] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
[2023-12-05T00:41:00.845Z] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
[2023-12-05T00:41:00.845Z] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
[2023-12-05T00:41:00.845Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2023-12-05T00:41:00.845Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2023-12-05T00:41:00.845Z] at java.lang.Thread.run(Thread.java:750)
[2023-12-05T00:41:00.845Z] Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/tmp/pyspark_tests/it-jenkins-rapids-it-non-utc-dev-5-cttk8-kb9fj-gw0-1598-15728116/PARQUET_DATA/CPU/_temporary/0/_temporary/attempt_202312050334123739114474908729722_1006_m_000000_1940/a=0.0/part-00000-0d2190b1-bf38-41a5-9cef-b886ed9b2f77.c000.snappy.parquet
[2023-12-05T00:41:00.845Z] at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:313)
[2023-12-05T00:41:00.845Z] at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:351)
[2023-12-05T00:41:00.845Z] at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:401)
[2023-12-05T00:41:00.845Z] at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:464)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:443)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:149)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.DynamicPartitionDataWriter.newOutputWriter(FileFormatDataWriter.scala:241)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.DynamicPartitionDataWriter.write(FileFormatDataWriter.scala:262)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:278)
[2023-12-05T00:41:00.846Z] at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1473)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:286)
[2023-12-05T00:41:00.846Z] ... 9 more
[2023-12-05T00:41:00.846Z]
[2023-12-05T00:41:00.846Z] Driver stacktrace:
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
[2023-12-05T00:41:00.846Z] at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
[2023-12-05T00:41:00.846Z] at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
[2023-12-05T00:41:00.846Z] at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
[2023-12-05T00:41:00.846Z] at scala.Option.foreach(Option.scala:407)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
[2023-12-05T00:41:00.846Z] at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
[2023-12-05T00:41:00.846Z] at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:200)
[2023-12-05T00:41:00.846Z] ... 32 more
[2023-12-05T00:41:00.846Z] Caused by: org.apache.spark.SparkException: Task failed while writing rows.
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:296)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:210)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
[2023-12-05T00:41:00.846Z] at org.apache.spark.scheduler.Task.run(Task.scala:131)
[2023-12-05T00:41:00.846Z] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
[2023-12-05T00:41:00.846Z] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
[2023-12-05T00:41:00.846Z] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
[2023-12-05T00:41:00.846Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2023-12-05T00:41:00.846Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2023-12-05T00:41:00.846Z] ... 1 more
[2023-12-05T00:41:00.846Z] Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: File already exists: file:/tmp/pyspark_tests/it-jenkins-rapids-it-non-utc-dev-5-cttk8-kb9fj-gw0-1598-15728116/PARQUET_DATA/CPU/_temporary/0/_temporary/attempt_202312050334123739114474908729722_1006_m_000000_1940/a=0.0/part-00000-0d2190b1-bf38-41a5-9cef-b886ed9b2f77.c000.snappy.parquet
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:313)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:351)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:401)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:464)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:443)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118)
[2023-12-05T00:41:00.846Z] at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:248)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:390)
[2023-12-05T00:41:00.846Z] at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:349)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:149)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.DynamicPartitionDataWriter.newOutputWriter(FileFormatDataWriter.scala:241)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.DynamicPartitionDataWriter.write(FileFormatDataWriter.scala:262)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:278)
[2023-12-05T00:41:00.846Z] at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1473)
[2023-12-05T00:41:00.846Z] at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:286)
[2023-12-05T00:41:00.846Z] ... 9 more
[2023-12-05T00:41:00.846Z] = 1 failed, 19073 passed, 2548 skipped, 389 xfailed, 403 xpassed, 881 warnings in 5386.29s (1:29:46) =
script returned exit code 1
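For context, here is a minimal sketch of what test_part_write_round_trip[Float] exercises: writing a DataFrame dynamically partitioned by a float column to Parquet and reading it back. The sample rows, session settings, and output path are illustrative stand-ins for the harness's generated data, not the actual test code.

```python
# Hypothetical standalone repro of the failing pattern: a Parquet write
# dynamically partitioned by a float column, followed by a read-back.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("part_write_round_trip_repro")
         .getOrCreate())

# Float partition column 'a' (including the a=0.0 value seen in the
# failing attempt path) plus an integer payload column 'b'. The real
# test generates its data from DATAGEN_SEED=1701731474.
df = spark.createDataFrame(
    [(0.0, 1), (0.0, 2), (1.5, 3), (float("nan"), 4)],
    ["a", "b"])

out = "/tmp/pyspark_tests/PARQUET_DATA/CPU"  # illustrative path
df.write.mode("overwrite").partitionBy("a").parquet(out)

# Round trip: the real test compares GPU and CPU outputs; here we just
# confirm the partitioned data reads back with the same row count.
assert spark.read.parquet(out).count() == df.count()

# To rerun the actual test in the spark-rapids integration harness
# (assuming the harness honors the DATAGEN_SEED environment variable,
# as the test id in the log suggests), something like:
#   DATAGEN_SEED=1701731474 ./integration_tests/run_pyspark_from_build.sh \
#       -k "test_part_write_round_trip and Float"
```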
Environment details
Spark 3.1.1 (the 311 shim)
branch-24.02