From 7222cba8c1d81fa80076f30b385fdd94a4447929 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi
Date: Fri, 29 Dec 2023 12:09:19 -0800
Subject: [PATCH] remove unwanted file

---
 .../bulk_sampling/slurm-4552638.out           | 84 -------------------
 1 file changed, 84 deletions(-)
 delete mode 100644 benchmarks/cugraph/standalone/bulk_sampling/slurm-4552638.out

diff --git a/benchmarks/cugraph/standalone/bulk_sampling/slurm-4552638.out b/benchmarks/cugraph/standalone/bulk_sampling/slurm-4552638.out
deleted file mode 100644
index 214c51f89a3..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/slurm-4552638.out
+++ /dev/null
@@ -1,84 +0,0 @@
-Node IP: 10.248.13.130
-Num Nodes: 1
-Num GPUs Per Node: 8
-/scripts/run_sampling.sh: line 3: conda: command not found
-/scripts/run_sampling.sh: line 5: conda: command not found
-Cloning into '/opt/cugraph-patch'...
->>>> Using cluster configurtion for TCP
->>>> Logs written to: /logs
-properly waiting for workers to connect
-wait_for_workers.py - initializing client...done.
-wait_for_workers.py expected 8 but got 0, waiting...
-scheduler started.
-worker(s) started.
-waiting for worker pid 3003102 to finish before exiting script...
-wait_for_workers.py expected 8 but got 0, waiting...
-wait_for_workers.py got 8 workers, done.
-0
-Launching Python Script
-INFO:__main__:starting dask client
-INFO:__main__:dask client started
-INFO:__main__:dataset: ogbn_papers100M
-INFO:__main__:batch size: 512
-INFO:__main__:fanout: [10, 10, 10]
-INFO:__main__:seeds_per_call: 524288
-INFO:__main__:num epochs: 1
-INFO:__main__:ogbn_papers100M
-INFO:__main__:Number of input edges = 1,615,685,872
-INFO:__main__:constructed graph
-/usr/local/lib/python3.10/dist-packages/cudf/core/index.py:3284: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.
-  warnings.warn(
-/usr/local/lib/python3.10/dist-packages/cudf/core/index.py:3284: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead.
-  warnings.warn(
-INFO:__main__:input memory: 38776460928
-/scripts/cugraph_bulk_sampling.py:837: UserWarning: An Exception Occurred!
-  warnings.warn("An Exception Occurred!")
-Traceback (most recent call last):
-  File "/scripts/cugraph_bulk_sampling.py", line 809, in <module>
-    ) = benchmark_cugraph_bulk_sampling(
-  File "/scripts/cugraph_bulk_sampling.py", line 565, in benchmark_cugraph_bulk_sampling
-    os.makedirs(output_subdir)
-  File "/usr/lib/python3.10/os.py", line 225, in makedirs
-    mkdir(name, mode)
-FileExistsError: [Errno 17] File exists: '/samples/ogbn_papers100M[1]_b512_f[10, 10, 10]'
-
-Dask client created using /scripts/mg_utils/dask_scheduler.json
-Loading edge index for edge type paper__cites__paper
-Loading node labels for node type paper (offset=0)
-[Errno 17] File exists: '/samples/ogbn_papers100M[1]_b512_f[10, 10, 10]'
-----------------------------------------dataset = ogbn_papers100M completed----------------------------------------
-
-Dask client closed.
-[1703880232.482904] [luna-0521:3003367:0]     parser.c:1993  UCX  WARN  unused environment variable: UCX_MEMTYPE_CACHE (maybe: UCX_MEMTYPE_CACHE?)
-[1703880232.482904] [luna-0521:3003367:0]     parser.c:1993  UCX  WARN  (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
-3002880 /bin/bash /scripts/mg_utils/run-dask-process.sh scheduler workers
-3002934 /usr/bin/python /usr/local/bin/dask-scheduler --protocol=tcp --scheduler-file /scripts/mg_utils/dask_scheduler.json
-3003102 /usr/bin/python /usr/local/bin/dask-cuda-worker --rmm-pool-size=28G --rmm-async --local-directory=/tmp/abarghi --scheduler-file=/scripts/mg_utils/dask_scheduler.json --memory-limit=auto --device-memory-limit=auto
-3003232 /usr/bin/python -c from multiprocessing.resource_tracker import main;main(44)
-3003235 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=51) --multiprocessing-fork
-3003239 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=58) --multiprocessing-fork
-3003243 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=69) --multiprocessing-fork
-3003248 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=76) --multiprocessing-fork
-3003251 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=83) --multiprocessing-fork
-3003256 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=90) --multiprocessing-fork
-3003261 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=97) --multiprocessing-fork
-3003264 /usr/bin/python -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=45, pipe_handle=100) --multiprocessing-fork
-3221265 /home/selene-nfs/etc/dcgm-collector/venv/bin/python -m hwinf_dcgm_collector.hwinf_dcgm_collector
-pkill: killing pid 3221265 failed: Operation not permitted
-3003232 /usr/bin/python -c from multiprocessing.resource_tracker import main;main(44)
-3003235 python
-3003239 python
-3003243 python
-3003248 python
-3003251 python
-3003256 python
-3003261 python
-3003264 python
-3221265 /home/selene-nfs/etc/dcgm-collector/venv/bin/python -m hwinf_dcgm_collector.hwinf_dcgm_collector
-3002934 /usr/bin/python /usr/local/bin/dask-scheduler --protocol=tcp --scheduler-file /scripts/mg_utils/dask_scheduler.json
-3003102 /usr/bin/python /usr/local/bin/dask-cuda-worker --rmm-pool-size=28G --rmm-async --local-directory=/tmp/abarghi --scheduler-file=/scripts/mg_utils/dask_scheduler.json --memory-limit=auto --device-memory-limit=auto
-srun: Job 4552638 step creation temporarily disabled, retrying (Requested nodes are busy)
-srun: Step created for job 4552638
-slurmstepd: error: execve(): RAPIDS_NO_INITIALIZE=1: No such file or directory
-srun: error: luna-0521: task 0: Exited with exit code 2
-srun: launch/slurm: _step_signal: Terminating StepId=4552638.2