Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiang Song committed Jun 4, 2024
1 parent 2299645 commit 33f0fd1
Showing 1 changed file with 183 additions and 0 deletions.
183 changes: 183 additions & 0 deletions tests/end2end-tests/graphstorm-mt/mgpu_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,201 @@ then
exit -1
fi

# Verify that the training log reports reconstruct_node_feat results for both
# evaluation splits. "<split> reconstruct_node_feat" is a substring of
# "Best <split> reconstruct_node_feat", so the second grep also counts the
# "Best" lines; it must therefore find at least bst_cnt + 1 lines.
for eval_split in Test Validation
do
    bst_cnt=$(grep "Best $eval_split reconstruct_node_feat" /tmp/train_log.txt | wc -l)
    if [ "$bst_cnt" -lt 1 ]
    then
        echo "We use SageMaker task tracker, we should have Best $eval_split reconstruct_node_feat"
        exit -1
    fi

    cnt=$(grep "$eval_split reconstruct_node_feat" /tmp/train_log.txt | wc -l)
    if [ "$cnt" -lt $((bst_cnt + 1)) ]
    then
        echo "We use SageMaker task tracker, we should have $eval_split reconstruct_node_feat"
        exit -1
    fi
done

# The listing of the save directory must contain exactly 3 lines mentioning
# "epoch" (the failure message calls this the specified topk).
cnt=$(ls -l /data/gsgnn_mt/ | grep epoch | wc -l)
if [ "$cnt" -ne 3 ]
then
    echo "The number of save models $cnt is not equal to the specified topk 3"
    exit -1
fi

# Drop the saved models and the log so the next run starts from a clean state.
rm -fr /data/gsgnn_mt/
rm /tmp/train_log.txt

# Multi-task training run with the RGAT encoder (2 layers, fanout "4,4").
# Models are written under /data/gsgnn_mt/ and the log goes to
# /tmp/train_log.txt, which the checks after this run inspect.
echo "**************[Multi-task] dataset: Movielens, RGAT layer 2, node feat: fixed HF BERT, BERT nodes: movie, inference: full-graph, save model"
python3 -m graphstorm.run.gs_multi_task_learning \
    --workspace $GS_HOME/training_scripts/gsgnn_mt \
    --num-trainers $NUM_TRAINERS \
    --num-servers 1 \
    --num-samplers 0 \
    --part-config /data/movielen_100k_multi_task_train_val_1p_4t/movie-lens-100k.json \
    --ip-config ip_list.txt \
    --ssh-port 2222 \
    --cf ml_nc_ec_er_lp.yaml \
    --save-model-path /data/gsgnn_mt/ \
    --save-model-frequency 1000 \
    --logging-file /tmp/train_log.txt \
    --logging-level debug \
    --preserve-input True \
    --num-layers 2 \
    --fanout "4,4" \
    --model-encoder-type rgat

# Pass the launcher's exit status to error_and_exit. That helper is not defined
# in this section; presumably it aborts the script on a non-zero status -- confirm.
error_and_exit $?

# check prints
#
# For each task name and each evaluation split (Test, Validation) the training
# log must contain:
#   * at least one line matching "Best <split> <task>", and
#   * at least one more line matching "<split> <task>" than lines matching
#     "Best <split> <task>".
# "<split> <task>" is a substring of "Best <split> <task>", so the second grep
# also counts the "Best" lines; hence the bst_cnt + 1 threshold.
# grep | wc -l is used instead of grep -c so the count is still 0 (and the
# check fails) if the log file does not exist.
# Each failure message names exactly the string that was searched for.
for task_name in node_classification edge_classification edge_regression link_prediction reconstruct_node_feat
do
    for eval_split in Test Validation
    do
        bst_cnt=$(grep "Best $eval_split $task_name" /tmp/train_log.txt | wc -l)
        if [ "$bst_cnt" -lt 1 ]
        then
            echo "We use SageMaker task tracker, we should have Best $eval_split $task_name"
            exit -1
        fi

        cnt=$(grep "$eval_split $task_name" /tmp/train_log.txt | wc -l)
        if [ "$cnt" -lt $((bst_cnt + 1)) ]
        then
            echo "We use SageMaker task tracker, we should have $eval_split $task_name"
            exit -1
        fi
    done
done

# The listing of the save directory must contain exactly 3 lines mentioning
# "epoch" (the failure message calls this the specified topk).
cnt=$(ls -l /data/gsgnn_mt/ | grep epoch | wc -l)
if [ "$cnt" -ne 3 ]
then
    echo "The number of save models $cnt is not equal to the specified topk 3"
    exit -1
fi

# Drop the saved models and the log so the next run starts from a clean state.
rm -fr /data/gsgnn_mt/
rm /tmp/train_log.txt

# Multi-task training run with learnable node embeddings enabled
# (--use-node-embeddings True). No --model-encoder-type or --num-layers is
# passed here, so those come from ml_nc_ec_er_lp.yaml or the launcher's
# defaults -- presumably RGCN with 1 layer as the banner states; confirm.
# The launcher must remain the last command of this section so that a status
# check placed right after it sees the launcher's exit code.
echo "**************[Multi-task with learnable embedding] dataset: Movielens, RGCN layer 1, node feat: fixed HF BERT, BERT nodes: movie, with learnable node embedding, inference: full-graph, save model"
python3 -m graphstorm.run.gs_multi_task_learning \
    --workspace $GS_HOME/training_scripts/gsgnn_mt \
    --num-trainers $NUM_TRAINERS \
    --num-servers 1 \
    --num-samplers 0 \
    --part-config /data/movielen_100k_multi_task_train_val_1p_4t/movie-lens-100k.json \
    --ip-config ip_list.txt \
    --ssh-port 2222 \
    --cf ml_nc_ec_er_lp.yaml \
    --save-model-path /data/gsgnn_mt/ \
    --save-model-frequency 1000 \
    --logging-file /tmp/train_log.txt \
    --logging-level debug \
    --preserve-input True \
    --use-node-embeddings True

Expand Down

0 comments on commit 33f0fd1

Please sign in to comment.