awslabs · classicsong · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024 · Apr 24, 2024
diff --git a/examples/peft_llm_gnn/README.md b/examples/peft_llm_gnn/README.md
@@ -1,6 +1,16 @@
+## Preparing the environment
+Please follow https://graphstorm.readthedocs.io/en/latest/install/env-setup.html to set up your GraphStorm environment.
+In addition, run the following commands to install the necessary Python packages:
+
+```
+pip install ipython
+pip install peft
+```
+
+## Preparing Amazon Review dataset
 This folder contains the data processing script to process the raw Amazon Review dataset
 downloaded from https://cseweb.ucsd.edu/~jmcauley/datasets/amazon_v2/. We use domain Video
-Games https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/metaFiles2/meta_Video_Games.json.gz 
+Games https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/metaFiles2/meta_Video_Games.json.gz
 and put it under raw_data as an example.
 ```
 wget https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_v2/metaFiles2/meta_Video_Games.json.gz \
@@ -15,21 +25,21 @@ python preprocess_amazon_review.py
 
 Once the data are processed, run the following command to construct a graph
 for PEFT LLM-GNNs in GraphStorm for node classification on level-3 product type.
-The command takes `AR_Video_Games.json` that specifies the input data for graph 
+The command takes `AR_Video_Games.json` that specifies the input data for graph
 construction, constructs the graph, and saves the partition to `amazon_review`.
 
 ```
 python -m graphstorm.gconstruct.construct_graph \
 			--conf-file AR_Video_Games.json \
 			--output-dir datasets/amazon_review_Video_Games/ \
 			--graph-name amazon_review \
-			--num-processes 16 --num-parts 1 \ 
+			--num-processes 16 --num-parts 1 \
 			--skip-nonexist-edges --add-reverse-edges
 
 ```
 
 ## Train LLM-GNN model to predict product type of items
-The command below runs parameter-efficient fine-tuning of LLM-GNNs on node 
+The command below runs parameter-efficient fine-tuning of LLM-GNNs on node
 classification and link prediction via `main_nc.py` and `main_lp.py`.
 
 
@@ -38,6 +48,8 @@ classification and link prediction via `main_nc.py` and `main_lp.py`.
 WORKSPACE=$PWD
 dataset=amazon_review
 domain=Video_Games
+cp -r datasets/amazon_review_"$domain" datasets/amazon_review_nc_"$domain"
+
 python3 -m graphstorm.run.launch \
     --workspace "$WORKSPACE" \
     --part-config datasets/amazon_review_nc_"$domain"/amazon_review.json \
@@ -59,7 +71,7 @@ dataset=amazon_review
 domain=Video_Games
 python -m graphstorm.run.launch \
     --workspace "$WORKSPACE" \
-    --part-config "$WORKSPACE"/dataset/amazon_review_"$domain"/amazon_review.json \
+    --part-config "$WORKSPACE"/datasets/amazon_review_"$domain"/amazon_review.json \
     --ip-config ./ip_list.txt \
     --num-trainers 8 \
     --num-servers 1 \