Bug fix multiple lms #553
Merged · 13 commits · Oct 27, 2023
python/graphstorm/model/lm_embed.py (31 changes: 19 additions & 12 deletions)
@@ -181,6 +181,22 @@ def get_lm_node_feat(self, ntype):
         """
         return self._lm_node_feats[ntype]
 
+    def get_feat_size(self, ntype):
+        """ Get the LM output feature size for a node type.
+
+        Parameters
+        ----------
+        ntype : str
+            The node type
+
+        Returns
+        -------
+        int : The feature size of the LM model
+        """
+        assert len(self._lm_models) > 0
+        lm_type = self._lm_map[ntype]
+        return self._lm_models[lm_type].feat_size
 
     @property
     def ntypes(self):
         """ Get all node types with text features.
@@ -191,15 +207,6 @@ def ntypes(self):
         """
         return list(self._lm_map.keys())
 
-    @property
-    def feat_size(self):
-        """ The feature size of the BERT model.
-        """
-        assert len(self._lm_models) > 0
-        for model in self._lm_models.values():
-            return model.feat_size
-        return -1
-
     @property
     def device(self):
         """ The device where the model is on.
@@ -418,9 +425,9 @@ def __init__(self,
         self.use_cache = False
         self.lm_emb_cache = LMCache(g, self._lm_models, embed_path=cached_embed_path)
 
-        self._feat_size = self._lm_models.feat_size
+        self._feat_size = self._lm_models.get_feat_size(self._lm_models.ntypes[0])
         for lm_model in self._lm_models.lm_models:
-            assert self.out_dims == lm_model.feat_size, \
+            assert self._feat_size == lm_model.feat_size, \
                 "All Language models should have the same output embedding " \
                 "dimension, otherwise please use GSLMNodeEncoderInputLayer " \
                 "(--model-encoder-type mlp) instead of GSLMNodeLMInputLayer " \
@@ -595,7 +602,7 @@ def __init__(self,
             lm_ntypes = lm_config["node_types"]
             # Update feature size
             for ntype in lm_ntypes:
-                adjust_feat_size[ntype] += lm_models.feat_size
+                adjust_feat_size[ntype] += lm_models.get_feat_size(ntype)
                 if get_rank() == 0:
                     logging.debug('Node %s adds lm %s features %d->%d',
                                   ntype, lm_config["lm_type"], feat_size[ntype],
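This second hunk applies the same per-type lookup when sizing the encoder's input features. A self-contained sketch of that bookkeeping, reusing the hypothetical dictionaries from the earlier example:

lm_feat_sizes = {"bert": 768, "albert": 1024}   # hypothetical, as above
lm_map = {"movie": "bert", "user": "albert"}

feat_size = {"movie": 64, "user": 32}           # raw (non-text) feature dims
adjust_feat_size = dict(feat_size)
lm_configs = [
    {"lm_type": "bert", "node_types": ["movie"]},
    {"lm_type": "albert", "node_types": ["user"]},
]
for lm_config in lm_configs:
    for ntype in lm_config["node_types"]:
        # Grow each node type's input width by the dim of *its own* LM,
        # rather than by one global LM size.
        adjust_feat_size[ntype] += lm_feat_sizes[lm_map[ntype]]

assert adjust_feat_size == {"movie": 832, "user": 1056}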
tests/end2end-tests/graphstorm-lp/mgpu_test.sh (5 changes: 5 additions & 0 deletions)
@@ -311,6 +311,11 @@ error_and_exit $?
 
 rm -fr /data/gsgnn_lp_ml_dotprod_text/*
 
+echo "**************dataset: Movielens, RGCN layer 1, BERT/ALBERT nodes: movie, user (different hidden dims), inference: mini-batch, negative_sampler: joint, decoder: Dot Product, exclude_training_targets: true, save model"
+python3 -m graphstorm.run.launch --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lm_encoder_lp_train_val_1p_4t/movie-lens-100k-text.json --ip-config ip_list.txt --ssh-port 2222 $GS_HOME/python/graphstorm/run/gsgnn_lp/gsgnn_lp.py --cf ml_lp_text_multiple_lm_models.yaml --fanout '4' --num-layers 1 --lp-decoder-type dot_product --train-etype user,rating,movie
+
+error_and_exit $?
+
 echo "**************dataset: Movielens, RGCN layer 1, node feat: fixed HF BERT, inference: full-graph, negative_sampler: localuniform, exclude_training_targets: true, test_negative_sampler: uniform"
 python3 -m graphstorm.run.gs_link_prediction --workspace $GS_HOME/training_scripts/gsgnn_lp --num-trainers $NUM_TRAINERS --num-servers 1 --num-samplers 0 --part-config /data/movielen_100k_lp_train_val_1p_4t/movie-lens-100k.json --ip-config ip_list.txt --ssh-port 2222 --cf ml_lp.yaml --fanout '5' --num-layers 1 --use-mini-batch-infer false --exclude-training-targets True --reverse-edge-types-map user,rating,rating-rev,movie --train-negative-sampler localuniform --eval-negative-sampler uniform
training_scripts/gsgnn_lp/ml_lp_text_multiple_lm_models.yaml (62 changes: 62 additions & 0 deletions)
@@ -0,0 +1,62 @@
---
version: 1.0
lm_model:
  node_lm_models:
    -
      lm_type: bert
      model_name: "bert-base-uncased"
      gradient_checkpoint: true
      node_types:
        - movie
    -
      lm_type: albert
      model_name: "albert-large-v1"
      gradient_checkpoint: true
      node_types:
        - user
gsf:
  basic:
    backend: gloo
    ip_config: ip_list.txt
    part_config: movielen_100k_text_lp_train_val_1p_4t/movie-lens-100k.json
    verbose: false
    save_perf_results_path: null
  gnn:
    model_encoder_type: rgcn
    fanout: "4"
    num_layers: 1
    hidden_size: 128
    use_mini_batch_infer: true
  lm:
    lm_train_nodes: 0
    lm_infer_batch_size: 32
    freeze_lm_encoder_epochs: 0
  input:
    restore_model_path: null
  output:
    save_model_path: null
    save_embed_path: null
  hyperparam:
    dropout: 0.
    lr: 0.001
    lm_tune_lr: 0.0001
    num_epochs: 3
    batch_size: 128
    wd_l2norm: 0
    no_validation: false
  rgcn:
    num_bases: -1
    use_self_loop: true
    lp_decoder_type: dot_product
    sparse_optimizer_lr: 1e-2
    use_node_embeddings: false
  link_prediction:
    num_negative_edges: 4
    num_negative_edges_eval: 100
    train_negative_sampler: joint
    eval_etype:
      - "user,rating,movie"
    train_etype:
      - "user,rating,movie"
    exclude_training_targets: false
    reverse_edge_types_map: []
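As a quick sanity check, the LM-to-node-type mapping in this config can be read back with PyYAML (assuming the file is saved under the name used by the test above):

import yaml  # PyYAML

with open("ml_lp_text_multiple_lm_models.yaml") as f:
    cfg = yaml.safe_load(f)

for lm in cfg["lm_model"]["node_lm_models"]:
    print(lm["model_name"], "->", lm["node_types"])
# bert-base-uncased -> ['movie']
# albert-large-v1 -> ['user']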