Implementation for "Enhancing Sentence Representation with Visually-supervised Multimodal Pre-training"
Requirements:
- Python (>=3.5)
- torch (>=1.1.0)
- transformers (>=2.3.0)
# For Flickr30K
# Run the split script from inside datasets/ in a subshell so the directory
# change does not leak into the commands below, and skip the script entirely
# if the directory cannot be entered.
# NOTE(review): assumes the scripts and cfg/ used below are reachable from the
# directory these commands are launched from, not from datasets/ -- confirm.
(cd datasets && python split_flickr_data.py)

# ViP Pretraining
python vip_pretraining.py --cfg cfg/pretrain-flickr-resnet.yml

# Unsupervised tasks: for each task, run unsupervised_nli.py with that task's
# config, then snli_unsupervised.py on that task's data folder.
# NOTE(review): qqp is listed once on purpose; running the identical pair a
# second time is redundant. If a further task was intended, add it here.
for task in snli rte qnli mnli mnli-mm mrpc qqp; do
  python unsupervised_nli.py --cfg "cfg/unsupervised/${task}.yml"
  python snli_unsupervised.py --data_folder "ViP/unsupervised/flickr-resnet/${task}"
done