forked from Zasder3/train-CLIP
train_finetune.py
import torch
from argparse import ArgumentParser

from pytorch_lightning import Trainer
from data.text_image_dm import TextImageDataModule
from models import CustomCLIPWrapper
from torchvision.models import resnet50
from transformers import AutoTokenizer, AutoModel


def main(hparams):
    # Image encoder: ImageNet-pretrained ResNet-50 with its classifier head
    # replaced by a linear projection into the 768-dim shared embedding space.
    img_encoder = resnet50(pretrained=True)
    img_encoder.fc = torch.nn.Linear(2048, 768)

    # Text encoder and tokenizer: DeCLUTR-sci-base from the Hugging Face hub.
    tokenizer = AutoTokenizer.from_pretrained("johngiorgi/declutr-sci-base")
    txt_encoder = AutoModel.from_pretrained("johngiorgi/declutr-sci-base")

    # Fall back to the full batch size when no gradient-accumulation
    # minibatch size is given on the command line.
    if hparams.minibatch_size < 1:
        hparams.minibatch_size = hparams.batch_size

    model = CustomCLIPWrapper(img_encoder, txt_encoder, hparams.minibatch_size,
                              avg_word_embs=True)
    dm = TextImageDataModule.from_argparse_args(hparams, custom_tokenizer=tokenizer)
    trainer = Trainer.from_argparse_args(hparams, precision=16, max_epochs=32)
    trainer.fit(model, dm)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--minibatch_size', type=int, default=0)
    parser = TextImageDataModule.add_argparse_args(parser)
    parser = Trainer.add_argparse_args(parser)
    args = parser.parse_args()
    main(args)
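
# Usage sketch: a minimal invocation, assuming TextImageDataModule exposes
# --folder and --batch_size arguments (as in the upstream train-CLIP project)
# and that the PyTorch Lightning Trainer contributes flags such as --gpus via
# add_argparse_args:
#
#   python train_finetune.py --folder data_dir --batch_size 512 --gpus 1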