forked from eubr-atmosphere/a-GPUBench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
grid_search.py
79 lines (67 loc) · 3.51 KB
/
grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/python3
"""
Copyright 2021 Giovanni Dispoto
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import xmltodict
import os
import logging
import sys
import subprocess
#Elements used in grid search
network_type = ['vgg_16', 'resnet_v1_50', 'alexnet_v2']
network_depth = [1,2,3] #for resnet
optimizers = ['adam', 'momentum', 'adadelta']
momentum = [0.9, 0.8, 0.7]
learning_rate_decay_type = ['fixed', 'polynomial', 'exponential']
learning_rate = [0.001, 0.0001, 0.00001]
dropout = [0.2, 0.3, 0.4]
batch_size = [32,64]
#base xml used in order to fill it with information and run a training session
config_name = 'base_grid.xml'
for network in network_type:
for optimizer in optimizers:
for lr in learning_rate:
for drop in dropout:
for bs in batch_size:
xml_file = os.path.join("/data/src/apps/tf/confs", config_name)
if not os.path.exists(xml_file):
logging.error("XML file %s not found", xml_file)
sys.exit(1)
# Load XML file
with open(xml_file) as fd:
doc = xmltodict.parse(fd.read(), force_list={'input_class'})
#fill xml with information obtained from grid search
#try with one configuration
doc['tensorflow_configuration']['network_type'] = network
doc['tensorflow_configuration']['optimizer'] = optimizer
doc['tensorflow_configuration']['momentum'] = 0.7 #not always used, but setted anyway
doc['tensorflow_configuration']['learning_rate'] = lr
doc['tensorflow_configuration']['dropout'] = drop
doc['tensorflow_configuration']['batch_size'] = bs
doc['tensorflow_configuration']['learning_rate_decay_type'] = 'fixed' #using fixed learning rate
#save configuration
with open(os.path.join(base_path, 'test_1.xml'), 'w') as result_file:
result_file.write(xmltodict.unparse(doc))
#modify grid_conf file specify which configuration to use
f = open(os.path.join(base_path,'grid_conf'), "w")
f.write("configuration="+"test_1")
f.close()
#try a run
command = "python3 ./app/vm_scripts/launch_local_experiment.py -a tf --parameters-list /data/config/grid_conf --profile GPU --output ./data/output"
logging.info("command is %s", command)
cmd = subprocess.Popen(command, shell=True)
retcode = cmd.wait()
if retcode == 0:
logging.info("launched experiment")
else:
logging.error("Error in launching experiment")
sys.exit(1)