forked from aces/Loris-MRI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbatch_uploads_tarchive.pl
executable file
·279 lines (210 loc) · 7.81 KB
/
batch_uploads_tarchive.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/usr/bin/perl -w
=pod
=head1 NAME
batch_uploads_tarchive - upload a batch of DICOM archives using script
C<tarchiveLoader.pl>
=head1 SYNOPSIS
./batch_uploads_tarchive
=head1 DESCRIPTION
This script uploads a list of DICOM archives to the database by calling script
C<tarchiveLoader.pl> on each file in succession. The list of files to process is read
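from C<STDIN>, one DICOM archive per line. Each line must contain the file name
of the archive followed by its associated UploadID, separated by a space. Each
file name is assumed to be a path relative to C<tarchiveLibraryDir> (see below).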
The following settings of file F<$ENV{LORIS_CONFIG}/.loris_mri/prod> affect the
behaviour of C<batch_uploads_tarchive> (where C<$ENV{LORIS_CONFIG}> is the
value of the Unix environment variable C<LORIS_CONFIG>):
=over 4
=item *
B<dataDirBasepath> : controls where the C<STDOUT> and C<STDERR> of each qsub
command (see below) will go, namely in
F<< $dataDirBasepath/batch_output/tarstdout.log<index> >> and
F<< $dataDirBasepath/batch_output/tarstderr.log<index> >>
(where C<< <index> >> is the index of the DICOM archive processed, the
first file having index 1).
=item *
B<tarchiveLibraryDir>: directory that contains the DICOM archives to process.
The path of the files listed on C<STDIN> should be relative to this directory.
=item *
B<is_qsub>: whether the output (STDOUT) of each C<tarchiveLoader.pl> command
should be processed by the C<qsub> Unix command (allows batch execution of jobs
on the Sun Grid Engine, if available). If set, then the C<qsub> command will
send its C<STDOUT> and C<STDERR> according to the value of C<dataDirBasepath>
(see above).
=item *
B<mail_user>: upon completion of the script, an email will be sent to email address
C<$mail_user> containing the list of files processed by C<batch_uploads_tarchive>.
=back
File prod should also contain the information needed to connect to the database in an
array C<@db> containing four elements:
=over 4
=item *
The database name
=item *
The SQL user name used to connect to the database
=item *
The password for the user identified above
=item *
The database hostname
=back
=head1 TO DO
Code cleanup: remove unused C<-D> and C<-v> program arguments
=head1 LICENSING
License: GPLv3
=head1 AUTHORS
LORIS community <loris.info@mcin.ca> and McGill Centre for Integrative
Neuroscience
=cut
use strict;
use warnings;
no warnings 'once';
use Getopt::Tabular;
use NeuroDB::DBI;
use NeuroDB::ExitCodes;
use NeuroDB::Database;
use NeuroDB::DatabaseException;
use NeuroDB::objectBroker::ObjectBrokerException;
use NeuroDB::objectBroker::ConfigOB;
# ------------------------------------------------------------------
## Command-line options, help text and usage message
# ------------------------------------------------------------------
# $profile : name of the profile (configuration) file; it is mandatory and
#            its presence is checked right after option parsing.
# $verbose : verbosity flag, set by -verbose.
my $profile      = undef;
my $verbose      = 0;
my $profile_desc = "name of the config file in ../dicom-archive/.loris_mri";

my @opt_table = (
    [ "Basic options", "section" ],
    [ "-profile", "string",  1, \$profile, $profile_desc ],
    [ "-verbose", "boolean", 1, \$verbose, "Be verbose." ]
);

# The help text describes the input format actually enforced by the main
# loop: every line must hold an archive path AND its UploadID, otherwise the
# script stops with the MISSING_ARG exit code.
my $Help = <<HELP;
******************************************************************************
Run tarchiveLoader.pl in batch mode
******************************************************************************
This script runs tarchiveLoader.pl insertion on multiple DICOM archives. The list
of DICOM archives are provided through a text file (e.g. tarchive_list.txt)
with one DICOM archive per line. Each line must contain the relative path to
the DICOM archive from the tarchive directory (/data/project/data/tarchive)
followed by the UploadID associated with that archive, separated by a space.
An example of what tarchive_list.txt might contain for 3 DICOM archives to be
inserted:
DCM_2015-09-10_MTL0709_475639_V1.tar 101
DCM_2015-09-10_MTL0709_475639_V2.tar 102
DCM_2015-09-10_MTL0709_475639_V3.tar 103
HELP

my $Usage = <<USAGE;
usage: ./batch_uploads_tarchive -profile prod < tarchive_list.txt
$0 -help to list options
USAGE

# Register the help/usage texts, then parse @ARGV against the option table.
# The script exits with status 1 when GetOptions returns a false value.
Getopt::Tabular::SetHelp($Help, $Usage);
Getopt::Tabular::GetOptions(\@opt_table, \@ARGV) or exit 1;
# ------------------------------------------------------------------
## Load the profile (configuration) file
# ------------------------------------------------------------------
# The -profile option is mandatory: without it, print the help and usage
# texts and stop with exit status 3.
unless ($profile) {
    print "You need to specify a profile file using the option '-profile'\n",
          $Help,
          "\n$Usage\n";
    exit 3;
}

# Evaluate the profile inside package Settings so that the variables it
# defines can be read as @Settings::db and so on.
{
    package Settings;
    do "$ENV{LORIS_CONFIG}/.loris_mri/$profile";
}

# An empty @Settings::db after the 'do' is treated as "profile not found".
# ($profile is necessarily true here because of the exit above.)
unless (@Settings::db) {
    print "\n\tERROR: You don't have a configuration file named "
        . "'$profile' in: $ENV{LORIS_CONFIG}/.loris_mri/ \n\n";
    exit 2;
}
# ------------------------------------------------------------------
## Open the database connections
# ------------------------------------------------------------------
# Legacy connection handle, created from the four elements of @Settings::db.
my $dbh = NeuroDB::DBI::connect_to_db(@Settings::db);

# Object-based (Moose) connection, built from the same settings:
# database name, user name, password and host name, in that order.
my $db = do {
    my ($name, $user, $password, $host) = @Settings::db;
    NeuroDB::Database->new(
        databaseName => $name,
        userName     => $user,
        password     => $password,
        hostName     => $host
    );
};
$db->connect();

print "\nSuccessfully connected to database \n";
# ------------------------------------------------------------------
## Read the configuration settings through the ConfigOB object broker
# ------------------------------------------------------------------
my $configOB = NeuroDB::objectBroker::ConfigOB->new(db => $db);

my $data_dir           = $configOB->getDataDirPath();
my $tarchiveLibraryDir = $configOB->getTarchiveLibraryDir();
my $mail_user          = $configOB->getMailUser();
my $is_qsub            = $configOB->getIsQsub();
my $converter          = $configOB->getConverter();
my $bin_dirPath        = $configOB->getMriCodePath();

# Debug level, incremented by the leftover -D option handling further down.
my $debug = 0;

# Base names of the per-archive log files; the index of the archive being
# processed is appended to them for each submitted job.
my $stdoutbase = "$data_dir/batch_output/tarstdout.log";
my $stderrbase = "$data_dir/batch_output/tarstderr.log";

# Log file names of the job currently being submitted.
my ($stdout, $stderr) = ('', '');
# Consume any leading dash-options still present in @ARGV:
#   --  stops the processing, -D raises the debug level, -v the verbosity.
# The emptiness of @ARGV is tested first so that no "uninitialized value"
# warning is emitted when there is nothing left to examine, and a lexical
# is used rather than the global $_.
while (@ARGV && $ARGV[0] =~ /^-/) {
    my $arg = shift @ARGV;
    last if $arg =~ /^--$/;           ## -- ends argument processing
    $debug++   if $arg =~ /^-D/;      ## debug level
    $verbose++ if $arg =~ /^-v/;      ## verbosity
}
## Slurp every line of STDIN into @inputs (`find ....... | this_script`),
## strip the line terminators, then release STDIN.
my @inputs = <STDIN>;
chomp @inputs;

## Input lines for which a job has been launched; filled by the main loop.
my @submitted = ();

close STDIN;
## Index of the archive being processed (the first archive has index 1); it
## is appended to the log file names and to the qsub job name.
my $counter = 0;

## foreach archive listed on STDIN, build the loader command and launch it
foreach my $input (@inputs) {
    # Each line holds the archive location followed by its UploadID.
    # split(' ', ...) splits on any run of whitespace (spaces or tabs) and
    # ignores leading whitespace, so no further tab handling is needed.
    my ($tarchive, $upload_id) = split(' ', $input);

    # Remove a leading $tarchiveLibraryDir so that the location is relative
    # to that directory. The directory is quoted (\Q...\E) so that characters
    # such as '.' or '+' in the path are matched literally, and anchored so
    # that only a prefix is removed.
    $tarchive =~ s/\A\Q$tarchiveLibraryDir\E// if defined $tarchive;

    if (!$tarchive || !$upload_id) {
        print STDERR "\nERROR: need to provide the ArchiveLocation and its "
                   . "associated UploadID separated by a space.\n\n";
        exit $NeuroDB::ExitCodes::MISSING_ARG;
    }

    $counter++;
    $stdout = $stdoutbase . $counter;
    $stderr = $stderrbase . $counter;

    ## Build the command line of the loader that matches the configured
    ## converter; the loader does all the real work.
    my $tarchive_path = "$tarchiveLibraryDir/$tarchive";
    my $command;
    if ($converter =~ m/dcm2mnc/i) {
        $command = sprintf(
            "tarchiveLoader.pl -profile %s -uploadID %s %s",
            $profile,
            quotemeta($upload_id),
            quotemeta($tarchive_path)
        );
    } elsif ($converter =~ m/dcm2niix/i) {
        my $python_config = $configOB->getPythonConfigFile();
        $command = sprintf(
            "%s/python/run_dicom_archive_loader.py -p %s -t %s",
            quotemeta($bin_dirPath),
            $python_config,
            quotemeta($tarchive_path)
        );
    } else {
        # Without this branch $command would stay undefined and an empty
        # command would be run and reported as submitted.
        my $converter_name = defined $converter ? $converter : '(undefined)';
        die "\nERROR: unsupported converter '$converter_name': expected a "
          . "value matching dcm2mnc or dcm2niix.\n\n";
    }

    if ($is_qsub) {
        ## qsub is enabled: feed the command to qsub on its standard input.
        ## The list form of open passes each argument as is (no shell), so
        ## log paths containing special characters are handled correctly.
        my @qsub_cmd = (
            'qsub', '-V',
            '-e', $stderr,
            '-o', $stdout,
            '-N', "process_tarchive_${counter}"
        );
        my $qsub_fh;
        unless (open($qsub_fh, '|-', @qsub_cmd)) {
            print STDERR "\nERROR: could not run qsub for $tarchive_path: $!\n\n";
            next;    # not submitted, so not reported in the email
        }
        print $qsub_fh $command;
        # close() on a pipe waits for the child and returns false when it
        # exited with a non-zero status.
        unless (close($qsub_fh)) {
            print STDERR "\nERROR: qsub failed for $tarchive_path "
                       . "(wait status $?)\n\n";
            next;
        }
    } else {
        ## qsub is not enabled: run the command directly and wait for it.
        my $status = system($command);
        if ($status == -1) {
            print STDERR "\nERROR: could not execute '$command': $!\n\n";
            next;    # the command never ran
        }
        if ($status != 0) {
            print STDERR "\nWARNING: '$command' returned a non-zero wait "
                       . "status ($status)\n\n";
        }
    }

    push @submitted, $input;
}
## Send the list of submitted archives to the configured mail user.
## The address setting is split on whitespace and handed to mail as separate
## arguments (list form of open, no shell involved). A failure to send the
## notification is reported on STDERR but does not change the exit code.
my @recipients = defined $mail_user ? split(' ', $mail_user) : ();
if (!@recipients) {
    print STDERR "\nWARNING: no mail user is configured; the email "
               . "notification was not sent.\n\n";
} else {
    my $mail_fh;
    if (open($mail_fh, '|-', 'mail', @recipients)) {
        print $mail_fh "Subject: BATCH_UPLOADS_TARCHIVE: "
                     . scalar(@submitted) . " studies submitted.\n";
        print $mail_fh join("\n", @submitted) . "\n";
        # close() on a pipe returns false when the command exited with a
        # non-zero status.
        close($mail_fh)
            or print STDERR "\nWARNING: the mail command failed "
                          . "(wait status $?)\n\n";
    } else {
        print STDERR "\nWARNING: could not run the mail command: $!\n\n";
    }
}

## exit $NeuroDB::ExitCodes::SUCCESS for find to consider this -cmd true (in case we ever run it that way...)
exit $NeuroDB::ExitCodes::SUCCESS;