-
Notifications
You must be signed in to change notification settings - Fork 3
/
axiome.vala
1514 lines (1424 loc) · 48.8 KB
/
axiome.vala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
using Gee;
using Xml;
namespace AXIOME {
HashMap<string, string> primers;
[CCode(cname = "DATADIR")]
extern const string DATADIR;
[CCode(cname = "MODDIR")]
extern const string MODDIR;
[CCode(cname = "BINDIR")]
extern const string BINDIR;
/**
 * Create a path for a file inside the bin directory where AXIOME was installed.
 *
 * e.g., bin_dir("aq-nmf") = "/usr/local/bin/aq-nmf"
 *
 * @param filename the bare name of the executable or fragment
 * @return the name joined onto the compile-time BINDIR prefix
 */
public string bin_dir(string filename) {
    var path = Path.build_filename(BINDIR, filename);
    return path;
}
/**
 * Create a path for a file inside the shared directory where AXIOME was installed.
 *
 * e.g., data_dir("nmf.R") = "/usr/local/share/axiome/nmf.R"
 *
 * @param filename the bare name of the data file
 * @return the name joined onto the compile-time DATADIR prefix
 */
public string data_dir(string filename) {
    var path = Path.build_filename(DATADIR, filename);
    return path;
}
/**
 * Checks if a file name contains things that will upset Make.
 *
 * Only a conservative whitelist of characters is accepted; anything with
 * whitespace, shell metacharacters, or Make-special characters (and the
 * empty string) is rejected.
 *
 * @param filename the path to vet
 * @return true if the name is safe to embed in a Makefile
 */
public bool is_valid_filename(string filename) {
    var safe_pattern = "^[A-Za-z0-9/_.:+=%~@{}\\[\\]-]+$";
    return Regex.match_simple(safe_pattern, filename);
}
/**
 * Structure for describing versions of AXIOME
 */
public struct version {
    int major;
    int minor;
    public version(int major, int minor) {
        this.major = major;
        this.minor = minor;
    }
    /**
     * Parse a "major.minor" string into a version.
     *
     * @param str the text to parse; must be exactly two dot-separated non-negative integers
     * @param result receives the parsed version; set to 0.0 when parsing fails
     * @return true only if the string was well-formed
     */
    internal static bool parse(string str, out version result) {
        result = version(0, 0);
        var parts = str.split(".");
        if (parts.length != 2) {
            return false;
        }
        int major;
        int minor;
        /* BUG FIX: int.parse() silently returns 0 for non-numeric text, so
         * garbage like "a.b" used to parse successfully as version 0.0.
         * int.try_parse() rejects malformed components outright. */
        if (!int.try_parse(parts[0], out major) || !int.try_parse(parts[1], out minor)) {
            return false;
        }
        if (major < 0 || minor < 0) {
            return false;
        }
        result = version(major, minor);
        return true;
    }
    /**
     * Strict ordering: true when this version precedes the other.
     */
    internal bool older_than(version other) {
        return this.major < other.major || this.major == other.major && this.minor < other.minor;
    }
    public string to_string() {
        return @"$(major).$(minor)";
    }
    /**
     * Raise this version to the other, if the other is newer.
     */
    internal void update(version other) {
        if (this.older_than(other)) {
            this.major = other.major;
            this.minor = other.minor;
        }
    }
}
/**
 * Convenience class for new sequence sources.
 *
 * To create a rule responsible for drawing sequences out of some entity (a file, program, or database), create a subclass.
 *
 * A sequence source is expected to provide a shell command to extract sequences and provide them in FASTA format. Each sample must be associated with a regular expression capable of extracting matching sequences from the FASTA.
 */
public abstract class BaseSource : RuleProcessor {
/**
 * {@inheritDoc}
 */
public override RuleType get_ruletype() {
return RuleType.SOURCE;
}
/**
 * Gets a primer, by name, from the primer database.
 *
 * Primers may be either a length, a name, or a string of nucleotides. Names are resolved using AXIOME's primer database. If a primer name begins with a #, the length of the named primer will be returned, instead of the primer itself.
 *
 * @param definition the XML element being processed; used only for error reporting
 * @param primer the primer specification from the configuration, or null
 * @return the shell-quoted primer (or its length as a decimal string when prefixed with #), or null if primer is null or unrecognised
 */
protected string? get_primer(Xml.Node *definition, string? primer) {
if (primer != null) {
/* Primer names and nucleotide sequences are matched case-insensitively. */
var up_primer = primer.up();
if (up_primer[0] == '#') {
/* Leading # asks for the length of the named primer rather than its sequence. */
up_primer = up_primer.substring(1);
if (primers.has_key(up_primer)) {
return primers[up_primer].length.to_string();
} else {
definition_error(definition, "Unknown primer %s. Ignorning, mumble, mumble.\n", primer);
return null;
}
} else if (primers.has_key(up_primer)) {
return Shell.quote(primers[up_primer]);
} else if (Regex.match_simple("^\\d+$", up_primer) || is_sequence(up_primer)) {
/* A bare number (a length) or a literal (degenerate) nucleotide sequence is passed through, shell-quoted. */
return Shell.quote(up_primer);
} else {
definition_error(definition, "Invalid primer %s. Ignorning, mumble, mumble.\n", primer);
return null;
}
}
return null;
}
/**
 * Produce the shell command needed to provide the sequence to an output pipe in FASTA format.
 *
 * For a FASTA file, this is trivially "cat".
 * @param definition the XML element causing this ruckus
 * @param samples the samples that are expected to be extracted from this data source
 * @param command where to write the command
 * @param output the nascent Makefile, if any extra rules are needed
 * @return whether the command is valid
 */
protected abstract bool generate_command(Xml.Node *definition, Collection<Sample> samples, StringBuilder command, Output output);
/**
 * For a sample XML tag, produce a valid regular expression that will appear in the FASTA headers for sequences belonging to this sample
 * @return if null, the sample is invalid; otherwise, a regular expression to filter the FASTA stream.
 */
protected abstract string? get_sample_id(Xml.Node *sample);
/**
 * Collect the child "sample" elements, register each with the output, and
 * emit the sequence-extraction command produced by the subclass.
 */
public override bool process(Xml.Node *definition, Output output) {
var samples = new HashMap<string, Sample>();
for (Xml.Node *sample = definition-> children; sample != null; sample = sample-> next) {
/* Skip text/comment nodes between elements. */
if (sample-> type != Xml.ElementType.ELEMENT_NODE) {
continue;
}
if (sample-> name != "sample") {
definition_error(definition, "Invalid element %s. Ignorning, mumble, mumble.\n", sample-> name);
continue;
}
/* The tag doubles as the regular expression used to bin FASTA records for this sample. */
var tag = get_sample_id(sample);
if (tag == null || tag == "") {
continue;
}
/* Two samples in one source may not share a tag; later sources may reuse it, though. */
if (samples.has_key(tag)) {
definition_error(definition, "Duplicated identifer %s on %s:%d. Skipping.\n", tag, sample-> doc-> url, sample-> line);
continue;
}
var sample_obj = output.add_sample(tag, sample);
samples[tag] = sample_obj;
/* Optional per-sample cap on contributed sequences; non-positive values are ignored. */
var limit = sample-> get_prop("limit");
if (limit != null) {
var limitval = int.parse(limit);
if (limitval > 0) {
samples[tag].limit = limitval;
}
}
}
var command = new StringBuilder();
if (!generate_command(definition, samples.values.read_only_view, command, output)) {
return false;
}
output.prepare_sequences(command.str, samples.values);
return true;
}
}
/**
 * Represents a sample from the input file, across all sequence sources.
 */
public class Sample : Object {
/**
 * The XML sample tag that generated this sample.
 */
public Xml.Node *xml { get; internal set; }
/**
 * The regular expression used the extract this sample from the source's FASTA stream.
 *
 * The special value "*" matches every sequence (see Output.prepare_sequences).
 */
public string tag { get; internal set; }
/**
 * The maximum number of sequences to allow from this sample, or all if non-positive.
 *
 * Output.add_sample initialises this to -1 (unlimited).
 */
public int limit { get; internal set; }
/**
 * The QIIME library identifier associated with this sample.
 *
 * Assigned sequentially by Output.add_sample in order of registration.
 */
public int id { get; internal set; }
}
/**
 * Type of stanzas in the XML input document.
 *
 * The numeric order is significant: RuleLookup rejects directives that appear
 * out of this order in the configuration file (definitions, then sources,
 * then analyses).
 */
/* NOTE(review): DEFINITON is a historical misspelling of DEFINITION, but it is
 * part of the public API (used by plugins), so renaming it would be a break. */
public enum RuleType { DEFINITON, SOURCE, ANALYSIS }
/**
 * Rule processor interface for analyses and data sources
 *
 * Concrete subclasses are registered with a RuleLookup and invoked once per
 * matching XML element in the configuration file.
 */
public abstract class RuleProcessor : Object {
/**
 * The type of the rule. This determines the order in which rules must appear.
 */
public abstract RuleType get_ruletype();
/**
 * Name of the XML tag for this sequence source.
 */
public abstract unowned string get_name();
/**
 * Path to a file that must be included in the Makefile.
 *
 * May be null when no extra makefile fragment is needed.
 */
public abstract unowned string ? get_include();
/**
 * What version of AXIOME was this feature introduced in?
 */
public abstract version introduced_version();
/**
 * Can this directive be included multiple times in a configuration file?
 *
 * When true, RuleLookup will reject any occurrence after the first.
 */
public abstract bool is_only_once();
/**
 * Create a processing stanza for the supplied definition.
 *
 * @param definition the XML element to process
 * @param output collector for generated make rules, samples, and settings
 * @return whether processing succeeded
 */
public abstract bool process(Xml.Node *definition, Output output);
}
/**
 * Check if a sequence is a valid (degenerate) nucleotide sequence.
 *
 * Accepts the upper-case IUPAC degenerate nucleotide alphabet; the empty
 * string is considered valid.
 *
 * @param sequence the candidate sequence
 * @return true if every character belongs to the alphabet
 */
public bool is_sequence(string sequence) {
    for (int i = 0; i < sequence.length; i++) {
        if ("ACGTKMSWRYBDHV".index_of_char(sequence[i]) < 0) {
            return false;
        }
    }
    return true;
}
/**
 * Pipeline options
 */
public enum Pipelines {
QIIME = 1,
MOTHUR = 2;
/**
 * Parse a pipeline name into a value.
 *
 * Matching is done through the GLib enum nick (lower case, hyphenated), so
 * input is lower-cased and underscores are converted to hyphens first.
 * @return the matching pipeline, or null if the name is unknown.
 */
public static Pipelines ? parse(string name) {
var enum_class = (EnumClass) typeof(Pipelines).class_ref();
var nick = name.down().replace("_", "-");
unowned GLib.EnumValue ? enum_value = enum_class.get_value_by_nick(nick);
if (enum_value != null) {
Pipelines value = (Pipelines) enum_value.value;
return value;
}
return null;
}
/**
 * Render the pipeline as its lower-case nick (e.g., "qiime", "mothur").
 */
public string to_string() {
return ((EnumClass) typeof (Pipelines).class_ref()).get_value(this).value_nick;
}
}
/**
 * Friendly names for taxonomic levels as used by QIIME/RDP
 *
 * The numeric value is the level index passed to summarize_taxa.py (-L).
 */
public enum TaxonomicLevel {
LIFE = 1,
DOMAIN = 2,
PHYLUM = 3,
CLASS = 4,
ORDER = 5,
FAMILY = 6,
GENUS = 7,
SPECIES = 8,
STRAIN = 9;
/**
 * Parse a level name via the GLib enum nick; case-insensitive, with
 * underscores treated as hyphens.
 * @return the matching level, or null if the name is unknown.
 */
public static TaxonomicLevel ? parse(string name) {
var enum_class = (EnumClass) typeof(TaxonomicLevel).class_ref();
var nick = name.down().replace("_", "-");
unowned GLib.EnumValue ? enum_value = enum_class.get_value_by_nick(nick);
if (enum_value != null) {
TaxonomicLevel value = (TaxonomicLevel) enum_value.value;
return value;
}
return null;
}
/**
 * Render the level as its lower-case nick (e.g., "phylum").
 */
public string to_string() {
return ((EnumClass) typeof (TaxonomicLevel).class_ref()).get_value(this).value_nick;
}
}
/**
 * Supported multiple sequence alignment tools.
 */
enum AlignMethod {
    INFERNAL,
    MUSCLE,
    PYNAST;
    /**
     * Parse an alignment method name, case-insensitively.
     * @return the matching method, or null if the name is unknown.
     */
    internal static AlignMethod? parse(string method) {
        var normalized = method.down();
        if (normalized == "infernal") {
            return INFERNAL;
        }
        if (normalized == "muscle") {
            return MUSCLE;
        }
        if (normalized == "pynast") {
            return PYNAST;
        }
        return null;
    }
    /**
     * Write the ALIGN_METHOD variable for this method into the Makefile.
     */
    internal void print(FileStream makefile) {
        string method;
        switch (this) {
        case AlignMethod.INFERNAL:
            method = "infernal";
            break;
        case AlignMethod.MUSCLE:
            method = "muscle";
            break;
        case AlignMethod.PYNAST:
            method = "pynast";
            break;
        default:
            /* Unreachable for valid values; nothing is printed, as before. */
            return;
        }
        makefile.printf("ALIGN_METHOD = %s\n", method);
    }
}
/**
 * Output processor responsible for collecting all information needed to generate the Makefile and mapping.txt
 */
public class Output : Object {
    /**
     * The output directory name.
     */
    public string dirname { get; private set; }
    /* Accumulates extra make rules contributed by rule processors. */
    StringBuilder makerules;
    /**
     * All the samples currently processed in the file.
     *
     * They can be from multiple sources.
     */
    public Gee.List<Sample> known_samples {
        owned get {
            return samples.read_only_view;
        }
    }
    ArrayList<Sample> samples;
    /* Recipe body of the seq.fasta/seq.group rule. */
    StringBuilder seqrule;
    /* Prerequisite list of the seq.fasta/seq.group rule. */
    StringBuilder seqsources;
    /* Counter giving each sequence-extraction log a unique name. */
    int sequence_preparations;
    /* Canonical path of the configuration file that produced this output. */
    string sourcefile;
    internal Pipelines pipeline;
    internal string? classification_method;
    internal string? otu_method;
    internal string? otu_refseqs;
    internal string? otu_blastdb;
    internal string? otu_chimera_refseqs;
    internal string? phylo_method;
    internal string? clust_ident;
    internal string? dist_cutoff;
    internal string? otu_flags;
    internal string? alignment_template;
    internal string? class_taxa;
    internal string? class_seqs;
    internal AlignMethod alignmethod;
    /**
     * The defined variables and their types.
     *
     * (i.e., all the def tags)
     */
    public HashMap<string, string> vars { get; private set; }
    /* Flavours for which beta-diversity/PCoA rules were already emitted. */
    Set<string> pcoa;
    /* Rarefaction depths for which rules were already emitted. */
    Set<int> rareified;
    /* Taxonomic summaries already emitted, keyed by level+flavour. */
    Set<string> summarized_otus;
    StringBuilder targets = new StringBuilder();
    /* Parsed XML documents owned by this object; freed in the destructor. */
    ArrayList<Xml.Doc*> doc_list;
    internal bool verbose;
    /* Note: targets is initialised at its declaration, so the constructor
     * no longer redundantly re-creates it. */
    internal Output(string dirname, string sourcefile) {
        this.dirname = dirname;
        this.sourcefile = realpath(sourcefile);
        sequence_preparations = 0;
        makerules = new StringBuilder();
        samples = new ArrayList<Sample>();
        seqrule = new StringBuilder();
        seqrule.printf("\t@echo Building sequence set...\n\t@test -d logs || mkdir logs\n\t@test ! -f seq.fasta || rm seq.fasta\n\t@test ! -f seq.group || rm seq.group\n");
        seqsources = new StringBuilder();
        pcoa = new HashSet<string>();
        rareified = new HashSet<int>();
        summarized_otus = new HashSet<string>();
        vars = new HashMap<string, string>();
        doc_list = new ArrayList<Xml.Doc*>();
    }
    /**
     * Output the mapping.txt file in the appropriate directory.
     *
     * Produces three files: mapping.txt (QIIME metadata), mapping.extra
     * (Colour/Description columns plus provenance) and headers.txt (a
     * TRUE/FALSE row flagging which mapping columns are numeric).
     *
     * @return false if a sample is missing an attribute or a file could not be written.
     */
    internal bool generate_mapping() {
        /* The dead "if (mapping == null)" check after construction was removed:
         * object creation cannot yield null in Vala. */
        var mapping = new StringBuilder();
        var extra = new StringBuilder();
        var headers = new StringBuilder();
        mapping.append_printf("#SampleID");
        extra.append_printf("#SampleID");
        var first = true;
        foreach (var entry in vars.entries) {
            /* Colour and Description confuse QIIME, so they are diverted to mapping.extra. */
            var isextra = entry.key == "Colour" || entry.key == "Description";
            (isextra ? extra : mapping).append_printf("\t%s", entry.key);
            if (!isextra) {
                if (!first) {
                    headers.append_c('\t');
                }
                /* Integer ("i") and double ("d") columns are flagged numeric. */
                headers.append(entry.value == "i" || entry.value == "d" ? "TRUE" : "FALSE");
                first = false;
            }
        }
        mapping.append_c('\n');
        extra.append("\tFile\tLine\n");
        headers.append_c('\n');
        var result = true;
        foreach (var sample in samples) {
            mapping.append_printf("%d", sample.id);
            extra.append_printf("%d", sample.id);
            foreach (var entry in vars.entries) {
                var prop = sample.xml-> get_prop(entry.key);
                if (prop == null) {
                    stderr.printf("%s: %d: Missing attribute %s.\n", sample.xml-> doc-> url, sample.xml-> line, entry.key);
                    (entry.key == "Colour" || entry.key == "Description" ? extra : mapping).append_printf("\t");
                    result = false;
                } else {
                    if (entry.key == "Colour" || entry.key == "Description") {
                        extra.append_printf("\t%s", prop);
                    } else if (entry.value == "s") {
                        /* For strings, we are going to side step the Variant stuff because we want the XML to look like foo="bar" rather than foo="'bar'" as Variants would have it. */
                        mapping.append_printf("\t%s", prop);
                    } else {
                        try {
                            var value = Variant.parse(new VariantType(entry.value), prop);
                            mapping.append_printf("\t%s", value.print(false));
                        } catch(GLib.VariantParseError e) {
                            stderr.printf("%s: %d: Attribute %s:%s = \"%s\" is not of the correct format.\n", sample.xml-> doc-> url, sample.xml-> line, entry.key, entry.value, prop);
                            mapping.append_c('\t');
                        }
                    }
                }
            }
            mapping.append_c('\n');
            extra.append_printf("\t%s\t%d\n", sample.xml->doc->url, sample.xml->line);
        }
        return update_if_different("mapping.txt", mapping.str) && update_if_different("mapping.extra", extra.str) && update_if_different("headers.txt", headers.str) && result;
    }
    /**
     * Write a file only when its contents would change, so Make does not see
     * a spurious timestamp update.
     */
    bool update_if_different(string filename, string newcontents) {
        var filepath = Path.build_filename(dirname, filename);
        if (FileUtils.test(filepath, FileTest.IS_REGULAR)) {
            string current;
            try {
                if (FileUtils.get_contents(filepath, out current) && current == newcontents) {
                    return true;
                }
            } catch(FileError e) {
                /* We probably don't care. We'll just attempt to write. */
            }
        }
        try {
            return FileUtils.set_contents(filepath, newcontents);
        } catch(FileError e) {
            stderr.printf("%s: %s\n", filepath, e.message);
        }
        return false;
    }
    /**
     * Output the Makefile file in the appropriate directory.
     */
    internal bool generate_makefile(RuleLookup lookup) {
        var now = Time.local(time_t());
        var makefile = FileStream.open(Path.build_filename(dirname, "Makefile"), "w");
        if (makefile == null) {
            stderr.printf("%s: Cannot create Makefile.\n", dirname);
            return false;
        }
        makefile.printf("# Generated by %s from %s on %s\n# Modify at your own peril!\n# Built for QIIME ", PACKAGE, sourcefile, now.to_string());
        makefile.printf("%d", qiime_version[0]);
        for(var it = 1; it < qiime_version.length; it++) {
            makefile.printf(".%d", qiime_version[it]);
        }
        /* Unknown pipelines fall back to QIIME. */
        switch ( pipeline.to_string() ) {
        case "qiime":
            makefile.printf("\n\nPIPELINE = QIIME\n");
            break;
        case "mothur":
            makefile.printf("\n\nPIPELINE = MOTHUR\n");
            break;
        default:
            makefile.printf("\n\nPIPELINE = QIIME\n");
            break;
        }
        //Declare a variable that has our version number in it for Make to use
        if ( is_version_at_least(1,5) ) {
            makefile.printf("\nQIIME_GREATER_THAN_1_5 = TRUE");
        }
        if ( is_version_at_least(1,6) ) {
            makefile.printf("\nQIIME_GREATER_THAN_1_6 = TRUE");
        }
        if ( is_version_at_least(1,8) ) {
            makefile.printf("\nQIIME_1_8 = TRUE");
        }
        makefile.printf("\n\nall: Makefile mapping.txt otu_table.txt %s\n\n", targets.str);
        makefile.printf("Makefile mapping.txt: %s\n\t@echo Updating analyses to be run...\n\t$(V)axiome $<\n\n", sourcefile);
        /* Emit a make variable for every setting a directive supplied. */
        if (classification_method != null) {
            makefile.printf("CLASSIFICATION_METHOD = %s\n", classification_method);
        }
        if (otu_method != null) {
            makefile.printf("OTU_PICKING_METHOD = %s\n", otu_method);
        }
        if (otu_refseqs != null) {
            makefile.printf("OTU_REFSEQS = %s\n", otu_refseqs);
        }
        if (otu_blastdb != null) {
            makefile.printf("OTU_BLASTDB = %s\n", otu_blastdb);
        }
        if (otu_chimera_refseqs != null) {
            makefile.printf("OTU_CHIMERA_REFSEQS = %s\n", otu_chimera_refseqs);
        }
        if (phylo_method != null) {
            makefile.printf("PHYLO_METHOD = %s\n", phylo_method);
        }
        if (clust_ident != null) {
            makefile.printf("CLUSTER_IDENT = %s\n", clust_ident);
        }
        if (dist_cutoff != null) {
            makefile.printf("DIST_CUTOFF = %s\n", dist_cutoff);
        }
        if (otu_flags != null) {
            makefile.printf("OTU_FLAGS = %s\n", otu_flags);
        }
        if (alignment_template != null) {
            makefile.printf("ALIGNMENT_TEMPLATE = %s\n", alignment_template);
        }
        if (class_taxa != null) {
            makefile.printf("CLASS_TAXA = %s\n", class_taxa);
        }
        if (class_seqs != null) {
            makefile.printf("CLASS_SEQS = %s\n", class_seqs);
        }
        /* An empty V disables the $(V) command-hiding prefix, making recipes verbose. */
        if (verbose) {
            makefile.printf("V = \n");
        }
        alignmethod.print(makefile);
        makefile.printf("SEQSOURCES =%s\n\nseq.fasta seq.group: $(SEQSOURCES)\n%s", seqsources.str, seqrule.str);
        //Print out the stats for the sample file
        makefile.printf("\t$(V)awk '{ if (NR == 1) { print \"Sample\\tBarcode\\tSequences Contributed\\n\" } if (min == \"\") { min = max = $$3 }; if ( $$3 > max ) { max = $$3 }; if ( $$3 < min ) { min = $$3 }; total += $$3; count += 1; print; } END { print \"\\nAverage Sequences Contributed: \" total/count \"\\nSmallest Sequences Contributed: \" min \"\\nLargest Sequences Contributed: \" max }' sample_reads_temp.log > sample_reads.log\n\n");
        makefile.printf("\t$(V)rm sample_reads_temp.log\n\n");
        makefile.printf("%s.PHONY: all\n\ninclude %s/aq-base\n", makerules.str, BINDIR);
        makefile.printf("include %s/aq-qiime-base\n", BINDIR);
        makefile.printf("include %s/aq-mothur-base\n", BINDIR);
        lookup.print_include(makefile);
        makefile = null;
        return true;
    }
    /**
     * Generate a summarized OTU table
     *
     * @param level the taxonomic level at which to summarise.
     * @param flavour an optional part of the filename if you have some extra information to convey (e.g., rarefication depth).
     */
    public void make_summarized_otu(TaxonomicLevel level, string flavour) {
        var taxname = level.to_string();
        var taxindex = (int) level;
        var type = @"$(taxname)$(flavour)";
        /* Only emit one rule per level+flavour combination. */
        if (!(type in summarized_otus)) {
            summarized_otus.add(type);
            if (is_version_at_least(1, 3)) {
                makerules.append(@"otu_table_summarized_$(taxname)$(flavour).txt: otu_table$(flavour).txt\n\t@echo Summarizing OTU table $(flavour) to $(taxname)-level...\n\t$$(V)$$(QIIME_PREFIX)summarize_taxa.py -i otu_table$(flavour).txt -L $(taxindex) -o . -a\n\t@mv otu_table$(flavour)_L$(taxindex).txt otu_table_summarized_$(taxname)$(flavour).txt\n\n");
            } else {
                makerules.append(@"otu_table_summarized_$(taxname)$(flavour).txt: otu_table$(flavour).txt\n\t@echo Summarizing OTU table $(flavour) to $(taxname)-level...\n\t$$(V)$$(QIIME_PREFIX)summarize_taxa.py -i otu_table$(flavour).txt -L $(taxindex) -o otu_table_summarized_$(taxname)$(flavour).txt -a\n\n");
            }
        }
    }
    /**
     * Generate rareified OTU tables
     *
     * @param size the rarefaction depth (sequences per sample).
     */
    public void make_rarefied(int size) {
        if (size in rareified) {
            return;
        }
        rareified.add(size);
        /* BUG FIX: the echo line previously lacked its "@echo" prefix, so make
         * tried to execute "Rareifying" as a command and the recipe failed.
         * Additionally, single_rarefaction.py wrote otu_table_auto.txt rather
         * than the rule's declared target, so the target was never built. */
        makerules.append(@"otu_table_$(size).txt: otu_table.txt\n\t@echo Rareifying OTU table to $(size) sequences...\n\t$$(V)$$(QIIME_PREFIX)single_rarefaction.py -i otu_table.txt -o otu_table_$(size).txt -d $(size) $(is_version_at_least(1, 3) ? "" : "--lineages_included")\n\n");
    }
    /**
     * Generate beta-diversity (Unifrac PCOA) analysis
     *
     * This assumes the OTU has already been generated.
     */
    public void make_pcoa(string flavour) {
        if (flavour in pcoa) {
            return;
        }
        pcoa.add(flavour);
        makerules.append(@"beta_div$(flavour)/unweighted_unifrac_otu_table$(flavour).txt beta_div$(flavour)/weighted_unifrac_otu_table$(flavour).txt: otu_table$(flavour).txt seq.fasta_rep_set_aligned_pfiltered.tre\n\t@echo Doing beta diversity analysis $(flavour)...\nifdef MULTICOREBROKEN\n\t$$(V)$$(QIIME_PREFIX)parallel_beta_diversity.py -i otu_table$(flavour).txt -m weighted_unifrac,unweighted_unifrac -o beta_div$(flavour) -t seq.fasta_rep_set_aligned_pfiltered.tre -O $$(NUM_CORES)\nelse\n\t$$(V)$$(QIIME_PREFIX)beta_diversity.py -i otu_table$(flavour).txt -m weighted_unifrac,unweighted_unifrac -o beta_div$(flavour) -t seq.fasta_rep_set_aligned_pfiltered.tre\nendif\n\n");
        makerules.append(@"beta_div_pcoa$(flavour)/pcoa_unweighted_unifrac_otu_table$(flavour).txt beta_div_pcoa$(flavour)/pcoa_weighted_unifrac_otu_table$(flavour).txt: beta_div$(flavour)/unweighted_unifrac_otu_table$(flavour).txt beta_div$(flavour)/weighted_unifrac_otu_table$(flavour).txt\n\t@echo Computing principal coordinates $(flavour)...\n\t$$(V)$$(QIIME_PREFIX)principal_coordinates.py -i beta_div$(flavour) -o beta_div_pcoa$(flavour)\n\n");
    }
    /**
     * Add a file to the targets to be built by make.
     */
    public void add_target(string file) {
        targets.append_printf(" %s", file);
    }
    /**
     * Add a file to the list of sources needed to build seq.fasta.
     */
    public void add_sequence_source(string file) {
        seqsources.append_printf(" %s", file);
    }
    /**
     * Add a rule to the Makefile.
     *
     * In reality, this allows you to append arbitrary content to the innards of the makefile. Obviously, you must output valid make rules and definitions which do not conflict with other definitions.
     */
    public void add_rule(string str) {
        makerules.append(str);
    }
    /**
     * Add a rule to the Makefile, printf-style.
     *
     * In reality, this allows you to append arbitrary content to the innards of the makefile. Obviously, you must output valid make rules and definitions which do not conflict with other definitions.
     */
    [PrintfFormat]
    public void add_rulef(string format, ...) {
        var va = va_list();
        makerules.append_vprintf(format, va);
    }
    /**
     * Add declaration to the make Makefile.
     */
    public new void set(string key, string value) {
        makerules.append_printf("%s = %s\n\n", key, value);
    }
    /**
     * Register an XML “sample” element containing attributes satisfying the metadata requirements of the “defs”.
     *
     * @return the unique identifier for a sample. This must be associated with the map used in {@link prepare_sequences}.
     */
    internal Sample add_sample(string tag, Xml.Node *sample) {
        var sample_obj = new Sample();
        sample_obj.limit = -1;
        sample_obj.xml = sample;
        /* IDs are sequential registration indices. */
        sample_obj.id = samples.size;
        sample_obj.tag = tag;
        samples.add(sample_obj);
        return sample_obj;
    }
    /**
     * Create a rule to extract sequence data from a command.
     *
     * It is assumed the supplied command will output FASTA data. The FASTA sequences will be binned into samples and the error output will be saved to a file.
     * @param prep the command to prepare the sequence
     */
    internal void prepare_sequences(string prep, Collection<Sample> samples) {
        var awkprint = new StringBuilder();
        var awkcheck = new StringBuilder();
        foreach (var sample in samples) {
            if (sample.tag != "*") {
                awkprint.append_printf(" if (name ~ /%s/", sample.tag);
            } else {
                //Hacky approach to not doing a filter when sample tag is *
                awkprint.append_printf(" if ( 1");
            }
            /* Honour the per-sample sequence cap, when set. */
            if (sample.limit > 0) {
                awkprint.append_printf(" && count%d < %d", sample.id, sample.limit);
            }
            awkprint.append_printf(") { print \">%d_\" NR \"\\n\" seq; print \"%d_\" NR \"\\t%d\" >> \"seq.group\"; count%d++; }", sample.id, sample.id, sample.id, sample.id);
            awkcheck.append_printf(" if (count%d == 0) { print \"Library defined in %s:%d contributed no sequences. This is probably not what you want.\" > \"/dev/stderr\"; print \"%d\\tWarning: %s contributed no sequences to library\" >> \"sample_reads_temp.log\" } else { ", sample.id, sample.xml-> doc-> url, sample.xml-> line, sample.id, sample.tag);
            awkcheck.append_printf("print \"%d\\t%s\\t\" count%d >> \"sample_reads_temp.log\" }", sample.id, sample.tag, sample.id);
        }
        seqrule.append_printf("\t$(V)(%s | awk '/^>/ { if (seq) {%s } name = substr($$0, 2); seq = \"\"; } $$0 !~ /^>/ { seq = seq $$0; } END { if (seq) {%s }%s }' >> seq.fasta) 2>&1 | bzip2 > logs/seq_%d.log.bz2\n\n", prep, awkprint.str, awkprint.str, awkcheck.str, sequence_preparations++);
    }
    /**
     * Include and process another parsed XML document.
     */
    public void add_doc(Xml.Doc* doc) {
        doc_list.add(doc);
    }
    ~Output() {
        /* Xml.Doc pointers are unmanaged; free them explicitly. */
        foreach(var doc in doc_list) {
            delete doc;
        }
    }
}
/**
 * Complain about something in an XML tag with some context for the user.
 *
 * Writes "file: line: " (taken from the node's document) followed by the
 * printf-formatted message to standard error.
 *
 * @param node the offending XML node
 * @param format printf-style format string for the rest of the message
 */
[PrintfFormat]
public void definition_error(Xml.Node *node, string format, ...) {
var va = va_list();
stderr.printf("%s: %d: ", node-> doc-> url, node-> line);
stderr.vprintf(format, va);
}
/**
 * Determine how compressed files are processed.
 */
public enum FileCompression {
    PLAIN,
    GZIP,
    BZIP;
    /**
     * Get the tool that one would use to render the file to plain text.
     */
    public string get_cat() {
        switch (this) {
        case FileCompression.GZIP :
            return "gunzip -c";
        case FileCompression.BZIP :
            return "bzcat";
        default :
            return "cat";
        }
    }
    /**
     * Use magic to determine the compression format of the supplied file.
     *
     * Falls back to PLAIN when the MIME type cannot be determined or is not
     * a recognised compression format.
     */
    public static FileCompression for_file(string file) {
        var magic = new LibMagic.Magic(LibMagic.Flags.SYMLINK|LibMagic.Flags.MIME_TYPE);
        magic.load();
        var mime = magic.file(file);
        if (mime == null) {
            return PLAIN;
        }
        /* Newer libmagic releases report the IANA-registered "application/gzip"
         * (RFC 6713) instead of the legacy "application/x-gzip"; accept both
         * spellings for gzip and bzip2 alike. */
        if (mime.has_prefix("application/x-bzip2") || mime.has_prefix("application/bzip2")) {
            return BZIP;
        }
        if (mime.has_prefix("application/x-gzip") || mime.has_prefix("application/gzip")) {
            return GZIP;
        }
        return PLAIN;
    }
}
/**
 * Class to provide access to {@link RuleProcessor}s in the correct parsing order.
 */
class RuleLookup : TypeModule {
    /* The highest RuleType seen so far; directives may not go backwards. */
    RuleType state;
    HashMap<string, RuleProcessor> table;
    /* Names of once-only directives that have already been consumed. */
    HashSet<string> seen;
    public RuleLookup() {
        state = RuleType.DEFINITON;
        table = new HashMap<string, RuleProcessor>();
        seen = new HashSet<string>();
    }
    /**
     * Write the list of files the processors need to be included to the Makefile.
     */
    public void print_include(FileStream stream) {
        foreach (var rule in table.values) {
            var file = rule.get_include();
            if (file != null) {
                assert(is_valid_filename(file));
                stream.printf("include %s\n", file);
            }
        }
    }
    /**
     * Get the appropriate processor and update state so that the file is ensured to be in the correct order.
     *
     * @return null when the tag is unknown, already consumed (for once-only
     * rules), or appears out of RuleType order.
     */
    public new RuleProcessor ? @get(string name) {
        if (!table.has_key(name)) {
            return null;
        }
        if (name in seen) {
            return null;
        }
        var processor = table[name];
        var type = processor.get_ruletype();
        if (type < state) {
            return null;
        }
        if (processor.is_only_once()) {
            seen.add(name);
        }
        state = type;
        return processor;
    }
    /**
     * Register a new file processor.
     */
    public void add(RuleProcessor processor) {
        var name = processor.get_name();
        table[name] = processor;
    }
    /**
     * Register all rule processors in a type hierarchy. This assumes they can be instantiated with an empty constructor.
     */
    public void add_children(Type t) requires (t.is_a(typeof(RuleProcessor))) {
        foreach (var child in t.children()) {
            if (child.is_instantiatable() && !child.is_abstract()) {
                add((RuleProcessor) Object.new(child));
            }
            /* Recurse so grandchildren are registered too. */
            add_children(child);
        }
    }
    /**
     * Discover dynamically loadable modules/plugins.
     */
    public void find_modules() {
        if (!Module.supported())
            return;
        var dir = File.new_for_path(MODDIR);
        if (dir == null)
            return;
        try {
            FileInfo? info = dir.query_info(FILE_ATTRIBUTE_STANDARD_TYPE, FileQueryInfoFlags.NONE, null);
            if (info == null || info.get_file_type() != FileType.DIRECTORY)
                return;
            var it = dir.enumerate_children("standard::*", FileQueryInfoFlags.NONE);
            while((info = it.next_file()) != null) {
                var file = dir.get_child(info.get_name());
                if (info.get_file_type() == FileType.DIRECTORY)
                    continue;
                if (ContentType.get_mime_type(info.get_content_type()) == "application/x-sharedlib") {
                    /* BUG FIX: file already points at MODDIR/<name>, so the old
                     * Path.build_filename(file.get_path(), info.get_name())
                     * produced MODDIR/<name>/<name> and no module could ever
                     * be opened. */
                    var file_path = file.get_path();
                    var module = Module.open (file_path, ModuleFlags.BIND_LOCAL);
                    if (module != null) {
                        void* function;
                        if (module.symbol("init", out function) && function != null) {
                            var init_func = (InitFunc) function;
                            /* Keep the module loaded for the life of the process;
                             * the types it registers must not vanish. */
                            module.make_resident();
                            init_func(this);
                        }
                    }
                }
            }
        } catch (GLib.Error error) {
            if (!(error is IOError.NOT_FOUND)) {
                warning("Failed to discover modules in %s. %s", MODDIR, error.message);
            }
            return;
        }
    }
}
[CCode(has_target = false)]
delegate void InitFunc(TypeModule module);
/**
 * Processor for definitions (aka “def” tags) in the input file.
 *
 * Each def declares a named, optionally typed metadata attribute that every
 * sample element must carry.
 */
class Definition : RuleProcessor {
    public override RuleType get_ruletype() {
        return RuleType.DEFINITON;
    }
    public override unowned string get_name() {
        return "def";
    }
    public override unowned string ? get_include() {
        return null;
    }
    public override version introduced_version() {
        return version(1, 0);
    }
    public override bool is_only_once() {
        return false;
    }
    /**
     * Record the definition's name and GVariant type code in the output.
     */
    public override bool process(Xml.Node *definition, Output output) {
        var name = definition-> get_prop("name");
        if (name == null) {
            definition_error(definition, "Definition missing name.\n");
            return false;
        }
        /* These attribute names are interpreted by the sample machinery itself. */
        switch (name) {
        case "regex":
        case "tag":
        case "limit":
            definition_error(definition, "Reserved name used for definition.\n");
            return false;
        default:
            break;
        }
        if (output.vars.has_key(name)) {
            definition_error(definition, "Duplicate definition of %s.\n", name);
            return false;
        }
        var type = definition-> get_prop("type");
        if (type == null) {
            /* Untyped definitions default to plain strings. */
            output.vars[name] = "s";
            return true;
        }
        if (!VariantType.string_is_valid(type)) {
            definition_error(definition, "Invalid type %s for %s.\n", type, name);
            return false;
        }
        output.vars[name] = type;
        return true;
    }
}
/**
* POSIX realpath function to canonicalise a path. Sadly, this is not in GLib anywhere.
*/
[CCode(cname = "realpath", cheader_filename = "stdlib.h")]
extern string realpath(string path, [CCode(array_length = false, null_terminated = true)] char[] ? buffer = null);
const string QIIME_VERSION_MARKER = "QIIME library version:\t";
/**
 * Check that we have a new enough release of QIIME on this system.
 *
 * @param major required major version
 * @param minor required minor version (only consulted when majors are equal)
 * @return true when the detected QIIME version is at least major.minor
 */
public bool is_version_at_least(int major, int minor) {
    if (qiime_version.length == 0) {
        return false;
    }
    if (qiime_version[0] > major) {
        return true;
    }
    return qiime_version.length > 1 && qiime_version[0] == major && qiime_version[1] >= minor;
}
int[] qiime_version;
/**
 * Determine the QIIME version
 *
 * Runs print_qiime_config.py (optionally prefixed by $QIIME_PREFIX) and
 * extracts the digits from its "QIIME library version:" line.
 *
 * @return the numeric version components in order, or null if the tool could
 * not be run or the version line was missing/unparseable.
 */
int[]? get_qiime_version() {
string output;
string error;
int status;
// Get QIIME_PREFIX, and if it is NULL, set to empty string
var qiime_config = (Environment.get_variable("QIIME_PREFIX")??"") + "print_qiime_config.py";
try {
if (!Process.spawn_command_line_sync(qiime_config, out output, out error, out status) || status != 0) {
stderr.printf("Could not run \"%s\". The error output was:\n%s\n", qiime_config, error);
return null;
}
} catch (SpawnError e) {
stderr.printf("Could not run \"%s\": %s\n", qiime_config, e.message);
return null;
}
var index = output.index_of(QIIME_VERSION_MARKER);
if (index == -1) {
stderr.printf("\"%s\" doesn't have a version string like I expect.\n", qiime_config);
return null;
}
/* Start scanning just after the marker, up to the end of the line. */
index += QIIME_VERSION_MARKER.length;
int[] parts = {};
/* current accumulates one decimal component; -1 means no digit seen yet. */
int current = -1;
while(index < output.length && output[index] != '\n') {
if(output[index].isdigit()) {
if (current == -1) {
current = (int) (output[index] - '0');
} else {
current = current * 10 + (int) (output[index] - '0');
}
} else if (output[index] == '.') {
/* A dot ends the current component; other characters are skipped. */
parts += current;
current = -1;
}
index++;
}
/* Flush the final component (the line does not end with a dot). */
if (current != -1) {
parts += current;
}
if (parts.length == 0) {
stderr.printf("Could not make sense of the version from \"%s\".\n", qiime_config);
return null;
}
/* Echo the detected version for the user. */
stdout.printf("QIIME version: ");
for (int i = 0; i < parts.length; i++) {
if (i > 0)
stdout.putc('.');
stdout.printf("%d", parts[i]);
}
stdout.putc('\n');
return parts;
}
bool process_document(string filename, RuleLookup lookup, Output output, bool is_root = false) {
var absfilename = realpath(filename);
if (absfilename == null) {
stderr.printf("%s: Cannot canonicalize path.\n", filename);
return false;
}
Xml.Doc *doc = Parser.parse_file(absfilename);
if (doc == null) {