diff --git a/adam-apis/pom.xml b/adam-apis/pom.xml index 9645279672..76a256d9a8 100644 --- a/adam-apis/pom.xml +++ b/adam-apis/pom.xml @@ -3,14 +3,14 @@ 4.0.0 org.bdgenomics.adam - adam-parent + adam-parent_2.10 0.16.1-SNAPSHOT ../pom.xml - adam-apis + adam-apis_2.10 jar - ADAM: APIs for Java + ADAM_2.10: APIs for Java @@ -95,11 +95,11 @@ org.apache.spark - spark-core_${scala.artifact.suffix} + spark-core_2.10 - org.bdgenomics.bdg-utils - bdg-utils-misc + org.bdgenomics.utils + utils-misc_2.10 test-jar test @@ -109,11 +109,11 @@ org.bdgenomics.adam - adam-core + adam-core_2.10 org.bdgenomics.adam - adam-core + adam-core_2.10 test-jar test @@ -131,7 +131,7 @@ org.scalatest - scalatest_${scala.artifact.suffix} + scalatest_2.10 test diff --git a/adam-cli/pom.xml b/adam-cli/pom.xml index c6bed3d7a3..71a794d226 100644 --- a/adam-cli/pom.xml +++ b/adam-cli/pom.xml @@ -3,14 +3,14 @@ 4.0.0 org.bdgenomics.adam - adam-parent + adam-parent_2.10 0.16.1-SNAPSHOT ../pom.xml - adam-cli + adam-cli_2.10 jar - ADAM: CLI + ADAM_2.10: CLI @@ -104,25 +104,29 @@ org.apache.spark - spark-core_${scala.artifact.suffix} + spark-core_2.10 - org.bdgenomics.bdg-utils - bdg-utils-misc + org.bdgenomics.utils + utils-misc_2.10 test-jar test - org.bdgenomics.bdg-utils - bdg-utils-parquet + org.bdgenomics.utils + utils-io_2.10 - org.bdgenomics.bdg-utils - bdg-utils-metrics + org.bdgenomics.utils + utils-cli_2.10 + + + org.bdgenomics.utils + utils-metrics_2.10 org.scoverage - scalac-scoverage-plugin_${scala.artifact.suffix} + scalac-scoverage-plugin_2.10 org.bdgenomics.bdg-formats @@ -130,21 +134,21 @@ org.bdgenomics.adam - adam-core + adam-core_2.10 org.bdgenomics.adam - adam-core + adam-core_2.10 test-jar test org.bdgenomics.adam - adam-apis + adam-apis_2.10 org.bdgenomics.adam - adam-apis + adam-apis_2.10 test-jar test @@ -160,21 +164,9 @@ args4j args4j - - org.fusesource.scalate - scalate-core_2.10 - - - org.scalatra - scalatra-json_2.10 - - - org.scalatra - scalatra_2.10 - org.scalatest - scalatest_${scala.artifact.suffix} + scalatest_2.10 test diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala index 0c69918a7b..1ed6b28e1e 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala @@ -17,17 +17,18 @@ */ package org.bdgenomics.adam.cli -import org.bdgenomics.formats.avro.Genotype -import org.bdgenomics.adam.rdd.ADAMContext._ -import org.kohsuke.args4j.{ Option => Args4jOption, Argument } +import java.io.File import org.apache.spark.rdd.RDD import org.apache.spark.{ Logging, SparkContext } import org.apache.hadoop.mapreduce.Job -import java.io.File import org.bdgenomics.adam.models.SequenceDictionary +import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.formats.avro.Genotype +import org.bdgenomics.utils.cli._ +import org.kohsuke.args4j.{ Option => Args4jOption, Argument } import scala.Option -object ADAM2Vcf extends ADAMCommandCompanion { +object ADAM2Vcf extends BDGCommandCompanion { val commandName = "adam2vcf" val commandDescription = "Convert an ADAM variant to the VCF ADAM format" @@ -54,10 +55,10 @@ class ADAM2VcfArgs extends Args4jBase with ParquetArgs { var sort: Boolean = false } -class ADAM2Vcf(val args: ADAM2VcfArgs) extends ADAMSparkCommand[ADAM2VcfArgs] with DictionaryCommand with Logging { +class ADAM2Vcf(val args: ADAM2VcfArgs) extends BDGSparkCommand[ADAM2VcfArgs] with DictionaryCommand with Logging { val companion = ADAM2Vcf - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { var dictionary: Option[SequenceDictionary] = loadSequenceDictionary(args.dictionaryFile) if (dictionary.isDefined) log.info("Using contig translation") diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala deleted file mode 100644 index 93ebec5f26..0000000000 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.cli - -import java.io.{ StringWriter, PrintWriter } - -import org.apache.hadoop.mapreduce.Job -import org.apache.spark.{ SparkConf, Logging, SparkContext } -import org.bdgenomics.utils.instrumentation._ -import org.bdgenomics.adam.util.HadoopUtil - -trait ADAMCommandCompanion { - val commandName: String - val commandDescription: String - - def apply(cmdLine: Array[String]): ADAMCommand - - // Make running an ADAM command easier from an IDE - def main(cmdLine: Array[String]) { - apply(cmdLine).run() - } -} - -trait ADAMCommand extends Runnable { - val companion: ADAMCommandCompanion -} - -trait ADAMSparkCommand[A <: Args4jBase] extends ADAMCommand with Logging { - protected val args: A - - def run(sc: SparkContext, job: Job) - - def run() { - val start = System.nanoTime() - val conf = new SparkConf().setAppName("adam: " + companion.commandName) - if (conf.getOption("spark.master").isEmpty) { - conf.setMaster("local[%d]".format(Runtime.getRuntime.availableProcessors())) - } - val sc = new SparkContext(conf) - val job = HadoopUtil.newJob() - val metricsListener = initializeMetrics(sc) - run(sc, job) - val totalTime = System.nanoTime() - start - printMetrics(totalTime, metricsListener) - } - - def initializeMetrics(sc: SparkContext): Option[MetricsListener] = { - if (args.printMetrics) { - val metricsListener = new MetricsListener(new RecordedMetrics()) - sc.addSparkListener(metricsListener) - Metrics.initialize(sc) - Some(metricsListener) - } else { - // This avoids recording metrics if we have a recorder left over from previous use of this thread - Metrics.stopRecording() - None - } - } - - def printMetrics(totalTime: Long, metricsListener: Option[MetricsListener]) { - logInfo("Overall Duration: " + DurationFormatting.formatNanosecondDuration(totalTime)) - if (args.printMetrics && metricsListener.isDefined) { - // Set the output buffer size to 4KB by default - val stringWriter = new StringWriter() - val out = new PrintWriter(stringWriter) - out.println("Metrics:") - out.println() - Metrics.print(out, Some(metricsListener.get.metrics.sparkMetrics.stageTimes)) - out.println() - metricsListener.get.metrics.sparkMetrics.print(out) - out.flush() - logInfo(stringWriter.getBuffer.toString) - } - } - -} diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala index 39f83aa34f..59319c56d4 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala @@ -17,15 +17,16 @@ */ package org.bdgenomics.adam.cli +import java.util.logging.Level._ import org.apache.spark.Logging +import org.bdgenomics.adam.util.ParquetLogger +import org.bdgenomics.utils.cli._ import scala.Some import scala.collection.mutable.ListBuffer -import org.bdgenomics.adam.util.ParquetLogger -import java.util.logging.Level._ object ADAMMain extends Logging { - case class CommandGroup(name: String, commands: List[ADAMCommandCompanion]) + case class CommandGroup(name: String, commands: List[BDGCommandCompanion]) private val commandGroups = List( diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala index 4dc213f7e6..2554a5a0f0 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala @@ -25,6 +25,7 @@ import org.apache.spark.storage.StorageLevel import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Option => Args4JOption, Argument } class Adam2FastqArgs extends ParquetLoadSaveArgs { @@ -40,7 +41,7 @@ class Adam2FastqArgs extends ParquetLoadSaveArgs { var disableProjection: Boolean = false } -object Adam2Fastq extends ADAMCommandCompanion { +object Adam2Fastq extends BDGCommandCompanion { override val commandName = "adam2fastq" override val commandDescription = "Convert BAM to FASTQ files" @@ -48,10 +49,10 @@ object Adam2Fastq extends ADAMCommandCompanion { new Adam2Fastq(Args4j[Adam2FastqArgs](cmdLine)) } -class Adam2Fastq(val args: Adam2FastqArgs) extends ADAMSparkCommand[Adam2FastqArgs] { +class Adam2Fastq(val args: Adam2FastqArgs) extends BDGSparkCommand[Adam2FastqArgs] { override val companion = Adam2Fastq - override def run(sc: SparkContext, job: Job): Unit = { + override def run(sc: SparkContext): Unit = { val projectionOpt = if (!args.disableProjection) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala index 233526481f..4e2d333928 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala @@ -23,9 +23,10 @@ import org.apache.spark.{ Logging, SparkContext } import org.apache.spark.rdd.RDD import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele } +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.Argument -object AlleleCount extends ADAMCommandCompanion { +object AlleleCount extends BDGCommandCompanion { val commandName = "allelecount" val commandDescription = "Calculate Allele frequencies" @@ -65,10 +66,10 @@ object AlleleCountHelper extends Serializable { } } -class AlleleCount(val args: AlleleCountArgs) extends ADAMSparkCommand[AlleleCountArgs] with Logging { +class AlleleCount(val args: AlleleCountArgs) extends BDGSparkCommand[AlleleCountArgs] with Logging { val companion = AlleleCount - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { val adamVariants: RDD[Genotype] = sc.loadGenotypes(args.adamFile) AlleleCountHelper.countAlleles(adamVariants, args) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala deleted file mode 100644 index 0aa4af218d..0000000000 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.cli - -import org.kohsuke.args4j.{ Option, CmdLineException, CmdLineParser } -import scala.collection.JavaConversions._ - -class Args4jBase { - @Option(name = "-h", aliases = Array("-help", "--help", "-?"), usage = "Print help") - var doPrintUsage: Boolean = false - @Option(name = "-print_metrics", usage = "Print metrics to the log on completion") - var printMetrics: Boolean = false -} - -object Args4j { - val helpOptions = Array("-h", "-help", "--help", "-?") - - def apply[T <% Args4jBase: Manifest](args: Array[String], ignoreCmdLineExceptions: Boolean = false): T = { - val args4j: T = manifest[T].runtimeClass.asInstanceOf[Class[T]].newInstance() - val parser = new CmdLineParser(args4j) - parser.setUsageWidth(150); - - def displayHelp(exitCode: Int = 0) = { - parser.printUsage(System.out) - System.exit(exitCode) - } - - // Work around for help processing in Args4j - if (args.exists(helpOptions.contains(_))) { - displayHelp() - } - - try { - parser.parseArgument(args.toList) - if (args4j.doPrintUsage) - displayHelp() - } catch { - case e: CmdLineException => - if (!ignoreCmdLineExceptions) { - println(e.getMessage) - displayHelp(1) - } - } - - args4j - } - -} - diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala index c7fb9d66b7..2b0e7494ae 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala @@ -24,15 +24,16 @@ import org.apache.hadoop.fs.Path import org.bdgenomics.adam.converters.SAMRecordConverter import org.bdgenomics.adam.models.{ RecordGroupDictionary, SequenceDictionary } import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } import parquet.avro.AvroParquetWriter import scala.collection.JavaConversions._ -object Bam2ADAM extends ADAMCommandCompanion { +object Bam2ADAM extends BDGCommandCompanion { val commandName: String = "bam2adam" val commandDescription: String = "Single-node BAM to ADAM converter (Note: the 'transform' command can take SAM or BAM as input)" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new Bam2ADAM(Args4j[Bam2ADAMArgs](cmdLine)) } } @@ -50,7 +51,7 @@ class Bam2ADAMArgs extends Args4jBase with ParquetArgs { var qSize = 10000 } -class Bam2ADAM(args: Bam2ADAMArgs) extends ADAMCommand { +class Bam2ADAM(args: Bam2ADAMArgs) extends BDGCommand { val companion = Bam2ADAM val blockingQueue = new LinkedBlockingQueue[Option[(SAMRecord, SequenceDictionary, RecordGroupDictionary)]](args.qSize) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala index e96bd65a59..8ad727eae2 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala @@ -17,16 +17,18 @@ */ package org.bdgenomics.adam.cli -object BuildInformation extends ADAMCommandCompanion { +import org.bdgenomics.utils.cli._ + +object BuildInformation extends BDGCommandCompanion { val commandName: String = "buildinfo" val commandDescription: String = "Display build information (use this for bug reports)" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new BuildInformation() } } -class BuildInformation() extends ADAMCommand { +class BuildInformation() extends BDGCommand { val companion = BuildInformation def run() = { diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala index b56165eda5..0e46e461bf 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala @@ -29,6 +29,7 @@ import org.bdgenomics.adam.projections.AlignmentRecordField._ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.BroadcastRegionJoin import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import scala.io._ /** @@ -38,12 +39,12 @@ import scala.io._ * It then reports, on standard out, the location and name of each variant along with the * calculated depth. */ -object CalculateDepth extends ADAMCommandCompanion { +object CalculateDepth extends BDGCommandCompanion { val commandName: String = "depth" val commandDescription: String = "Calculate the depth from a given ADAM file, " + "at each variant in a VCF" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new CalculateDepth(Args4j[CalculateDepthArgs](cmdLine)) } } @@ -59,10 +60,10 @@ class CalculateDepthArgs extends Args4jBase with ParquetArgs { val cartesian: Boolean = false } -class CalculateDepth(protected val args: CalculateDepthArgs) extends ADAMSparkCommand[CalculateDepthArgs] { - val companion: ADAMCommandCompanion = CalculateDepth +class CalculateDepth(protected val args: CalculateDepthArgs) extends BDGSparkCommand[CalculateDepthArgs] { + val companion: BDGCommandCompanion = CalculateDepth - def run(sc: SparkContext, job: Job): Unit = { + def run(sc: SparkContext): Unit = { val proj = Projection(contig, start, cigar, readMapped) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala deleted file mode 100644 index 8e9b879373..0000000000 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala index f27f0d878c..b0e957f215 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala @@ -24,9 +24,10 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.util.ParquetLogger import org.bdgenomics.formats.avro.NucleotideContigFragment +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -object CountContigKmers extends ADAMCommandCompanion { +object CountContigKmers extends BDGCommandCompanion { val commandName = "count_contig_kmers" val commandDescription = "Counts the k-mers/q-mers from a read dataset." @@ -46,10 +47,10 @@ class CountContigKmersArgs extends Args4jBase with ParquetArgs { var printHistogram: Boolean = false } -class CountContigKmers(protected val args: CountContigKmersArgs) extends ADAMSparkCommand[CountContigKmersArgs] with Logging { +class CountContigKmers(protected val args: CountContigKmersArgs) extends BDGSparkCommand[CountContigKmersArgs] with Logging { val companion = CountContigKmers - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { // Quiet Parquet... ParquetLogger.hadoopLoggerLevel(Level.SEVERE) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala index f4e76e1bec..caf33b0e66 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala @@ -25,9 +25,10 @@ import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.util.ParquetLogger import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -object CountReadKmers extends ADAMCommandCompanion { +object CountReadKmers extends BDGCommandCompanion { val commandName = "count_kmers" val commandDescription = "Counts the k-mers/q-mers from a read dataset." @@ -51,10 +52,10 @@ class CountReadKmersArgs extends Args4jBase with ParquetArgs { var repartition: Int = -1 } -class CountReadKmers(protected val args: CountReadKmersArgs) extends ADAMSparkCommand[CountReadKmersArgs] with Logging { +class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCommand[CountReadKmersArgs] with Logging { val companion = CountReadKmers - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { // Quiet Parquet... ParquetLogger.hadoopLoggerLevel(Level.SEVERE) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala index 47449151b8..d1f1b3dd0a 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala @@ -17,16 +17,16 @@ */ package org.bdgenomics.adam.cli -import org.apache.hadoop.mapreduce.Job import org.apache.spark.{ Logging, SparkContext } import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -object Fasta2ADAM extends ADAMCommandCompanion { +object Fasta2ADAM extends BDGCommandCompanion { val commandName: String = "fasta2adam" val commandDescription: String = "Converts a text FASTA sequence file into an ADAMNucleotideContig Parquet file which represents assembled sequences." - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new Fasta2ADAM(Args4j[Fasta2ADAMArgs](cmdLine)) } } @@ -44,10 +44,10 @@ class Fasta2ADAMArgs extends Args4jBase with ParquetSaveArgs { var fragmentLength: Long = 10000L } -class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends ADAMSparkCommand[Fasta2ADAMArgs] with Logging { +class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends BDGSparkCommand[Fasta2ADAMArgs] with Logging { val companion = Fasta2ADAM - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { log.info("Loading FASTA data from disk.") val adamFasta = sc.loadFasta(args.fastaFile, fragmentLength = args.fragmentLength) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala index db6f3a6d8d..360595e17e 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala @@ -23,9 +23,10 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.BaseFeature import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.Feature +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.Argument -object Features2ADAM extends ADAMCommandCompanion { +object Features2ADAM extends BDGCommandCompanion { val commandName = "features2adam" val commandDescription = "Convert a file with sequence features into corresponding ADAM format" @@ -44,10 +45,10 @@ class Features2ADAMArgs extends Args4jBase with ParquetSaveArgs { } class Features2ADAM(val args: Features2ADAMArgs) - extends ADAMSparkCommand[Features2ADAMArgs] { + extends BDGSparkCommand[Features2ADAMArgs] { val companion = Features2ADAM - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { sc.loadFeatures(args.featuresFile).adamParquetSave(args) } } diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala index 1806c854d0..e4c9f24a12 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala @@ -23,13 +23,14 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.projections.{ Projection, AlignmentRecordField } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.Argument -object FlagStat extends ADAMCommandCompanion { +object FlagStat extends BDGCommandCompanion { val commandName: String = "flagstat" val commandDescription: String = "Print statistics on reads in an ADAM file (similar to samtools flagstat)" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new FlagStat(Args4j[FlagStatArgs](cmdLine)) } } @@ -39,10 +40,10 @@ class FlagStatArgs extends Args4jBase with ParquetArgs { val inputPath: String = null } -class FlagStat(protected val args: FlagStatArgs) extends ADAMSparkCommand[FlagStatArgs] { - val companion: ADAMCommandCompanion = FlagStat +class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagStatArgs] { + val companion: BDGCommandCompanion = FlagStat - def run(sc: SparkContext, job: Job): Unit = { + def run(sc: SparkContext): Unit = { val projection = Projection( AlignmentRecordField.readMapped, diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala index 4d371189ce..ffae66ee3b 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala @@ -19,17 +19,18 @@ package org.bdgenomics.adam.cli import org.apache.avro.Schema import org.apache.avro.generic.IndexedRecord -import org.apache.hadoop.mapreduce.Job import org.apache.spark.rdd.MetricsContext._ import org.apache.spark.{ Logging, SparkContext } import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.util.{ Flattener, HadoopUtil } +import org.bdgenomics.adam.util.Flattener +import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.instrumentation.Metrics +import org.bdgenomics.utils.misc.HadoopUtil import org.kohsuke.args4j.Argument import parquet.avro.AvroParquetInputFormat import parquet.hadoop.util.ContextUtil -object Flatten extends ADAMCommandCompanion { +object Flatten extends BDGCommandCompanion { val commandName = "flatten" val commandDescription = "Convert a ADAM format file to a version with a flattened " + "schema, suitable for querying with tools like Impala" @@ -48,10 +49,10 @@ class FlattenArgs extends Args4jBase with ParquetSaveArgs { var outputPath: String = null } -class Flatten(val args: FlattenArgs) extends ADAMSparkCommand[FlattenArgs] with Logging { +class Flatten(val args: FlattenArgs) extends BDGSparkCommand[FlattenArgs] with Logging { val companion = Flatten - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { val job = HadoopUtil.newJob(sc) val records = sc.newAPIHadoopFile( @@ -76,4 +77,4 @@ class Flatten(val args: FlattenArgs) extends ADAMSparkCommand[FlattenArgs] with args.disableDictionaryEncoding, Some(flatSchema)) } -} \ No newline at end of file +} diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala index 3e2cc8e147..aaba5a5898 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala @@ -22,13 +22,14 @@ import org.apache.spark.SparkContext import org.bdgenomics.adam.models.SequenceRecord import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.Argument -object ListDict extends ADAMCommandCompanion { +object ListDict extends BDGCommandCompanion { val commandName: String = "listdict" val commandDescription: String = "Print the contents of an ADAM sequence dictionary" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new ListDict(Args4j[ListDictArgs](cmdLine)) } } @@ -38,10 +39,10 @@ class ListDictArgs extends Args4jBase with ParquetArgs { val inputPath: String = null } -class ListDict(protected val args: ListDictArgs) extends ADAMSparkCommand[ListDictArgs] { - val companion: ADAMCommandCompanion = ListDict +class ListDict(protected val args: ListDictArgs) extends BDGSparkCommand[ListDictArgs] { + val companion: BDGCommandCompanion = ListDict - def run(sc: SparkContext, job: Job): Unit = { + def run(sc: SparkContext): Unit = { val dict = sc.adamDictionaryLoad[AlignmentRecord](args.inputPath) dict.records.foreach { diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala deleted file mode 100644 index 25212fb1aa..0000000000 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.cli - -import org.bdgenomics.adam.rdd.{ ADAMSaveArgs, ADAMParquetArgs } -import org.kohsuke.args4j.{ Argument, Option } -import parquet.hadoop.metadata.CompressionCodecName - -trait ParquetArgs extends Args4jBase with ADAMParquetArgs { - @Option(required = false, name = "-parquet_block_size", usage = "Parquet block size (default = 128mb)") - var blockSize = 128 * 1024 * 1024 - @Option(required = false, name = "-parquet_page_size", usage = "Parquet page size (default = 1mb)") - var pageSize = 1 * 1024 * 1024 - @Option(required = false, name = "-parquet_compression_codec", usage = "Parquet compression codec") - var compressionCodec = CompressionCodecName.GZIP - @Option(name = "-parquet_disable_dictionary", usage = "Disable dictionary encoding") - override var disableDictionaryEncoding = false - @Option(required = false, name = "-parquet_logging_level", usage = "Parquet logging level (default = severe)") - var logLevel = "SEVERE" -} - -trait ParquetSaveArgs extends ParquetArgs with ADAMSaveArgs - -trait LoadFileArgs { - @Argument(required = true, metaVar = "INPUT", usage = "The ADAM, BAM or SAM file to load as input", index = 0) - var inputPath: String = null -} - -trait SaveFileArgs { - @Argument(required = true, metaVar = "OUTPUT", usage = "The ADAM, BAM or SAM file to save as output", index = 1) - var outputPath: String = null -} - -trait ParquetLoadSaveArgs extends ParquetSaveArgs with LoadFileArgs with SaveFileArgs diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala index 98b73efaa6..7938c78e84 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala @@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.plugins.{ AccessControl, ADAMPlugin } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } /** @@ -38,11 +39,11 @@ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } * plugin. The org.bdgenomics.adam.plugins.ADAMPlugin interface defines the * class that will run using this command. */ -object PluginExecutor extends ADAMCommandCompanion { +object PluginExecutor extends BDGCommandCompanion { val commandName: String = "plugin" val commandDescription: String = "Executes an ADAMPlugin" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new PluginExecutor(Args4j[PluginExecutorArgs](cmdLine)) } } @@ -62,8 +63,8 @@ class PluginExecutorArgs extends Args4jBase with ParquetArgs { var pluginArgs: String = "" } -class PluginExecutor(protected val args: PluginExecutorArgs) extends ADAMSparkCommand[PluginExecutorArgs] { - val companion: ADAMCommandCompanion = PluginExecutor +class PluginExecutor(protected val args: PluginExecutorArgs) extends BDGSparkCommand[PluginExecutorArgs] { + val companion: BDGCommandCompanion = PluginExecutor def loadPlugin[Input <% SpecificRecord: Manifest, Output](pluginName: String): ADAMPlugin[Input, Output] = { Thread.currentThread() @@ -85,7 +86,7 @@ class PluginExecutor(protected val args: PluginExecutorArgs) extends ADAMSparkCo output.map(_.toString).collect().foreach(println) } - def run(sc: SparkContext, job: Job): Unit = { + def run(sc: SparkContext): Unit = { val plugin = loadPlugin[AlignmentRecord, Any](args.plugin) val accessControl = loadAccessControl[AlignmentRecord](args.accessControl) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala index efc8b8cd35..8ba83c3d13 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala @@ -18,17 +18,17 @@ package org.bdgenomics.adam.cli import java.util - import org.apache.avro.generic.{ GenericDatumWriter, IndexedRecord } import org.apache.avro.io.EncoderFactory import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkContext import org.bdgenomics.adam.util.ParquetFileTraversable +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } import scala.collection.JavaConversions._ -object PrintADAM extends ADAMCommandCompanion { +object PrintADAM extends BDGCommandCompanion { val commandName: String = "print" val commandDescription: String = "Print an ADAM formatted file" @@ -48,7 +48,7 @@ class PrintADAMArgs extends Args4jBase { var prettyRaw: Boolean = false } -class PrintADAM(protected val args: PrintADAMArgs) extends ADAMSparkCommand[PrintADAMArgs] { +class PrintADAM(protected val args: PrintADAMArgs) extends BDGSparkCommand[PrintADAMArgs] { val companion = PrintADAM /** @@ -98,7 +98,7 @@ class PrintADAM(protected val args: PrintADAMArgs) extends ADAMSparkCommand[Prin }) } - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { val output = Option(args.outputFile) args.filesToPrint.foreach(file => { displayRaw(sc, file, pretty = args.prettyRaw, output = output) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala index df20805282..61c288ced9 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala @@ -23,9 +23,10 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models._ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.Feature +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Option => option, Argument } -object PrintGenes extends ADAMCommandCompanion { +object PrintGenes extends BDGCommandCompanion { val commandName: String = "print_genes" val commandDescription: String = "Load a GTF file containing gene annotations and print the corresponding gene models" @@ -40,11 +41,11 @@ class PrintGenesArgs extends Args4jBase with ParquetArgs with Serializable { } class PrintGenes(protected val args: PrintGenesArgs) - extends ADAMSparkCommand[PrintGenesArgs] with Serializable { + extends BDGSparkCommand[PrintGenesArgs] with Serializable { val companion = PrintGenes - def run(sc: SparkContext, job: Job): Unit = { + def run(sc: SparkContext): Unit = { val genes: RDD[Gene] = sc.loadGenes(args.gtfInput) genes.map(printGene).collect().foreach(println) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala index 43e4068e62..f928e5c24b 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala @@ -17,25 +17,25 @@ */ package org.bdgenomics.adam.cli -import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.bdgenomics.adam.projections.AlignmentRecordField._ import org.bdgenomics.adam.projections.Projection import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord -import org.kohsuke.args4j.{ Argument, Option } +import org.bdgenomics.utils.cli._ +import org.kohsuke.args4j.{ Argument, Option => A4JOption } /** * Reads in the tagStrings field of every record, and prints out the set of unique * tags found in those fields along with the number of records that have each particular * tag. */ -object PrintTags extends ADAMCommandCompanion { +object PrintTags extends BDGCommandCompanion { val commandName: String = "print_tags" val commandDescription: String = "Prints the values and counts of all tags in a set of records" - def apply(cmdLine: Array[String]): ADAMCommand = { + def apply(cmdLine: Array[String]): BDGCommand = { new PrintTags(Args4j[PrintTagsArgs](cmdLine)) } } @@ -44,20 +44,20 @@ class PrintTagsArgs extends Args4jBase with ParquetArgs { @Argument(required = true, metaVar = "INPUT", usage = "The ADAM file to scan for tags", index = 0) val inputPath: String = null - @Option(required = false, name = "-list", + @A4JOption(required = false, name = "-list", usage = "When value is set to , also lists the first N attribute fields for ADAMRecords in the input") var list: String = null - @Option(required = false, name = "-count", + @A4JOption(required = false, name = "-count", usage = "comma-separated list of tag names; for each tag listed, we print the distinct values and their counts") var count: String = null } -class PrintTags(protected val args: PrintTagsArgs) extends ADAMSparkCommand[PrintTagsArgs] { - val companion: ADAMCommandCompanion = PrintTags +class PrintTags(protected val args: PrintTagsArgs) extends BDGSparkCommand[PrintTagsArgs] { + val companion: BDGCommandCompanion = PrintTags - def run(sc: SparkContext, job: Job): Unit = { + override def run(sc: SparkContext): Unit = { val toCount = if (args.count != null) args.count.split(",").toSet else Set() val proj = Projection(attributes, primaryAlignment, readMapped, readPaired, failedVendorQualityChecks) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala index 82edd945db..5f23de0b8c 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala @@ -27,9 +27,10 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rich.RichVariant import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -object Transform extends ADAMCommandCompanion { +object Transform extends BDGCommandCompanion { val commandName = "transform" val commandDescription = "Convert SAM/BAM to ADAM format and optionally perform read pre-processing transformations" @@ -95,7 +96,7 @@ class TransformArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs { var forceLoadParquet: Boolean = false } -class Transform(protected val args: TransformArgs) extends ADAMSparkCommand[TransformArgs] with Logging { +class Transform(protected val args: TransformArgs) extends BDGSparkCommand[TransformArgs] with Logging { val companion = Transform def apply(rdd: RDD[AlignmentRecord]): RDD[AlignmentRecord] = { @@ -162,7 +163,7 @@ class Transform(protected val args: TransformArgs) extends ADAMSparkCommand[Tran adamRecords } - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { this.apply({ if (args.forceLoadBam) { sc.loadBam(args.inputPath) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala index 52cba7a9af..aca26ec3d4 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala @@ -17,15 +17,16 @@ */ package org.bdgenomics.adam.cli -import org.bdgenomics.adam.models.{ SequenceDictionary, VariantContext } -import org.bdgenomics.adam.rdd.ADAMContext._ import org.apache.hadoop.mapreduce.Job import org.apache.spark.{ Logging, SparkContext } import org.apache.spark.rdd.RDD +import org.bdgenomics.adam.models.{ SequenceDictionary, VariantContext } +import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Option => Args4jOption, Argument } import java.io.File -object Vcf2ADAM extends ADAMCommandCompanion { +object Vcf2ADAM extends BDGCommandCompanion { val commandName = "vcf2adam" val commandDescription = "Convert a VCF file to the corresponding ADAM format" @@ -51,10 +52,10 @@ class Vcf2ADAMArgs extends Args4jBase with ParquetSaveArgs { var onlyvariants: Boolean = false } -class Vcf2ADAM(val args: Vcf2ADAMArgs) extends ADAMSparkCommand[Vcf2ADAMArgs] with DictionaryCommand with Logging { +class Vcf2ADAM(val args: Vcf2ADAMArgs) extends BDGSparkCommand[Vcf2ADAMArgs] with DictionaryCommand with Logging { val companion = Vcf2ADAM - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { var dictionary: Option[SequenceDictionary] = loadSequenceDictionary(args.dictionaryFile) if (dictionary.isDefined) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala index a526ded134..072430aceb 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala @@ -15,22 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* -* Copyright (c) 2014. Mount Sinai School of Medicine -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - package org.bdgenomics.adam.cli import org.apache.hadoop.mapreduce.Job @@ -41,9 +25,10 @@ import org.bdgenomics.adam.converters.VariantAnnotationConverter import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rich.RichVariant import org.bdgenomics.formats.avro._ +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -object VcfAnnotation2ADAM extends ADAMCommandCompanion { +object VcfAnnotation2ADAM extends BDGCommandCompanion { val commandName = "anno2adam" val commandDescription = "Convert a annotation file (in VCF format) to the corresponding ADAM format" @@ -62,10 +47,10 @@ class VcfAnnotation2ADAMArgs extends Args4jBase with ParquetSaveArgs { var currentAnnotations: String = null } -class VcfAnnotation2ADAM(val args: VcfAnnotation2ADAMArgs) extends ADAMSparkCommand[VcfAnnotation2ADAMArgs] with Logging { +class VcfAnnotation2ADAM(val args: VcfAnnotation2ADAMArgs) extends BDGSparkCommand[VcfAnnotation2ADAMArgs] with Logging { val companion = VcfAnnotation2ADAM - def run(sc: SparkContext, job: Job) { + def run(sc: SparkContext) { log.info("Reading VCF file from %s".format(args.vcfFile)) val annotations: RDD[DatabaseVariantAnnotation] = sc.loadVcfAnnotations(args.vcfFile) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala index 65d5253cd0..c8d7b16930 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala @@ -20,12 +20,12 @@ package org.bdgenomics.adam.cli import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD -import org.bdgenomics.adam.rdd.ADAMSaveArgs +import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -import org.bdgenomics.adam.rdd.ADAMContext._ -class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveArgs { +class ViewArgs extends Args4jBase with ParquetArgs with SaveArgs { @Argument(required = true, metaVar = "INPUT", usage = "The ADAM, BAM or SAM file to view", index = 0) var inputPath: String = null @@ -74,7 +74,7 @@ class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveArgs { var outputPathArg: String = null } -object View extends ADAMCommandCompanion { +object View extends BDGCommandCompanion { val commandName = "view" val commandDescription = "View certain reads from an alignment-record file." @@ -93,7 +93,7 @@ object View extends ADAMCommandCompanion { * * It is agnostic to its input and output being SAM, BAM, or ADAM files; when printing to stdout it prints SAM. */ -class View(val args: ViewArgs) extends ADAMSparkCommand[ViewArgs] { +class View(val args: ViewArgs) extends BDGSparkCommand[ViewArgs] { val companion = View type ReadFilter = (AlignmentRecord => Boolean) @@ -148,7 +148,7 @@ class View(val args: ViewArgs) extends ADAMSparkCommand[ViewArgs] { reads } - def run(sc: SparkContext, job: Job) = { + def run(sc: SparkContext) = { val reads: RDD[AlignmentRecord] = applyFilters(sc.loadAlignments(args.inputPath)) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala index 1a2788a1e0..3c5255fde8 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala @@ -19,6 +19,7 @@ package org.bdgenomics.adam.cli import java.io.PrintWriter +import org.bdgenomics.utils.cli._ import org.kohsuke.args4j.Option import scala.io.Source @@ -37,7 +38,7 @@ class Wig2BedArgs extends Args4jBase { * guarantees where the sync markers are. This makes it difficult to use as a * "splittable" format, and necessitates processing the file locally. */ -object WigFix2Bed extends ADAMCommandCompanion { +object WigFix2Bed extends BDGCommandCompanion { val commandName = "wigfix2bed" val commandDescription = "Locally convert a wigFix file to BED format" @@ -51,7 +52,7 @@ object WigFix2Bed extends ADAMCommandCompanion { } } -class WigFix2Bed(val args: Wig2BedArgs) extends ADAMCommand { +class WigFix2Bed(val args: Wig2BedArgs) extends BDGCommand { val companion = WigFix2Bed def run() { diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala index bbd1a19df5..78a5cce39c 100644 --- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala +++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala @@ -18,10 +18,10 @@ package org.bdgenomics.adam.cli import java.io._ - import org.bdgenomics.adam.projections.Projection import org.bdgenomics.adam.projections.FeatureField._ -import org.bdgenomics.adam.util.{ HadoopUtil, ADAMFunSuite } +import org.bdgenomics.adam.util.ADAMFunSuite +import org.bdgenomics.utils.cli.Args4j import org.bdgenomics.formats.avro.Feature class Features2ADAMSuite extends ADAMFunSuite { @@ -42,8 +42,7 @@ class Features2ADAMSuite extends ADAMFunSuite { val args: Features2ADAMArgs = Args4j.apply[Features2ADAMArgs](argLine) val features2Adam = new Features2ADAM(args) - val job = HadoopUtil.newJob() - features2Adam.run(sc, job) + features2Adam.run(sc) val schema = Projection(featureId, contig, start, strand) val lister = new ParquetLister[Feature](Some(schema)) @@ -88,8 +87,7 @@ class Features2ADAMSuite extends ADAMFunSuite { val adamArgLine = "%s %s".format(bedPath, outputPath).split("\\s+") val adamArgs: Features2ADAMArgs = Args4j.apply[Features2ADAMArgs](adamArgLine) val features2Adam = new Features2ADAM(adamArgs) - val job = HadoopUtil.newJob() - features2Adam.run(sc, job) + features2Adam.run(sc) val schema = Projection(featureId, contig, start, end, value) val lister = new ParquetLister[Feature](Some(schema)) diff --git a/adam-cli/src/test/scala/FlagStatTest.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala similarity index 98% rename from adam-cli/src/test/scala/FlagStatTest.scala rename to adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala index eabcdb056a..86d6d1b644 100644 --- a/adam-cli/src/test/scala/FlagStatTest.scala +++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala @@ -16,16 +16,17 @@ * limitations under the License. */ package org.bdgenomics.adam.cli -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDD import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.adam.rdd.read.FlagStat._ import org.bdgenomics.adam.rdd.read.{ DuplicateMetrics, FlagStatMetrics } import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro.AlignmentRecord -import org.bdgenomics.adam.rdd.read.FlagStat._ +import org.bdgenomics.utils.cli.Args4j -class FlagStatTest extends ADAMFunSuite { +class FlagStatSuite extends ADAMFunSuite { sparkTest("Standard FlagStat test") { diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala index d50540bf67..de4217d1e9 100644 --- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala +++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala @@ -18,10 +18,11 @@ package org.bdgenomics.adam.cli import java.io._ - import org.apache.avro.generic.GenericRecord -import org.bdgenomics.adam.util.{ ADAMFunSuite, HadoopUtil } +import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro.Genotype +import org.bdgenomics.utils.cli.Args4j +import org.bdgenomics.utils.misc.HadoopUtil class FlattenSuite extends ADAMFunSuite { @@ -40,8 +41,7 @@ class FlattenSuite extends ADAMFunSuite { val argLine = "%s %s".format(inputPath, outputPath).split("\\s+") val args: Vcf2ADAMArgs = Args4j.apply[Vcf2ADAMArgs](argLine) val vcf2Adam = new Vcf2ADAM(args) - val job = HadoopUtil.newJob() - vcf2Adam.run(sc, job) + vcf2Adam.run(sc) val lister = new ParquetLister[Genotype]() val records = lister.materialize(outputPath).toSeq @@ -54,8 +54,7 @@ class FlattenSuite extends ADAMFunSuite { val flattenArgLine = "%s %s".format(outputPath, flatPath).split("\\s+") val flattenArgs: FlattenArgs = Args4j.apply[FlattenArgs](flattenArgLine) val flatten = new Flatten(flattenArgs) - val flattenJob = HadoopUtil.newJob() - flatten.run(sc, flattenJob) + flatten.run(sc) val flatLister = new ParquetLister[GenericRecord]() val flatRecords = flatLister.materialize(flatPath).toSeq diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala index 492a499863..0e2fb77a86 100644 --- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala +++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala @@ -18,7 +18,7 @@ package org.bdgenomics.adam.cli import java.io._ -import org.bdgenomics.adam.util.{ HadoopUtil, ADAMFunSuite } +import org.bdgenomics.adam.util.ADAMFunSuite class PluginExecutorSuite extends ADAMFunSuite { @@ -37,7 +37,7 @@ class PluginExecutorSuite extends ADAMFunSuite { val pluginExecutor = new PluginExecutor(args) val bytesWritten = new ByteArrayOutputStream() - scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob())) + scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc)) val outputString = bytesWritten.toString @@ -60,7 +60,7 @@ class PluginExecutorSuite extends ADAMFunSuite { val pluginExecutor = new PluginExecutor(args) val bytesWritten = new ByteArrayOutputStream() - scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob())) + scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc)) val outputString = bytesWritten.toString @@ -85,7 +85,7 @@ class PluginExecutorSuite extends ADAMFunSuite { val pluginExecutor = new PluginExecutor(args) val bytesWritten = new ByteArrayOutputStream() - scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob())) + scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc)) val outputString = bytesWritten.toString diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala index f4138ca4fc..f4b44d064a 100644 --- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala +++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.formats.avro.AlignmentRecord +import org.bdgenomics.utils.cli.Args4j class ViewSuite extends ADAMFunSuite { diff --git a/adam-core/pom.xml b/adam-core/pom.xml index 4e1967888d..415c7ccc15 100644 --- a/adam-core/pom.xml +++ b/adam-core/pom.xml @@ -3,13 +3,14 @@ 4.0.0 org.bdgenomics.adam - adam-parent + adam-parent_2.10 0.16.1-SNAPSHOT ../pom.xml - adam-core + + adam-core_2.10 jar - ADAM: Core + ADAM_2.10: Core @@ -112,18 +113,22 @@ - org.bdgenomics.bdg-utils - bdg-utils-misc + org.bdgenomics.utils + utils-misc_2.10 test-jar test - org.bdgenomics.bdg-utils - bdg-utils-metrics + org.bdgenomics.utils + utils-metrics_2.10 + + + org.bdgenomics.utils + utils-io_2.10 - org.bdgenomics.bdg-utils - bdg-utils-parquet + org.bdgenomics.utils + utils-cli_2.10 com.esotericsoftware.kryo @@ -131,7 +136,7 @@ org.scoverage - scalac-scoverage-plugin_${scala.artifact.suffix} + scalac-scoverage-plugin_2.10 org.bdgenomics.bdg-formats @@ -151,7 +156,7 @@ org.apache.spark - spark-core_${scala.artifact.suffix} + spark-core_2.10 it.unimi.dsi @@ -183,7 +188,7 @@ org.scalatest - scalatest_${scala.artifact.suffix} + scalatest_2.10 test diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala index e6fdf56890..ffb4c7192e 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala @@ -86,8 +86,5 @@ class VariantContext( val variant: RichVariant, val genotypes: Iterable[Genotype], val databases: Option[DatabaseVariantAnnotation] = None) { - def this(variant: RichVariant, genotypes: Iterable[Genotype], database: Option[DatabaseVariantAnnotation] = None) = { - this(ReferencePosition(variant), variant, genotypes, database) - } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index d4b98a1337..707448d3b2 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -38,9 +38,9 @@ import org.bdgenomics.adam.rdd.features._ import org.bdgenomics.adam.rdd.read.AlignmentRecordRDDFunctions import org.bdgenomics.adam.rdd.variation._ import org.bdgenomics.adam.rich.RichAlignmentRecord -import org.bdgenomics.adam.util.HadoopUtil import org.bdgenomics.formats.avro._ import org.bdgenomics.utils.instrumentation.Metrics +import org.bdgenomics.utils.misc.HadoopUtil import org.seqdoop.hadoop_bam.util.SAMHeaderReader import org.seqdoop.hadoop_bam._ import parquet.avro.{ AvroParquetInputFormat, AvroReadSupport } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala index 2a9c55818f..1f8ccf861c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala @@ -20,40 +20,27 @@ package org.bdgenomics.adam.rdd import java.util.logging.Level import org.apache.avro.Schema import org.apache.avro.generic.IndexedRecord +import org.apache.hadoop.mapreduce.{ OutputFormat => NewOutputFormat, _ } import org.apache.spark.Logging import org.apache.spark.rdd.{ InstrumentedOutputFormat, RDD } import org.apache.spark.rdd.MetricsContext._ import org.bdgenomics.adam.instrumentation.Timers._ import org.bdgenomics.adam.models._ -import org.bdgenomics.adam.util.{ - HadoopUtil, - ParquetLogger -} +import org.bdgenomics.adam.util.ParquetLogger +import org.bdgenomics.utils.cli.SaveArgs +import org.bdgenomics.utils.misc.HadoopUtil import parquet.avro.AvroParquetOutputFormat import parquet.hadoop.ParquetOutputFormat import parquet.hadoop.metadata.CompressionCodecName import parquet.hadoop.util.ContextUtil -import org.apache.avro.generic.IndexedRecord -import org.apache.hadoop.mapreduce.{ OutputFormat => NewOutputFormat, _ } - -trait ADAMParquetArgs { - var blockSize: Int - var pageSize: Int - var compressionCodec: CompressionCodecName - var disableDictionaryEncoding: Boolean -} - -trait ADAMSaveArgs extends ADAMParquetArgs { - var outputPath: String -} -trait ADAMSaveAnyArgs extends ADAMSaveArgs { +trait ADAMSaveAnyArgs extends SaveArgs { var sortFastqOutput: Boolean } class ADAMRDDFunctions[T <% IndexedRecord: Manifest](rdd: RDD[T]) extends Serializable with Logging { - def adamParquetSave(args: ADAMSaveArgs): Unit = { + def adamParquetSave(args: SaveArgs): Unit = { adamParquetSave( args.outputPath, args.blockSize, diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala index 198c129196..79b13f83fa 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala @@ -26,11 +26,9 @@ import org.bdgenomics.adam.converters.FragmentConverter import org.bdgenomics.adam.models._ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSequenceDictionaryRDDAggregator -import org.bdgenomics.adam.util.{ - HadoopUtil, - ParquetLogger -} +import org.bdgenomics.adam.util.ParquetLogger import org.bdgenomics.formats.avro._ +import org.bdgenomics.utils.misc.HadoopUtil import parquet.avro.AvroParquetOutputFormat import parquet.hadoop.ParquetOutputFormat import parquet.hadoop.metadata.CompressionCodecName diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala index 8b50f7fb9f..7bf5b3e882 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala @@ -34,13 +34,14 @@ import org.bdgenomics.adam.instrumentation.Timers._ import org.bdgenomics.adam.models._ import org.bdgenomics.adam.models.ReferenceRegion._ import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rdd.{ ADAMSaveArgs, ADAMSaveAnyArgs, ADAMSequenceDictionaryRDDAggregator } +import org.bdgenomics.adam.rdd.{ ADAMSaveAnyArgs, ADAMSequenceDictionaryRDDAggregator } import org.bdgenomics.adam.rdd.read.correction.{ ErrorCorrection, TrimReads } import org.bdgenomics.adam.rdd.read.realignment.RealignIndels import org.bdgenomics.adam.rdd.read.recalibration.BaseQualityRecalibration import org.bdgenomics.adam.rich.RichAlignmentRecord import org.bdgenomics.adam.util.MapTools import org.bdgenomics.formats.avro._ +import org.bdgenomics.utils.cli.SaveArgs class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) extends ADAMSequenceDictionaryRDDAggregator[AlignmentRecord](rdd) { @@ -67,7 +68,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) rdd.filter(overlapsQuery) } - def maybeSaveBam(args: ADAMSaveArgs): Boolean = { + def maybeSaveBam(args: SaveArgs): Boolean = { if (args.outputPath.endsWith(".sam")) { log.info("Saving data in SAM format") rdd.adamSAMSave(args.outputPath) @@ -89,7 +90,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) false } - def adamAlignedRecordSave(args: ADAMSaveArgs) = { + def adamAlignedRecordSave(args: SaveArgs) = { maybeSaveBam(args) || { rdd.adamParquetSave(args); true } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala index 0499015f28..9a75831e85 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala @@ -32,8 +32,8 @@ import org.bdgenomics.adam.models.{ import org.bdgenomics.adam.rdd.ADAMSequenceDictionaryRDDAggregator import org.bdgenomics.adam.rich.RichVariant import org.bdgenomics.adam.rich.RichGenotype._ -import org.bdgenomics.adam.util.HadoopUtil import org.bdgenomics.formats.avro.{ Genotype, GenotypeType, DatabaseVariantAnnotation } +import org.bdgenomics.utils.misc.HadoopUtil import org.seqdoop.hadoop_bam._ class VariantContextRDDFunctions(rdd: RDD[VariantContext]) extends ADAMSequenceDictionaryRDDAggregator[VariantContext](rdd) with Logging { @@ -144,7 +144,7 @@ class GenotypeRDDFunctions(rdd: RDD[Genotype]) extends Serializable with Logging def toVariantContext(): RDD[VariantContext] = { rdd.keyBy({ g => RichVariant.variantToRichVariant(g.getVariant) }) .groupByKey - .map { case (v: RichVariant, g) => new VariantContext(v, g, None) } + .map { case (v: RichVariant, g) => new VariantContext(ReferencePosition(v), v, g, None) } } def filterByOverlappingRegion(query: ReferenceRegion): RDD[Genotype] = { diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala deleted file mode 100644 index 77d0967839..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.util - -import org.apache.hadoop.mapreduce.Job -import org.apache.spark.SparkContext -import org.apache.hadoop.fs.FileStatus -import org.apache.hadoop.conf.Configuration - -object HadoopUtil { - - def newJob(): Job = { - newJob(new Configuration()) - } - - def newJob(config: Configuration): Job = { - val jobClass: Class[_] = Class.forName("org.apache.hadoop.mapreduce.Job") - try { - // Use the getInstance method in Hadoop 2 - jobClass.getMethod("getInstance", classOf[Configuration]).invoke(null, config).asInstanceOf[Job] - } catch { - case ex: NoSuchMethodException => - // Drop back to Hadoop 1 constructor - jobClass.getConstructor(classOf[Configuration]).newInstance(config).asInstanceOf[Job] - } - } - - /** - * Create a job using either the Hadoop 1 or 2 API - * @param sc A Spark context - */ - def newJob(sc: SparkContext): Job = { - newJob(sc.hadoopConfiguration) - } - - /** - * In Hadoop 2.x, isDir is deprecated in favor of isDirectory - * @param fs - * @return - */ - def isDirectory(fs: FileStatus): Boolean = { - val fsClass: Class[_] = fs.getClass - try { - // Use the isDirectory method in Hadoop 2 - fsClass.getMethod("isDirectory").invoke(fs).asInstanceOf[Boolean] - } catch { - case ex: NoSuchMethodException => - // Drop back to Hadoop 1 isDir method - fsClass.getMethod("isDir").invoke(fs).asInstanceOf[Boolean] - } - } - -} diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala index 259f56ea00..76e259449c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.{ FileSystem, Path } import parquet.avro.AvroParquetReader import org.apache.avro.generic.IndexedRecord import org.apache.spark.SparkContext +import org.bdgenomics.utils.misc.HadoopUtil class ParquetFileTraversable[T <: IndexedRecord](sc: SparkContext, file: Path) extends Traversable[T] { def this(sc: SparkContext, file: String) = this(sc, new Path(file)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala index e94df85e94..a83a0162fb 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala @@ -19,7 +19,7 @@ package org.bdgenomics.adam.util import java.nio.{ ByteOrder, ByteBuffer } -import org.bdgenomics.utils.parquet.io.ByteAccess +import org.bdgenomics.utils.io.ByteAccess import org.bdgenomics.adam.models.ReferenceRegion object TwoBitFile { diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala index 3673e6946f..02952f1d49 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala @@ -19,7 +19,7 @@ package org.bdgenomics.adam.util import java.io.File -import org.bdgenomics.utils.parquet.io.LocalFileByteAccess +import org.bdgenomics.utils.io.LocalFileByteAccess import org.bdgenomics.adam.models.ReferenceRegion import org.scalatest.FunSuite diff --git a/bin/adam-submit b/bin/adam-submit index b7bd138dbb..b583bcdfcf 100755 --- a/bin/adam-submit +++ b/bin/adam-submit @@ -38,21 +38,23 @@ REPO_DIR="$SCRIPT_DIR/adam-cli/target/appassembler/repo/" fi # Find the ADAM CLI jar -CLI_DIR="$REPO_DIR/org/bdgenomics/adam/adam-cli" -num_versions=$(ls "$CLI_DIR" | wc -l) +ADAM_DIR="$REPO_DIR/org/bdgenomics/adam/" +num_versions=$(ls ${ADAM_DIR} | grep cli | wc -l) if [ "$num_versions" -eq "0" ]; then - echo "Failed to find adam-cli jar in $CLI_DIR" + echo "Failed to find adam-cli jar in $ADAM_DIR" echo "You need to build ADAM before running this program." exit 1 fi if [ "$num_versions" -gt "1" ]; then - versions_list=$(ls "$CLI_DIR") - echo "Found multiple ADAM CLI versions in $CLI_DIR:" + versions_list=$(ls "$ADAM_DIR" | grep cli) + echo "Found multiple ADAM CLI versions in $ADAM_DIR:" echo "$versions_list" echo "Please remove all but one." exit 1 fi -ADAM_CLI_JAR=$(ls $CLI_DIR/*/adam-cli-*.jar) +CLI=$(ls "$ADAM_DIR" | grep cli) +CLI_DIR="${ADAM_DIR}/${CLI}" +ADAM_CLI_JAR=$(ls $CLI_DIR/*/adam-cli_2.1[01]-*.jar) # Find spark-submit script if [ -z "$SPARK_HOME" ]; then diff --git a/distribution/pom.xml b/distribution/pom.xml index e4a09eb76e..6d90529352 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -8,9 +8,9 @@ ../pom.xml - adam-distribution + adam-distribution_2.10 pom - ADAM: Distribution + ADAM_2.10: Distribution 2.2.0 0.99.2 - 0.1.1 + 0.2.1 1.129 @@ -65,8 +63,8 @@ - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-2.10.4/classes + target/scala-2.10.4/test-classes @@ -249,8 +247,7 @@ - ${scala.version} - incremental + 2.10.4 true -unchecked @@ -301,7 +298,7 @@ org.scoverage - scalac-scoverage-plugin_${scala.artifact.suffix} + scalac-scoverage-plugin_2.10 ${scoverage.version} @@ -322,47 +319,52 @@ org.scala-lang scala-library - ${scala.version} + 2.10.4 org.bdgenomics.adam - adam-core + adam-core_2.10 ${project.version} org.bdgenomics.adam - adam-core + adam-core_2.10 ${project.version} test-jar test org.bdgenomics.adam - adam-apis + adam-apis_2.10 ${project.version} org.bdgenomics.adam - adam-apis + adam-apis_2.10 ${project.version} test-jar test - org.bdgenomics.bdg-utils - bdg-utils-misc + org.bdgenomics.utils + utils-misc_2.10 ${utils.version} test-jar test - org.bdgenomics.bdg-utils - bdg-utils-parquet + org.bdgenomics.utils + utils-cli_2.10 ${utils.version} - org.bdgenomics.bdg-utils - bdg-utils-metrics + org.bdgenomics.utils + utils-io_2.10 + ${utils.version} + + + org.bdgenomics.utils + utils-metrics_2.10 ${utils.version} @@ -399,7 +401,7 @@ org.apache.spark - spark-core_${scala.artifact.suffix} + spark-core_2.10 ${spark.version} provided @@ -427,6 +429,12 @@ com.twitter parquet-scala_2.10 ${parquet.version} + + + org.scala-lang + scala-library + + org.seqdoop @@ -451,7 +459,7 @@ org.scalatest - scalatest_${scala.artifact.suffix} + scalatest_2.10 2.2.2 test @@ -482,21 +490,6 @@ - - org.fusesource.scalate - scalate-core_2.10 - 1.6.1 - - - org.scalatra - scalatra-json_2.10 - 2.3.0 - - - org.scalatra - scalatra_2.10 - 2.3.0 - com.google.guava guava @@ -626,8 +619,7 @@ - ${scala.version} - incremental + 2.10.4 true -unchecked @@ -649,7 +641,7 @@ org.scoverage - scalac-scoverage-plugin_${scala.artifact.suffix} + scalac-scoverage-plugin_2.10 ${scoverage.version} diff --git a/scripts/changelog.sh b/scripts/changelog.sh index efab710609..016e05d896 100755 --- a/scripts/changelog.sh +++ b/scripts/changelog.sh @@ -19,7 +19,7 @@ echo "# ADAM #" git log | grep -E "Merge pull request|prepare release" | grep -vi "Revert" | uniq | while read l do - release=`echo $l | grep "\[maven-release-plugin\] prepare release" | cut -d "-" -f 5` + release=`echo $l | grep "prepare release" | grep -v 2.11 | awk -F'-' '{print $NF}' | awk -F'_' '{ print $1 }'` PR=`echo $l| grep -E -o "Merge pull request #[^ ]*" | cut -d "#" -f 2` # echo $l if [ -n "$release" ] @@ -30,7 +30,7 @@ do if [ -n "$PR" ] then JSON=`curl -u $username:$password -s https://api.github.com/repos/bigdatagenomics/adam/pulls/$PR | tr "\n" " "` - DESC_RAW=$(echo $JSON | grep -Po '"title":.*?[^\\]",' | cut -d "\"" -f 4- | head -n 1 | sed -e "s/\\\\//g") + DESC_RAW=$(echo $JSON | egrep -o '"title":.*?[^\\]",' | cut -d "\"" -f 4- | head -n 1 | sed -e "s/\\\\//g") DESC=$(echo ${DESC_RAW%\",}) echo "* ISSUE [$PR](https://github.com/bigdatagenomics/adam/pull/$PR): ${DESC}" fi diff --git a/scripts/move_to_scala_2.10.sh b/scripts/move_to_scala_2.10.sh new file mode 100755 index 0000000000..2ecb6824d4 --- /dev/null +++ b/scripts/move_to_scala_2.10.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set +x + +find . -name "pom.xml" -exec sed -e "s/2.11.4/2.10.4/g" -e "s/2.11/2.10/g" -i .2.10.bak '{}' \; +find . -name "*.2.10.bak" -exec rm {} \; diff --git a/scripts/move_to_scala_2.11.sh b/scripts/move_to_scala_2.11.sh new file mode 100755 index 0000000000..86f7b8793a --- /dev/null +++ b/scripts/move_to_scala_2.11.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set +x + +find . -name "pom.xml" -exec sed -e "s/2.10.4/2.11.4/g" -e "s/2.10/2.11/g" -i .2.11.bak '{}' \; +find . -name "pom.xml" -exec sed -e "s/parquet-scala_2.11/parquet-scala_2.10/g" -i .2.11.2.bak '{}' \; +find . -name "*.2.11.*bak" -exec rm {} \; diff --git a/scripts/release/release.sh b/scripts/release/release.sh index dcb9664ec6..7522397e38 100755 --- a/scripts/release/release.sh +++ b/scripts/release/release.sh @@ -1,3 +1,14 @@ #!/bin/sh +# do scala 2.10 release mvn -P distribution -Dresume=false release:clean release:prepare release:perform + +# do scala 2.11 release +./scripts/move_to_scala_2.11.sh +git commit -a -m "Modifying pom.xml files for 2.11 release." +mvn -P distribution -Dresume=false release:clean release:prepare release:perform + +# move back to 2.10 for development +./scripts/move_to_scala_2.10.sh +./scripts/changelog.sh | tee CHANGES.md +git commit -a -m "Modifying pom.xml files to move back to Scala 2.10 for development."