diff --git a/adam-apis/pom.xml b/adam-apis/pom.xml
index 9645279672..76a256d9a8 100644
--- a/adam-apis/pom.xml
+++ b/adam-apis/pom.xml
@@ -3,14 +3,14 @@
4.0.0
org.bdgenomics.adam
- adam-parent
+ adam-parent_2.10
0.16.1-SNAPSHOT
../pom.xml
- adam-apis
+ adam-apis_2.10
jar
- ADAM: APIs for Java
+ ADAM_2.10: APIs for Java
@@ -95,11 +95,11 @@
org.apache.spark
- spark-core_${scala.artifact.suffix}
+ spark-core_2.10
- org.bdgenomics.bdg-utils
- bdg-utils-misc
+ org.bdgenomics.utils
+ utils-misc_2.10
test-jar
test
@@ -109,11 +109,11 @@
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
test-jar
test
@@ -131,7 +131,7 @@
org.scalatest
- scalatest_${scala.artifact.suffix}
+ scalatest_2.10
test
diff --git a/adam-cli/pom.xml b/adam-cli/pom.xml
index c6bed3d7a3..71a794d226 100644
--- a/adam-cli/pom.xml
+++ b/adam-cli/pom.xml
@@ -3,14 +3,14 @@
4.0.0
org.bdgenomics.adam
- adam-parent
+ adam-parent_2.10
0.16.1-SNAPSHOT
../pom.xml
- adam-cli
+ adam-cli_2.10
jar
- ADAM: CLI
+ ADAM_2.10: CLI
@@ -104,25 +104,29 @@
org.apache.spark
- spark-core_${scala.artifact.suffix}
+ spark-core_2.10
- org.bdgenomics.bdg-utils
- bdg-utils-misc
+ org.bdgenomics.utils
+ utils-misc_2.10
test-jar
test
- org.bdgenomics.bdg-utils
- bdg-utils-parquet
+ org.bdgenomics.utils
+ utils-io_2.10
- org.bdgenomics.bdg-utils
- bdg-utils-metrics
+ org.bdgenomics.utils
+ utils-cli_2.10
+
+
+ org.bdgenomics.utils
+ utils-metrics_2.10
org.scoverage
- scalac-scoverage-plugin_${scala.artifact.suffix}
+ scalac-scoverage-plugin_2.10
org.bdgenomics.bdg-formats
@@ -130,21 +134,21 @@
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
test-jar
test
org.bdgenomics.adam
- adam-apis
+ adam-apis_2.10
org.bdgenomics.adam
- adam-apis
+ adam-apis_2.10
test-jar
test
@@ -160,21 +164,9 @@
args4j
args4j
-
- org.fusesource.scalate
- scalate-core_2.10
-
-
- org.scalatra
- scalatra-json_2.10
-
-
- org.scalatra
- scalatra_2.10
-
org.scalatest
- scalatest_${scala.artifact.suffix}
+ scalatest_2.10
test
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala
index 0c69918a7b..1ed6b28e1e 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Vcf.scala
@@ -17,17 +17,18 @@
*/
package org.bdgenomics.adam.cli
-import org.bdgenomics.formats.avro.Genotype
-import org.bdgenomics.adam.rdd.ADAMContext._
-import org.kohsuke.args4j.{ Option => Args4jOption, Argument }
+import java.io.File
import org.apache.spark.rdd.RDD
import org.apache.spark.{ Logging, SparkContext }
import org.apache.hadoop.mapreduce.Job
-import java.io.File
import org.bdgenomics.adam.models.SequenceDictionary
+import org.bdgenomics.adam.rdd.ADAMContext._
+import org.bdgenomics.formats.avro.Genotype
+import org.bdgenomics.utils.cli._
+import org.kohsuke.args4j.{ Option => Args4jOption, Argument }
import scala.Option
-object ADAM2Vcf extends ADAMCommandCompanion {
+object ADAM2Vcf extends BDGCommandCompanion {
val commandName = "adam2vcf"
val commandDescription = "Convert an ADAM variant to the VCF ADAM format"
@@ -54,10 +55,10 @@ class ADAM2VcfArgs extends Args4jBase with ParquetArgs {
var sort: Boolean = false
}
-class ADAM2Vcf(val args: ADAM2VcfArgs) extends ADAMSparkCommand[ADAM2VcfArgs] with DictionaryCommand with Logging {
+class ADAM2Vcf(val args: ADAM2VcfArgs) extends BDGSparkCommand[ADAM2VcfArgs] with DictionaryCommand with Logging {
val companion = ADAM2Vcf
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
var dictionary: Option[SequenceDictionary] = loadSequenceDictionary(args.dictionaryFile)
if (dictionary.isDefined)
log.info("Using contig translation")
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala
deleted file mode 100644
index 93ebec5f26..0000000000
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMCommand.scala
+++ /dev/null
@@ -1,91 +0,0 @@
-/**
- * Licensed to Big Data Genomics (BDG) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The BDG licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.bdgenomics.adam.cli
-
-import java.io.{ StringWriter, PrintWriter }
-
-import org.apache.hadoop.mapreduce.Job
-import org.apache.spark.{ SparkConf, Logging, SparkContext }
-import org.bdgenomics.utils.instrumentation._
-import org.bdgenomics.adam.util.HadoopUtil
-
-trait ADAMCommandCompanion {
- val commandName: String
- val commandDescription: String
-
- def apply(cmdLine: Array[String]): ADAMCommand
-
- // Make running an ADAM command easier from an IDE
- def main(cmdLine: Array[String]) {
- apply(cmdLine).run()
- }
-}
-
-trait ADAMCommand extends Runnable {
- val companion: ADAMCommandCompanion
-}
-
-trait ADAMSparkCommand[A <: Args4jBase] extends ADAMCommand with Logging {
- protected val args: A
-
- def run(sc: SparkContext, job: Job)
-
- def run() {
- val start = System.nanoTime()
- val conf = new SparkConf().setAppName("adam: " + companion.commandName)
- if (conf.getOption("spark.master").isEmpty) {
- conf.setMaster("local[%d]".format(Runtime.getRuntime.availableProcessors()))
- }
- val sc = new SparkContext(conf)
- val job = HadoopUtil.newJob()
- val metricsListener = initializeMetrics(sc)
- run(sc, job)
- val totalTime = System.nanoTime() - start
- printMetrics(totalTime, metricsListener)
- }
-
- def initializeMetrics(sc: SparkContext): Option[MetricsListener] = {
- if (args.printMetrics) {
- val metricsListener = new MetricsListener(new RecordedMetrics())
- sc.addSparkListener(metricsListener)
- Metrics.initialize(sc)
- Some(metricsListener)
- } else {
- // This avoids recording metrics if we have a recorder left over from previous use of this thread
- Metrics.stopRecording()
- None
- }
- }
-
- def printMetrics(totalTime: Long, metricsListener: Option[MetricsListener]) {
- logInfo("Overall Duration: " + DurationFormatting.formatNanosecondDuration(totalTime))
- if (args.printMetrics && metricsListener.isDefined) {
- // Set the output buffer size to 4KB by default
- val stringWriter = new StringWriter()
- val out = new PrintWriter(stringWriter)
- out.println("Metrics:")
- out.println()
- Metrics.print(out, Some(metricsListener.get.metrics.sparkMetrics.stageTimes))
- out.println()
- metricsListener.get.metrics.sparkMetrics.print(out)
- out.flush()
- logInfo(stringWriter.getBuffer.toString)
- }
- }
-
-}
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala
index 39f83aa34f..59319c56d4 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala
@@ -17,15 +17,16 @@
*/
package org.bdgenomics.adam.cli
+import java.util.logging.Level._
import org.apache.spark.Logging
+import org.bdgenomics.adam.util.ParquetLogger
+import org.bdgenomics.utils.cli._
import scala.Some
import scala.collection.mutable.ListBuffer
-import org.bdgenomics.adam.util.ParquetLogger
-import java.util.logging.Level._
object ADAMMain extends Logging {
- case class CommandGroup(name: String, commands: List[ADAMCommandCompanion])
+ case class CommandGroup(name: String, commands: List[BDGCommandCompanion])
private val commandGroups =
List(
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala
index 4dc213f7e6..2554a5a0f0 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Adam2Fastq.scala
@@ -25,6 +25,7 @@ import org.apache.spark.storage.StorageLevel
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Option => Args4JOption, Argument }
class Adam2FastqArgs extends ParquetLoadSaveArgs {
@@ -40,7 +41,7 @@ class Adam2FastqArgs extends ParquetLoadSaveArgs {
var disableProjection: Boolean = false
}
-object Adam2Fastq extends ADAMCommandCompanion {
+object Adam2Fastq extends BDGCommandCompanion {
override val commandName = "adam2fastq"
override val commandDescription = "Convert BAM to FASTQ files"
@@ -48,10 +49,10 @@ object Adam2Fastq extends ADAMCommandCompanion {
new Adam2Fastq(Args4j[Adam2FastqArgs](cmdLine))
}
-class Adam2Fastq(val args: Adam2FastqArgs) extends ADAMSparkCommand[Adam2FastqArgs] {
+class Adam2Fastq(val args: Adam2FastqArgs) extends BDGSparkCommand[Adam2FastqArgs] {
override val companion = Adam2Fastq
- override def run(sc: SparkContext, job: Job): Unit = {
+ override def run(sc: SparkContext): Unit = {
val projectionOpt =
if (!args.disableProjection)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala
index 233526481f..4e2d333928 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/AlleleCount.scala
@@ -23,9 +23,10 @@ import org.apache.spark.{ Logging, SparkContext }
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele }
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.Argument
-object AlleleCount extends ADAMCommandCompanion {
+object AlleleCount extends BDGCommandCompanion {
val commandName = "allelecount"
val commandDescription = "Calculate Allele frequencies"
@@ -65,10 +66,10 @@ object AlleleCountHelper extends Serializable {
}
}
-class AlleleCount(val args: AlleleCountArgs) extends ADAMSparkCommand[AlleleCountArgs] with Logging {
+class AlleleCount(val args: AlleleCountArgs) extends BDGSparkCommand[AlleleCountArgs] with Logging {
val companion = AlleleCount
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
val adamVariants: RDD[Genotype] = sc.loadGenotypes(args.adamFile)
AlleleCountHelper.countAlleles(adamVariants, args)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala
deleted file mode 100644
index 0aa4af218d..0000000000
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Args4j.scala
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Licensed to Big Data Genomics (BDG) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The BDG licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.bdgenomics.adam.cli
-
-import org.kohsuke.args4j.{ Option, CmdLineException, CmdLineParser }
-import scala.collection.JavaConversions._
-
-class Args4jBase {
- @Option(name = "-h", aliases = Array("-help", "--help", "-?"), usage = "Print help")
- var doPrintUsage: Boolean = false
- @Option(name = "-print_metrics", usage = "Print metrics to the log on completion")
- var printMetrics: Boolean = false
-}
-
-object Args4j {
- val helpOptions = Array("-h", "-help", "--help", "-?")
-
- def apply[T <% Args4jBase: Manifest](args: Array[String], ignoreCmdLineExceptions: Boolean = false): T = {
- val args4j: T = manifest[T].runtimeClass.asInstanceOf[Class[T]].newInstance()
- val parser = new CmdLineParser(args4j)
- parser.setUsageWidth(150);
-
- def displayHelp(exitCode: Int = 0) = {
- parser.printUsage(System.out)
- System.exit(exitCode)
- }
-
- // Work around for help processing in Args4j
- if (args.exists(helpOptions.contains(_))) {
- displayHelp()
- }
-
- try {
- parser.parseArgument(args.toList)
- if (args4j.doPrintUsage)
- displayHelp()
- } catch {
- case e: CmdLineException =>
- if (!ignoreCmdLineExceptions) {
- println(e.getMessage)
- displayHelp(1)
- }
- }
-
- args4j
- }
-
-}
-
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala
index c7fb9d66b7..2b0e7494ae 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Bam2ADAM.scala
@@ -24,15 +24,16 @@ import org.apache.hadoop.fs.Path
import org.bdgenomics.adam.converters.SAMRecordConverter
import org.bdgenomics.adam.models.{ RecordGroupDictionary, SequenceDictionary }
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
import parquet.avro.AvroParquetWriter
import scala.collection.JavaConversions._
-object Bam2ADAM extends ADAMCommandCompanion {
+object Bam2ADAM extends BDGCommandCompanion {
val commandName: String = "bam2adam"
val commandDescription: String = "Single-node BAM to ADAM converter (Note: the 'transform' command can take SAM or BAM as input)"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new Bam2ADAM(Args4j[Bam2ADAMArgs](cmdLine))
}
}
@@ -50,7 +51,7 @@ class Bam2ADAMArgs extends Args4jBase with ParquetArgs {
var qSize = 10000
}
-class Bam2ADAM(args: Bam2ADAMArgs) extends ADAMCommand {
+class Bam2ADAM(args: Bam2ADAMArgs) extends BDGCommand {
val companion = Bam2ADAM
val blockingQueue = new LinkedBlockingQueue[Option[(SAMRecord, SequenceDictionary, RecordGroupDictionary)]](args.qSize)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala
index e96bd65a59..8ad727eae2 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/BuildInformation.scala
@@ -17,16 +17,18 @@
*/
package org.bdgenomics.adam.cli
-object BuildInformation extends ADAMCommandCompanion {
+import org.bdgenomics.utils.cli._
+
+object BuildInformation extends BDGCommandCompanion {
val commandName: String = "buildinfo"
val commandDescription: String = "Display build information (use this for bug reports)"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new BuildInformation()
}
}
-class BuildInformation() extends ADAMCommand {
+class BuildInformation() extends BDGCommand {
val companion = BuildInformation
def run() = {
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala
index b56165eda5..0e46e461bf 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CalculateDepth.scala
@@ -29,6 +29,7 @@ import org.bdgenomics.adam.projections.AlignmentRecordField._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.BroadcastRegionJoin
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import scala.io._
/**
@@ -38,12 +39,12 @@ import scala.io._
* It then reports, on standard out, the location and name of each variant along with the
* calculated depth.
*/
-object CalculateDepth extends ADAMCommandCompanion {
+object CalculateDepth extends BDGCommandCompanion {
val commandName: String = "depth"
val commandDescription: String = "Calculate the depth from a given ADAM file, " +
"at each variant in a VCF"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new CalculateDepth(Args4j[CalculateDepthArgs](cmdLine))
}
}
@@ -59,10 +60,10 @@ class CalculateDepthArgs extends Args4jBase with ParquetArgs {
val cartesian: Boolean = false
}
-class CalculateDepth(protected val args: CalculateDepthArgs) extends ADAMSparkCommand[CalculateDepthArgs] {
- val companion: ADAMCommandCompanion = CalculateDepth
+class CalculateDepth(protected val args: CalculateDepthArgs) extends BDGSparkCommand[CalculateDepthArgs] {
+ val companion: BDGCommandCompanion = CalculateDepth
- def run(sc: SparkContext, job: Job): Unit = {
+ def run(sc: SparkContext): Unit = {
val proj = Projection(contig, start, cigar, readMapped)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala
deleted file mode 100644
index 8e9b879373..0000000000
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ComputeVariants.scala
+++ /dev/null
@@ -1,17 +0,0 @@
-/**
- * Licensed to Big Data Genomics (BDG) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The BDG licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala
index f27f0d878c..b0e957f215 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountContigKmers.scala
@@ -24,9 +24,10 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.ParquetLogger
import org.bdgenomics.formats.avro.NucleotideContigFragment
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-object CountContigKmers extends ADAMCommandCompanion {
+object CountContigKmers extends BDGCommandCompanion {
val commandName = "count_contig_kmers"
val commandDescription = "Counts the k-mers/q-mers from a read dataset."
@@ -46,10 +47,10 @@ class CountContigKmersArgs extends Args4jBase with ParquetArgs {
var printHistogram: Boolean = false
}
-class CountContigKmers(protected val args: CountContigKmersArgs) extends ADAMSparkCommand[CountContigKmersArgs] with Logging {
+class CountContigKmers(protected val args: CountContigKmersArgs) extends BDGSparkCommand[CountContigKmersArgs] with Logging {
val companion = CountContigKmers
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
// Quiet Parquet...
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala
index f4e76e1bec..caf33b0e66 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/CountReadKmers.scala
@@ -25,9 +25,10 @@ import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.ParquetLogger
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-object CountReadKmers extends ADAMCommandCompanion {
+object CountReadKmers extends BDGCommandCompanion {
val commandName = "count_kmers"
val commandDescription = "Counts the k-mers/q-mers from a read dataset."
@@ -51,10 +52,10 @@ class CountReadKmersArgs extends Args4jBase with ParquetArgs {
var repartition: Int = -1
}
-class CountReadKmers(protected val args: CountReadKmersArgs) extends ADAMSparkCommand[CountReadKmersArgs] with Logging {
+class CountReadKmers(protected val args: CountReadKmersArgs) extends BDGSparkCommand[CountReadKmersArgs] with Logging {
val companion = CountReadKmers
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
// Quiet Parquet...
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala
index 47449151b8..d1f1b3dd0a 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fasta2ADAM.scala
@@ -17,16 +17,16 @@
*/
package org.bdgenomics.adam.cli
-import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{ Logging, SparkContext }
import org.bdgenomics.adam.rdd.ADAMContext._
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-object Fasta2ADAM extends ADAMCommandCompanion {
+object Fasta2ADAM extends BDGCommandCompanion {
val commandName: String = "fasta2adam"
val commandDescription: String = "Converts a text FASTA sequence file into an ADAMNucleotideContig Parquet file which represents assembled sequences."
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new Fasta2ADAM(Args4j[Fasta2ADAMArgs](cmdLine))
}
}
@@ -44,10 +44,10 @@ class Fasta2ADAMArgs extends Args4jBase with ParquetSaveArgs {
var fragmentLength: Long = 10000L
}
-class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends ADAMSparkCommand[Fasta2ADAMArgs] with Logging {
+class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends BDGSparkCommand[Fasta2ADAMArgs] with Logging {
val companion = Fasta2ADAM
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
log.info("Loading FASTA data from disk.")
val adamFasta = sc.loadFasta(args.fastaFile, fragmentLength = args.fragmentLength)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala
index db6f3a6d8d..360595e17e 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Features2ADAM.scala
@@ -23,9 +23,10 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.BaseFeature
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.Feature
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.Argument
-object Features2ADAM extends ADAMCommandCompanion {
+object Features2ADAM extends BDGCommandCompanion {
val commandName = "features2adam"
val commandDescription = "Convert a file with sequence features into corresponding ADAM format"
@@ -44,10 +45,10 @@ class Features2ADAMArgs extends Args4jBase with ParquetSaveArgs {
}
class Features2ADAM(val args: Features2ADAMArgs)
- extends ADAMSparkCommand[Features2ADAMArgs] {
+ extends BDGSparkCommand[Features2ADAMArgs] {
val companion = Features2ADAM
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
sc.loadFeatures(args.featuresFile).adamParquetSave(args)
}
}
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala
index 1806c854d0..e4c9f24a12 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/FlagStat.scala
@@ -23,13 +23,14 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.projections.{ Projection, AlignmentRecordField }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.Argument
-object FlagStat extends ADAMCommandCompanion {
+object FlagStat extends BDGCommandCompanion {
val commandName: String = "flagstat"
val commandDescription: String = "Print statistics on reads in an ADAM file (similar to samtools flagstat)"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new FlagStat(Args4j[FlagStatArgs](cmdLine))
}
}
@@ -39,10 +40,10 @@ class FlagStatArgs extends Args4jBase with ParquetArgs {
val inputPath: String = null
}
-class FlagStat(protected val args: FlagStatArgs) extends ADAMSparkCommand[FlagStatArgs] {
- val companion: ADAMCommandCompanion = FlagStat
+class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagStatArgs] {
+ val companion: BDGCommandCompanion = FlagStat
- def run(sc: SparkContext, job: Job): Unit = {
+ def run(sc: SparkContext): Unit = {
val projection = Projection(
AlignmentRecordField.readMapped,
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala
index 4d371189ce..ffae66ee3b 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Flatten.scala
@@ -19,17 +19,18 @@ package org.bdgenomics.adam.cli
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
-import org.apache.hadoop.mapreduce.Job
import org.apache.spark.rdd.MetricsContext._
import org.apache.spark.{ Logging, SparkContext }
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.util.{ Flattener, HadoopUtil }
+import org.bdgenomics.adam.util.Flattener
+import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.instrumentation.Metrics
+import org.bdgenomics.utils.misc.HadoopUtil
import org.kohsuke.args4j.Argument
import parquet.avro.AvroParquetInputFormat
import parquet.hadoop.util.ContextUtil
-object Flatten extends ADAMCommandCompanion {
+object Flatten extends BDGCommandCompanion {
val commandName = "flatten"
val commandDescription = "Convert a ADAM format file to a version with a flattened " +
"schema, suitable for querying with tools like Impala"
@@ -48,10 +49,10 @@ class FlattenArgs extends Args4jBase with ParquetSaveArgs {
var outputPath: String = null
}
-class Flatten(val args: FlattenArgs) extends ADAMSparkCommand[FlattenArgs] with Logging {
+class Flatten(val args: FlattenArgs) extends BDGSparkCommand[FlattenArgs] with Logging {
val companion = Flatten
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
val job = HadoopUtil.newJob(sc)
val records = sc.newAPIHadoopFile(
@@ -76,4 +77,4 @@ class Flatten(val args: FlattenArgs) extends ADAMSparkCommand[FlattenArgs] with
args.disableDictionaryEncoding,
Some(flatSchema))
}
-}
\ No newline at end of file
+}
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala
index 3e2cc8e147..aaba5a5898 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ListDict.scala
@@ -22,13 +22,14 @@ import org.apache.spark.SparkContext
import org.bdgenomics.adam.models.SequenceRecord
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.Argument
-object ListDict extends ADAMCommandCompanion {
+object ListDict extends BDGCommandCompanion {
val commandName: String = "listdict"
val commandDescription: String = "Print the contents of an ADAM sequence dictionary"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new ListDict(Args4j[ListDictArgs](cmdLine))
}
}
@@ -38,10 +39,10 @@ class ListDictArgs extends Args4jBase with ParquetArgs {
val inputPath: String = null
}
-class ListDict(protected val args: ListDictArgs) extends ADAMSparkCommand[ListDictArgs] {
- val companion: ADAMCommandCompanion = ListDict
+class ListDict(protected val args: ListDictArgs) extends BDGSparkCommand[ListDictArgs] {
+ val companion: BDGCommandCompanion = ListDict
- def run(sc: SparkContext, job: Job): Unit = {
+ def run(sc: SparkContext): Unit = {
val dict = sc.adamDictionaryLoad[AlignmentRecord](args.inputPath)
dict.records.foreach {
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala
deleted file mode 100644
index 25212fb1aa..0000000000
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ParquetArgs.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to Big Data Genomics (BDG) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The BDG licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.bdgenomics.adam.cli
-
-import org.bdgenomics.adam.rdd.{ ADAMSaveArgs, ADAMParquetArgs }
-import org.kohsuke.args4j.{ Argument, Option }
-import parquet.hadoop.metadata.CompressionCodecName
-
-trait ParquetArgs extends Args4jBase with ADAMParquetArgs {
- @Option(required = false, name = "-parquet_block_size", usage = "Parquet block size (default = 128mb)")
- var blockSize = 128 * 1024 * 1024
- @Option(required = false, name = "-parquet_page_size", usage = "Parquet page size (default = 1mb)")
- var pageSize = 1 * 1024 * 1024
- @Option(required = false, name = "-parquet_compression_codec", usage = "Parquet compression codec")
- var compressionCodec = CompressionCodecName.GZIP
- @Option(name = "-parquet_disable_dictionary", usage = "Disable dictionary encoding")
- override var disableDictionaryEncoding = false
- @Option(required = false, name = "-parquet_logging_level", usage = "Parquet logging level (default = severe)")
- var logLevel = "SEVERE"
-}
-
-trait ParquetSaveArgs extends ParquetArgs with ADAMSaveArgs
-
-trait LoadFileArgs {
- @Argument(required = true, metaVar = "INPUT", usage = "The ADAM, BAM or SAM file to load as input", index = 0)
- var inputPath: String = null
-}
-
-trait SaveFileArgs {
- @Argument(required = true, metaVar = "OUTPUT", usage = "The ADAM, BAM or SAM file to save as output", index = 1)
- var outputPath: String = null
-}
-
-trait ParquetLoadSaveArgs extends ParquetSaveArgs with LoadFileArgs with SaveFileArgs
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala
index 98b73efaa6..7938c78e84 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PluginExecutor.scala
@@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.plugins.{ AccessControl, ADAMPlugin }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
/**
@@ -38,11 +39,11 @@ import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
* plugin. The org.bdgenomics.adam.plugins.ADAMPlugin interface defines the
* class that will run using this command.
*/
-object PluginExecutor extends ADAMCommandCompanion {
+object PluginExecutor extends BDGCommandCompanion {
val commandName: String = "plugin"
val commandDescription: String = "Executes an ADAMPlugin"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new PluginExecutor(Args4j[PluginExecutorArgs](cmdLine))
}
}
@@ -62,8 +63,8 @@ class PluginExecutorArgs extends Args4jBase with ParquetArgs {
var pluginArgs: String = ""
}
-class PluginExecutor(protected val args: PluginExecutorArgs) extends ADAMSparkCommand[PluginExecutorArgs] {
- val companion: ADAMCommandCompanion = PluginExecutor
+class PluginExecutor(protected val args: PluginExecutorArgs) extends BDGSparkCommand[PluginExecutorArgs] {
+ val companion: BDGCommandCompanion = PluginExecutor
def loadPlugin[Input <% SpecificRecord: Manifest, Output](pluginName: String): ADAMPlugin[Input, Output] = {
Thread.currentThread()
@@ -85,7 +86,7 @@ class PluginExecutor(protected val args: PluginExecutorArgs) extends ADAMSparkCo
output.map(_.toString).collect().foreach(println)
}
- def run(sc: SparkContext, job: Job): Unit = {
+ def run(sc: SparkContext): Unit = {
val plugin = loadPlugin[AlignmentRecord, Any](args.plugin)
val accessControl = loadAccessControl[AlignmentRecord](args.accessControl)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala
index efc8b8cd35..8ba83c3d13 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintADAM.scala
@@ -18,17 +18,17 @@
package org.bdgenomics.adam.cli
import java.util
-
import org.apache.avro.generic.{ GenericDatumWriter, IndexedRecord }
import org.apache.avro.io.EncoderFactory
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext
import org.bdgenomics.adam.util.ParquetFileTraversable
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
import scala.collection.JavaConversions._
-object PrintADAM extends ADAMCommandCompanion {
+object PrintADAM extends BDGCommandCompanion {
val commandName: String = "print"
val commandDescription: String = "Print an ADAM formatted file"
@@ -48,7 +48,7 @@ class PrintADAMArgs extends Args4jBase {
var prettyRaw: Boolean = false
}
-class PrintADAM(protected val args: PrintADAMArgs) extends ADAMSparkCommand[PrintADAMArgs] {
+class PrintADAM(protected val args: PrintADAMArgs) extends BDGSparkCommand[PrintADAMArgs] {
val companion = PrintADAM
/**
@@ -98,7 +98,7 @@ class PrintADAM(protected val args: PrintADAMArgs) extends ADAMSparkCommand[Prin
})
}
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
val output = Option(args.outputFile)
args.filesToPrint.foreach(file => {
displayRaw(sc, file, pretty = args.prettyRaw, output = output)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala
index df20805282..61c288ced9 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintGenes.scala
@@ -23,9 +23,10 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.Feature
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Option => option, Argument }
-object PrintGenes extends ADAMCommandCompanion {
+object PrintGenes extends BDGCommandCompanion {
val commandName: String = "print_genes"
val commandDescription: String = "Load a GTF file containing gene annotations and print the corresponding gene models"
@@ -40,11 +41,11 @@ class PrintGenesArgs extends Args4jBase with ParquetArgs with Serializable {
}
class PrintGenes(protected val args: PrintGenesArgs)
- extends ADAMSparkCommand[PrintGenesArgs] with Serializable {
+ extends BDGSparkCommand[PrintGenesArgs] with Serializable {
val companion = PrintGenes
- def run(sc: SparkContext, job: Job): Unit = {
+ def run(sc: SparkContext): Unit = {
val genes: RDD[Gene] = sc.loadGenes(args.gtfInput)
genes.map(printGene).collect().foreach(println)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala
index 43e4068e62..f928e5c24b 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/PrintTags.scala
@@ -17,25 +17,25 @@
*/
package org.bdgenomics.adam.cli
-import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.projections.AlignmentRecordField._
import org.bdgenomics.adam.projections.Projection
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
-import org.kohsuke.args4j.{ Argument, Option }
+import org.bdgenomics.utils.cli._
+import org.kohsuke.args4j.{ Argument, Option => A4JOption }
/**
* Reads in the tagStrings field of every record, and prints out the set of unique
* tags found in those fields along with the number of records that have each particular
* tag.
*/
-object PrintTags extends ADAMCommandCompanion {
+object PrintTags extends BDGCommandCompanion {
val commandName: String = "print_tags"
val commandDescription: String = "Prints the values and counts of all tags in a set of records"
- def apply(cmdLine: Array[String]): ADAMCommand = {
+ def apply(cmdLine: Array[String]): BDGCommand = {
new PrintTags(Args4j[PrintTagsArgs](cmdLine))
}
}
@@ -44,20 +44,20 @@ class PrintTagsArgs extends Args4jBase with ParquetArgs {
@Argument(required = true, metaVar = "INPUT", usage = "The ADAM file to scan for tags", index = 0)
val inputPath: String = null
- @Option(required = false, name = "-list",
+ @A4JOption(required = false, name = "-list",
usage = "When value is set to <N>, also lists the first N attribute fields for ADAMRecords in the input")
var list: String = null
- @Option(required = false, name = "-count",
+ @A4JOption(required = false, name = "-count",
usage = "comma-separated list of tag names; for each tag listed, we print the distinct values and their counts")
var count: String = null
}
-class PrintTags(protected val args: PrintTagsArgs) extends ADAMSparkCommand[PrintTagsArgs] {
- val companion: ADAMCommandCompanion = PrintTags
+class PrintTags(protected val args: PrintTagsArgs) extends BDGSparkCommand[PrintTagsArgs] {
+ val companion: BDGCommandCompanion = PrintTags
- def run(sc: SparkContext, job: Job): Unit = {
+ override def run(sc: SparkContext): Unit = {
val toCount = if (args.count != null) args.count.split(",").toSet else Set()
val proj = Projection(attributes, primaryAlignment, readMapped, readPaired, failedVendorQualityChecks)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala
index 82edd945db..5f23de0b8c 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala
@@ -27,9 +27,10 @@ import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
import org.bdgenomics.adam.rich.RichVariant
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-object Transform extends ADAMCommandCompanion {
+object Transform extends BDGCommandCompanion {
val commandName = "transform"
val commandDescription = "Convert SAM/BAM to ADAM format and optionally perform read pre-processing transformations"
@@ -95,7 +96,7 @@ class TransformArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {
var forceLoadParquet: Boolean = false
}
-class Transform(protected val args: TransformArgs) extends ADAMSparkCommand[TransformArgs] with Logging {
+class Transform(protected val args: TransformArgs) extends BDGSparkCommand[TransformArgs] with Logging {
val companion = Transform
def apply(rdd: RDD[AlignmentRecord]): RDD[AlignmentRecord] = {
@@ -162,7 +163,7 @@ class Transform(protected val args: TransformArgs) extends ADAMSparkCommand[Tran
adamRecords
}
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
this.apply({
if (args.forceLoadBam) {
sc.loadBam(args.inputPath)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala
index 52cba7a9af..aca26ec3d4 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Vcf2ADAM.scala
@@ -17,15 +17,16 @@
*/
package org.bdgenomics.adam.cli
-import org.bdgenomics.adam.models.{ SequenceDictionary, VariantContext }
-import org.bdgenomics.adam.rdd.ADAMContext._
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{ Logging, SparkContext }
import org.apache.spark.rdd.RDD
+import org.bdgenomics.adam.models.{ SequenceDictionary, VariantContext }
+import org.bdgenomics.adam.rdd.ADAMContext._
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Option => Args4jOption, Argument }
import java.io.File
-object Vcf2ADAM extends ADAMCommandCompanion {
+object Vcf2ADAM extends BDGCommandCompanion {
val commandName = "vcf2adam"
val commandDescription = "Convert a VCF file to the corresponding ADAM format"
@@ -51,10 +52,10 @@ class Vcf2ADAMArgs extends Args4jBase with ParquetSaveArgs {
var onlyvariants: Boolean = false
}
-class Vcf2ADAM(val args: Vcf2ADAMArgs) extends ADAMSparkCommand[Vcf2ADAMArgs] with DictionaryCommand with Logging {
+class Vcf2ADAM(val args: Vcf2ADAMArgs) extends BDGSparkCommand[Vcf2ADAMArgs] with DictionaryCommand with Logging {
val companion = Vcf2ADAM
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
var dictionary: Option[SequenceDictionary] = loadSequenceDictionary(args.dictionaryFile)
if (dictionary.isDefined)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala
index a526ded134..072430aceb 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VcfAnnotation2ADAM.scala
@@ -15,22 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/*
-* Copyright (c) 2014. Mount Sinai School of Medicine
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
package org.bdgenomics.adam.cli
import org.apache.hadoop.mapreduce.Job
@@ -41,9 +25,10 @@ import org.bdgenomics.adam.converters.VariantAnnotationConverter
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rich.RichVariant
import org.bdgenomics.formats.avro._
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-object VcfAnnotation2ADAM extends ADAMCommandCompanion {
+object VcfAnnotation2ADAM extends BDGCommandCompanion {
val commandName = "anno2adam"
val commandDescription = "Convert an annotation file (in VCF format) to the corresponding ADAM format"
@@ -62,10 +47,10 @@ class VcfAnnotation2ADAMArgs extends Args4jBase with ParquetSaveArgs {
var currentAnnotations: String = null
}
-class VcfAnnotation2ADAM(val args: VcfAnnotation2ADAMArgs) extends ADAMSparkCommand[VcfAnnotation2ADAMArgs] with Logging {
+class VcfAnnotation2ADAM(val args: VcfAnnotation2ADAMArgs) extends BDGSparkCommand[VcfAnnotation2ADAMArgs] with Logging {
val companion = VcfAnnotation2ADAM
- def run(sc: SparkContext, job: Job) {
+ def run(sc: SparkContext) {
log.info("Reading VCF file from %s".format(args.vcfFile))
val annotations: RDD[DatabaseVariantAnnotation] = sc.loadVcfAnnotations(args.vcfFile)
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
index 65d5253cd0..c8d7b16930 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/View.scala
@@ -20,12 +20,12 @@ package org.bdgenomics.adam.cli
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
-import org.bdgenomics.adam.rdd.ADAMSaveArgs
+import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
-import org.bdgenomics.adam.rdd.ADAMContext._
-class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveArgs {
+class ViewArgs extends Args4jBase with ParquetArgs with SaveArgs {
@Argument(required = true, metaVar = "INPUT", usage = "The ADAM, BAM or SAM file to view", index = 0)
var inputPath: String = null
@@ -74,7 +74,7 @@ class ViewArgs extends Args4jBase with ParquetArgs with ADAMSaveArgs {
var outputPathArg: String = null
}
-object View extends ADAMCommandCompanion {
+object View extends BDGCommandCompanion {
val commandName = "view"
val commandDescription = "View certain reads from an alignment-record file."
@@ -93,7 +93,7 @@ object View extends ADAMCommandCompanion {
*
* It is agnostic to its input and output being SAM, BAM, or ADAM files; when printing to stdout it prints SAM.
*/
-class View(val args: ViewArgs) extends ADAMSparkCommand[ViewArgs] {
+class View(val args: ViewArgs) extends BDGSparkCommand[ViewArgs] {
val companion = View
type ReadFilter = (AlignmentRecord => Boolean)
@@ -148,7 +148,7 @@ class View(val args: ViewArgs) extends ADAMSparkCommand[ViewArgs] {
reads
}
- def run(sc: SparkContext, job: Job) = {
+ def run(sc: SparkContext) = {
val reads: RDD[AlignmentRecord] = applyFilters(sc.loadAlignments(args.inputPath))
diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala
index 1a2788a1e0..3c5255fde8 100644
--- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala
+++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Wiggle2Bed.scala
@@ -19,6 +19,7 @@
package org.bdgenomics.adam.cli
import java.io.PrintWriter
+import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.Option
import scala.io.Source
@@ -37,7 +38,7 @@ class Wig2BedArgs extends Args4jBase {
* guarantees where the sync markers are. This makes it difficult to use as a
* "splittable" format, and necessitates processing the file locally.
*/
-object WigFix2Bed extends ADAMCommandCompanion {
+object WigFix2Bed extends BDGCommandCompanion {
val commandName = "wigfix2bed"
val commandDescription = "Locally convert a wigFix file to BED format"
@@ -51,7 +52,7 @@ object WigFix2Bed extends ADAMCommandCompanion {
}
}
-class WigFix2Bed(val args: Wig2BedArgs) extends ADAMCommand {
+class WigFix2Bed(val args: Wig2BedArgs) extends BDGCommand {
val companion = WigFix2Bed
def run() {
diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala
index bbd1a19df5..78a5cce39c 100644
--- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala
+++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/Features2ADAMSuite.scala
@@ -18,10 +18,10 @@
package org.bdgenomics.adam.cli
import java.io._
-
import org.bdgenomics.adam.projections.Projection
import org.bdgenomics.adam.projections.FeatureField._
-import org.bdgenomics.adam.util.{ HadoopUtil, ADAMFunSuite }
+import org.bdgenomics.adam.util.ADAMFunSuite
+import org.bdgenomics.utils.cli.Args4j
import org.bdgenomics.formats.avro.Feature
class Features2ADAMSuite extends ADAMFunSuite {
@@ -42,8 +42,7 @@ class Features2ADAMSuite extends ADAMFunSuite {
val args: Features2ADAMArgs = Args4j.apply[Features2ADAMArgs](argLine)
val features2Adam = new Features2ADAM(args)
- val job = HadoopUtil.newJob()
- features2Adam.run(sc, job)
+ features2Adam.run(sc)
val schema = Projection(featureId, contig, start, strand)
val lister = new ParquetLister[Feature](Some(schema))
@@ -88,8 +87,7 @@ class Features2ADAMSuite extends ADAMFunSuite {
val adamArgLine = "%s %s".format(bedPath, outputPath).split("\\s+")
val adamArgs: Features2ADAMArgs = Args4j.apply[Features2ADAMArgs](adamArgLine)
val features2Adam = new Features2ADAM(adamArgs)
- val job = HadoopUtil.newJob()
- features2Adam.run(sc, job)
+ features2Adam.run(sc)
val schema = Projection(featureId, contig, start, end, value)
val lister = new ParquetLister[Feature](Some(schema))
diff --git a/adam-cli/src/test/scala/FlagStatTest.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala
similarity index 98%
rename from adam-cli/src/test/scala/FlagStatTest.scala
rename to adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala
index eabcdb056a..86d6d1b644 100644
--- a/adam-cli/src/test/scala/FlagStatTest.scala
+++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlagStatSuite.scala
@@ -16,16 +16,17 @@
* limitations under the License.
*/
package org.bdgenomics.adam.cli
-import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
+import org.bdgenomics.adam.rdd.read.FlagStat._
import org.bdgenomics.adam.rdd.read.{ DuplicateMetrics, FlagStatMetrics }
import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.formats.avro.AlignmentRecord
-import org.bdgenomics.adam.rdd.read.FlagStat._
+import org.bdgenomics.utils.cli.Args4j
-class FlagStatTest extends ADAMFunSuite {
+class FlagStatSuite extends ADAMFunSuite {
sparkTest("Standard FlagStat test") {
diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala
index d50540bf67..de4217d1e9 100644
--- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala
+++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/FlattenSuite.scala
@@ -18,10 +18,11 @@
package org.bdgenomics.adam.cli
import java.io._
-
import org.apache.avro.generic.GenericRecord
-import org.bdgenomics.adam.util.{ ADAMFunSuite, HadoopUtil }
+import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.formats.avro.Genotype
+import org.bdgenomics.utils.cli.Args4j
+import org.bdgenomics.utils.misc.HadoopUtil
class FlattenSuite extends ADAMFunSuite {
@@ -40,8 +41,7 @@ class FlattenSuite extends ADAMFunSuite {
val argLine = "%s %s".format(inputPath, outputPath).split("\\s+")
val args: Vcf2ADAMArgs = Args4j.apply[Vcf2ADAMArgs](argLine)
val vcf2Adam = new Vcf2ADAM(args)
- val job = HadoopUtil.newJob()
- vcf2Adam.run(sc, job)
+ vcf2Adam.run(sc)
val lister = new ParquetLister[Genotype]()
val records = lister.materialize(outputPath).toSeq
@@ -54,8 +54,7 @@ class FlattenSuite extends ADAMFunSuite {
val flattenArgLine = "%s %s".format(outputPath, flatPath).split("\\s+")
val flattenArgs: FlattenArgs = Args4j.apply[FlattenArgs](flattenArgLine)
val flatten = new Flatten(flattenArgs)
- val flattenJob = HadoopUtil.newJob()
- flatten.run(sc, flattenJob)
+ flatten.run(sc)
val flatLister = new ParquetLister[GenericRecord]()
val flatRecords = flatLister.materialize(flatPath).toSeq
diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala
index 492a499863..0e2fb77a86 100644
--- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala
+++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/PluginExecutorSuite.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.adam.cli
import java.io._
-import org.bdgenomics.adam.util.{ HadoopUtil, ADAMFunSuite }
+import org.bdgenomics.adam.util.ADAMFunSuite
class PluginExecutorSuite extends ADAMFunSuite {
@@ -37,7 +37,7 @@ class PluginExecutorSuite extends ADAMFunSuite {
val pluginExecutor = new PluginExecutor(args)
val bytesWritten = new ByteArrayOutputStream()
- scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob()))
+ scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc))
val outputString = bytesWritten.toString
@@ -60,7 +60,7 @@ class PluginExecutorSuite extends ADAMFunSuite {
val pluginExecutor = new PluginExecutor(args)
val bytesWritten = new ByteArrayOutputStream()
- scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob()))
+ scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc))
val outputString = bytesWritten.toString
@@ -85,7 +85,7 @@ class PluginExecutorSuite extends ADAMFunSuite {
val pluginExecutor = new PluginExecutor(args)
val bytesWritten = new ByteArrayOutputStream()
- scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc, HadoopUtil.newJob()))
+ scala.Console.withOut(bytesWritten)(pluginExecutor.run(sc))
val outputString = bytesWritten.toString
diff --git a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala
index f4138ca4fc..f4b44d064a 100644
--- a/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala
+++ b/adam-cli/src/test/scala/org/bdgenomics/adam/cli/ViewSuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.AlignmentRecord
+import org.bdgenomics.utils.cli.Args4j
class ViewSuite extends ADAMFunSuite {
diff --git a/adam-core/pom.xml b/adam-core/pom.xml
index 4e1967888d..415c7ccc15 100644
--- a/adam-core/pom.xml
+++ b/adam-core/pom.xml
@@ -3,13 +3,14 @@
4.0.0
org.bdgenomics.adam
- adam-parent
+ adam-parent_2.10
0.16.1-SNAPSHOT
../pom.xml
- adam-core
+
+ adam-core_2.10
jar
- ADAM: Core
+ ADAM_2.10: Core
@@ -112,18 +113,22 @@
- org.bdgenomics.bdg-utils
- bdg-utils-misc
+ org.bdgenomics.utils
+ utils-misc_2.10
test-jar
test
- org.bdgenomics.bdg-utils
- bdg-utils-metrics
+ org.bdgenomics.utils
+ utils-metrics_2.10
+
+
+ org.bdgenomics.utils
+ utils-io_2.10
- org.bdgenomics.bdg-utils
- bdg-utils-parquet
+ org.bdgenomics.utils
+ utils-cli_2.10
com.esotericsoftware.kryo
@@ -131,7 +136,7 @@
org.scoverage
- scalac-scoverage-plugin_${scala.artifact.suffix}
+ scalac-scoverage-plugin_2.10
org.bdgenomics.bdg-formats
@@ -151,7 +156,7 @@
org.apache.spark
- spark-core_${scala.artifact.suffix}
+ spark-core_2.10
it.unimi.dsi
@@ -183,7 +188,7 @@
org.scalatest
- scalatest_${scala.artifact.suffix}
+ scalatest_2.10
test
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
index e6fdf56890..ffb4c7192e 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/VariantContext.scala
@@ -86,8 +86,5 @@ class VariantContext(
val variant: RichVariant,
val genotypes: Iterable[Genotype],
val databases: Option[DatabaseVariantAnnotation] = None) {
- def this(variant: RichVariant, genotypes: Iterable[Genotype], database: Option[DatabaseVariantAnnotation] = None) = {
- this(ReferencePosition(variant), variant, genotypes, database)
- }
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
index d4b98a1337..707448d3b2 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
@@ -38,9 +38,9 @@ import org.bdgenomics.adam.rdd.features._
import org.bdgenomics.adam.rdd.read.AlignmentRecordRDDFunctions
import org.bdgenomics.adam.rdd.variation._
import org.bdgenomics.adam.rich.RichAlignmentRecord
-import org.bdgenomics.adam.util.HadoopUtil
import org.bdgenomics.formats.avro._
import org.bdgenomics.utils.instrumentation.Metrics
+import org.bdgenomics.utils.misc.HadoopUtil
import org.seqdoop.hadoop_bam.util.SAMHeaderReader
import org.seqdoop.hadoop_bam._
import parquet.avro.{ AvroParquetInputFormat, AvroReadSupport }
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala
index 2a9c55818f..1f8ccf861c 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala
@@ -20,40 +20,27 @@ package org.bdgenomics.adam.rdd
import java.util.logging.Level
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
+import org.apache.hadoop.mapreduce.{ OutputFormat => NewOutputFormat, _ }
import org.apache.spark.Logging
import org.apache.spark.rdd.{ InstrumentedOutputFormat, RDD }
import org.apache.spark.rdd.MetricsContext._
import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.models._
-import org.bdgenomics.adam.util.{
- HadoopUtil,
- ParquetLogger
-}
+import org.bdgenomics.adam.util.ParquetLogger
+import org.bdgenomics.utils.cli.SaveArgs
+import org.bdgenomics.utils.misc.HadoopUtil
import parquet.avro.AvroParquetOutputFormat
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName
import parquet.hadoop.util.ContextUtil
-import org.apache.avro.generic.IndexedRecord
-import org.apache.hadoop.mapreduce.{ OutputFormat => NewOutputFormat, _ }
-
-trait ADAMParquetArgs {
- var blockSize: Int
- var pageSize: Int
- var compressionCodec: CompressionCodecName
- var disableDictionaryEncoding: Boolean
-}
-
-trait ADAMSaveArgs extends ADAMParquetArgs {
- var outputPath: String
-}
-trait ADAMSaveAnyArgs extends ADAMSaveArgs {
+trait ADAMSaveAnyArgs extends SaveArgs {
var sortFastqOutput: Boolean
}
class ADAMRDDFunctions[T <% IndexedRecord: Manifest](rdd: RDD[T]) extends Serializable with Logging {
- def adamParquetSave(args: ADAMSaveArgs): Unit = {
+ def adamParquetSave(args: SaveArgs): Unit = {
adamParquetSave(
args.outputPath,
args.blockSize,
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
index 198c129196..79b13f83fa 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDDFunctions.scala
@@ -26,11 +26,9 @@ import org.bdgenomics.adam.converters.FragmentConverter
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSequenceDictionaryRDDAggregator
-import org.bdgenomics.adam.util.{
- HadoopUtil,
- ParquetLogger
-}
+import org.bdgenomics.adam.util.ParquetLogger
import org.bdgenomics.formats.avro._
+import org.bdgenomics.utils.misc.HadoopUtil
import parquet.avro.AvroParquetOutputFormat
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
index 8b50f7fb9f..7bf5b3e882 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala
@@ -34,13 +34,14 @@ import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.models.ReferenceRegion._
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.{ ADAMSaveArgs, ADAMSaveAnyArgs, ADAMSequenceDictionaryRDDAggregator }
+import org.bdgenomics.adam.rdd.{ ADAMSaveAnyArgs, ADAMSequenceDictionaryRDDAggregator }
import org.bdgenomics.adam.rdd.read.correction.{ ErrorCorrection, TrimReads }
import org.bdgenomics.adam.rdd.read.realignment.RealignIndels
import org.bdgenomics.adam.rdd.read.recalibration.BaseQualityRecalibration
import org.bdgenomics.adam.rich.RichAlignmentRecord
import org.bdgenomics.adam.util.MapTools
import org.bdgenomics.formats.avro._
+import org.bdgenomics.utils.cli.SaveArgs
class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord])
extends ADAMSequenceDictionaryRDDAggregator[AlignmentRecord](rdd) {
@@ -67,7 +68,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord])
rdd.filter(overlapsQuery)
}
- def maybeSaveBam(args: ADAMSaveArgs): Boolean = {
+ def maybeSaveBam(args: SaveArgs): Boolean = {
if (args.outputPath.endsWith(".sam")) {
log.info("Saving data in SAM format")
rdd.adamSAMSave(args.outputPath)
@@ -89,7 +90,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord])
false
}
- def adamAlignedRecordSave(args: ADAMSaveArgs) = {
+ def adamAlignedRecordSave(args: SaveArgs) = {
maybeSaveBam(args) || { rdd.adamParquetSave(args); true }
}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala
index 0499015f28..9a75831e85 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/variation/VariationRDDFunctions.scala
@@ -32,8 +32,8 @@ import org.bdgenomics.adam.models.{
import org.bdgenomics.adam.rdd.ADAMSequenceDictionaryRDDAggregator
import org.bdgenomics.adam.rich.RichVariant
import org.bdgenomics.adam.rich.RichGenotype._
-import org.bdgenomics.adam.util.HadoopUtil
import org.bdgenomics.formats.avro.{ Genotype, GenotypeType, DatabaseVariantAnnotation }
+import org.bdgenomics.utils.misc.HadoopUtil
import org.seqdoop.hadoop_bam._
class VariantContextRDDFunctions(rdd: RDD[VariantContext]) extends ADAMSequenceDictionaryRDDAggregator[VariantContext](rdd) with Logging {
@@ -144,7 +144,7 @@ class GenotypeRDDFunctions(rdd: RDD[Genotype]) extends Serializable with Logging
def toVariantContext(): RDD[VariantContext] = {
rdd.keyBy({ g => RichVariant.variantToRichVariant(g.getVariant) })
.groupByKey
- .map { case (v: RichVariant, g) => new VariantContext(v, g, None) }
+ .map { case (v: RichVariant, g) => new VariantContext(ReferencePosition(v), v, g, None) }
}
def filterByOverlappingRegion(query: ReferenceRegion): RDD[Genotype] = {
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala
deleted file mode 100644
index 77d0967839..0000000000
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/HadoopUtil.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to Big Data Genomics (BDG) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The BDG licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.bdgenomics.adam.util
-
-import org.apache.hadoop.mapreduce.Job
-import org.apache.spark.SparkContext
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.conf.Configuration
-
-object HadoopUtil {
-
- def newJob(): Job = {
- newJob(new Configuration())
- }
-
- def newJob(config: Configuration): Job = {
- val jobClass: Class[_] = Class.forName("org.apache.hadoop.mapreduce.Job")
- try {
- // Use the getInstance method in Hadoop 2
- jobClass.getMethod("getInstance", classOf[Configuration]).invoke(null, config).asInstanceOf[Job]
- } catch {
- case ex: NoSuchMethodException =>
- // Drop back to Hadoop 1 constructor
- jobClass.getConstructor(classOf[Configuration]).newInstance(config).asInstanceOf[Job]
- }
- }
-
- /**
- * Create a job using either the Hadoop 1 or 2 API
- * @param sc A Spark context
- */
- def newJob(sc: SparkContext): Job = {
- newJob(sc.hadoopConfiguration)
- }
-
- /**
- * In Hadoop 2.x, isDir is deprecated in favor of isDirectory
- * @param fs
- * @return
- */
- def isDirectory(fs: FileStatus): Boolean = {
- val fsClass: Class[_] = fs.getClass
- try {
- // Use the isDirectory method in Hadoop 2
- fsClass.getMethod("isDirectory").invoke(fs).asInstanceOf[Boolean]
- } catch {
- case ex: NoSuchMethodException =>
- // Drop back to Hadoop 1 isDir method
- fsClass.getMethod("isDir").invoke(fs).asInstanceOf[Boolean]
- }
- }
-
-}
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala
index 259f56ea00..76e259449c 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala
@@ -21,6 +21,7 @@ import org.apache.hadoop.fs.{ FileSystem, Path }
import parquet.avro.AvroParquetReader
import org.apache.avro.generic.IndexedRecord
import org.apache.spark.SparkContext
+import org.bdgenomics.utils.misc.HadoopUtil
class ParquetFileTraversable[T <: IndexedRecord](sc: SparkContext, file: Path) extends Traversable[T] {
def this(sc: SparkContext, file: String) = this(sc, new Path(file))
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala b/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala
index e94df85e94..a83a0162fb 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/TwoBitFile.scala
@@ -19,7 +19,7 @@
package org.bdgenomics.adam.util
import java.nio.{ ByteOrder, ByteBuffer }
-import org.bdgenomics.utils.parquet.io.ByteAccess
+import org.bdgenomics.utils.io.ByteAccess
import org.bdgenomics.adam.models.ReferenceRegion
object TwoBitFile {
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala
index 3673e6946f..02952f1d49 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/util/TwoBitSuite.scala
@@ -19,7 +19,7 @@ package org.bdgenomics.adam.util
import java.io.File
-import org.bdgenomics.utils.parquet.io.LocalFileByteAccess
+import org.bdgenomics.utils.io.LocalFileByteAccess
import org.bdgenomics.adam.models.ReferenceRegion
import org.scalatest.FunSuite
diff --git a/bin/adam-submit b/bin/adam-submit
index b7bd138dbb..b583bcdfcf 100755
--- a/bin/adam-submit
+++ b/bin/adam-submit
@@ -38,21 +38,23 @@ REPO_DIR="$SCRIPT_DIR/adam-cli/target/appassembler/repo/"
fi
# Find the ADAM CLI jar
-CLI_DIR="$REPO_DIR/org/bdgenomics/adam/adam-cli"
-num_versions=$(ls "$CLI_DIR" | wc -l)
+ADAM_DIR="$REPO_DIR/org/bdgenomics/adam/"
+num_versions=$(ls ${ADAM_DIR} | grep cli | wc -l)
if [ "$num_versions" -eq "0" ]; then
- echo "Failed to find adam-cli jar in $CLI_DIR"
+ echo "Failed to find adam-cli jar in $ADAM_DIR"
echo "You need to build ADAM before running this program."
exit 1
fi
if [ "$num_versions" -gt "1" ]; then
- versions_list=$(ls "$CLI_DIR")
- echo "Found multiple ADAM CLI versions in $CLI_DIR:"
+ versions_list=$(ls "$ADAM_DIR" | grep cli)
+ echo "Found multiple ADAM CLI versions in $ADAM_DIR:"
echo "$versions_list"
echo "Please remove all but one."
exit 1
fi
-ADAM_CLI_JAR=$(ls $CLI_DIR/*/adam-cli-*.jar)
+CLI=$(ls "$ADAM_DIR" | grep cli)
+CLI_DIR="${ADAM_DIR}/${CLI}"
+ADAM_CLI_JAR=$(ls $CLI_DIR/*/adam-cli_2.1[01]-*.jar)
# Find spark-submit script
if [ -z "$SPARK_HOME" ]; then
diff --git a/distribution/pom.xml b/distribution/pom.xml
index e4a09eb76e..6d90529352 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -8,9 +8,9 @@
../pom.xml
- adam-distribution
+ adam-distribution_2.10
pom
- ADAM: Distribution
+ ADAM_2.10: Distribution
2.2.0
0.99.2
- 0.1.1
+ 0.2.1
1.129
@@ -65,8 +63,8 @@
- target/scala-${scala.version}/classes
- target/scala-${scala.version}/test-classes
+ target/scala-2.10.4/classes
+ target/scala-2.10.4/test-classes
@@ -249,8 +247,7 @@
- ${scala.version}
- incremental
+ 2.10.4
true
-unchecked
@@ -301,7 +298,7 @@
org.scoverage
- scalac-scoverage-plugin_${scala.artifact.suffix}
+ scalac-scoverage-plugin_2.10
${scoverage.version}
@@ -322,47 +319,52 @@
org.scala-lang
scala-library
- ${scala.version}
+ 2.10.4
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
${project.version}
org.bdgenomics.adam
- adam-core
+ adam-core_2.10
${project.version}
test-jar
test
org.bdgenomics.adam
- adam-apis
+ adam-apis_2.10
${project.version}
org.bdgenomics.adam
- adam-apis
+ adam-apis_2.10
${project.version}
test-jar
test
- org.bdgenomics.bdg-utils
- bdg-utils-misc
+ org.bdgenomics.utils
+ utils-misc_2.10
${utils.version}
test-jar
test
- org.bdgenomics.bdg-utils
- bdg-utils-parquet
+ org.bdgenomics.utils
+ utils-cli_2.10
${utils.version}
- org.bdgenomics.bdg-utils
- bdg-utils-metrics
+ org.bdgenomics.utils
+ utils-io_2.10
+ ${utils.version}
+
+
+ org.bdgenomics.utils
+ utils-metrics_2.10
${utils.version}
@@ -399,7 +401,7 @@
org.apache.spark
- spark-core_${scala.artifact.suffix}
+ spark-core_2.10
${spark.version}
provided
@@ -427,6 +429,12 @@
com.twitter
parquet-scala_2.10
${parquet.version}
+
+
+ org.scala-lang
+ scala-library
+
+
org.seqdoop
@@ -451,7 +459,7 @@
org.scalatest
- scalatest_${scala.artifact.suffix}
+ scalatest_2.10
2.2.2
test
@@ -482,21 +490,6 @@
-
- org.fusesource.scalate
- scalate-core_2.10
- 1.6.1
-
-
- org.scalatra
- scalatra-json_2.10
- 2.3.0
-
-
- org.scalatra
- scalatra_2.10
- 2.3.0
-
com.google.guava
guava
@@ -626,8 +619,7 @@
- ${scala.version}
- incremental
+ 2.10.4
true
-unchecked
@@ -649,7 +641,7 @@
org.scoverage
- scalac-scoverage-plugin_${scala.artifact.suffix}
+ scalac-scoverage-plugin_2.10
${scoverage.version}
diff --git a/scripts/changelog.sh b/scripts/changelog.sh
index efab710609..016e05d896 100755
--- a/scripts/changelog.sh
+++ b/scripts/changelog.sh
@@ -19,7 +19,7 @@ echo "# ADAM #"
git log | grep -E "Merge pull request|prepare release" | grep -vi "Revert" | uniq | while read l
do
- release=`echo $l | grep "\[maven-release-plugin\] prepare release" | cut -d "-" -f 5`
+ release=`echo $l | grep "prepare release" | grep -v 2.11 | awk -F'-' '{print $NF}' | awk -F'_' '{ print $1 }'`
PR=`echo $l| grep -E -o "Merge pull request #[^ ]*" | cut -d "#" -f 2`
# echo $l
if [ -n "$release" ]
@@ -30,7 +30,7 @@ do
if [ -n "$PR" ]
then
JSON=`curl -u $username:$password -s https://api.github.com/repos/bigdatagenomics/adam/pulls/$PR | tr "\n" " "`
- DESC_RAW=$(echo $JSON | grep -Po '"title":.*?[^\\]",' | cut -d "\"" -f 4- | head -n 1 | sed -e "s/\\\\//g")
+ DESC_RAW=$(echo $JSON | egrep -o '"title":.*?[^\\]",' | cut -d "\"" -f 4- | head -n 1 | sed -e "s/\\\\//g")
DESC=$(echo ${DESC_RAW%\",})
echo "* ISSUE [$PR](https://github.com/bigdatagenomics/adam/pull/$PR): ${DESC}"
fi
diff --git a/scripts/move_to_scala_2.10.sh b/scripts/move_to_scala_2.10.sh
new file mode 100755
index 0000000000..2ecb6824d4
--- /dev/null
+++ b/scripts/move_to_scala_2.10.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set +x
+
+find . -name "pom.xml" -exec sed -e "s/2.11.4/2.10.4/g" -e "s/2.11/2.10/g" -i .2.10.bak '{}' \;
+find . -name "*.2.10.bak" -exec rm {} \;
diff --git a/scripts/move_to_scala_2.11.sh b/scripts/move_to_scala_2.11.sh
new file mode 100755
index 0000000000..86f7b8793a
--- /dev/null
+++ b/scripts/move_to_scala_2.11.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set +x
+
+find . -name "pom.xml" -exec sed -e "s/2.10.4/2.11.4/g" -e "s/2.10/2.11/g" -i .2.11.bak '{}' \;
+find . -name "pom.xml" -exec sed -e "s/parquet-scala_2.11/parquet-scala_2.10/g" -i .2.11.2.bak '{}' \;
+find . -name "*.2.11.*bak" -exec rm {} \;
diff --git a/scripts/release/release.sh b/scripts/release/release.sh
index dcb9664ec6..7522397e38 100755
--- a/scripts/release/release.sh
+++ b/scripts/release/release.sh
@@ -1,3 +1,14 @@
#!/bin/sh
+# do scala 2.10 release
mvn -P distribution -Dresume=false release:clean release:prepare release:perform
+
+# do scala 2.11 release
+./scripts/move_to_scala_2.11.sh
+git commit -a -m "Modifying pom.xml files for 2.11 release."
+mvn -P distribution -Dresume=false release:clean release:prepare release:perform
+
+# move back to 2.10 for development
+./scripts/move_to_scala_2.10.sh
+./scripts/changelog.sh | tee CHANGES.md
+git commit -a -m "Modifying pom.xml files to move back to Scala 2.10 for development."